In [1]:
using CSV, DataFrames, CategoricalArrays
using Statistics
using CategoricalArrays

# Top 10

In [18]:
# Read one of the filtered top-10 CSV files into a DataFrame and keep only
# columns 2 through 54 (the first column of the file is dropped).
load_grouped(path) = CSV.read(path, DataFrame)[:, 2:54]

train_grouped = load_grouped("Training_Filtered_top10_short.csv")
# 44819 rows

test_grouped = load_grouped("Testing_Filtered_top10_short.csv");
# 44656 rows

In [21]:
# Positional split of the training table: columns 1-52 go to train_X and
# column 53 goes to train_y.
# NOTE(review): both names are assigned again further down using name-based
# column selection, so this positional split looks superseded — confirm.
train_X, train_y = train_grouped[:, 1:52], train_grouped[:, 53];


In [25]:
# Positional split of the testing table: columns 1-52 go to test_X and
# column 53 goes to test_y.
# NOTE(review): both names are assigned again further down using name-based
# column selection, so this positional split looks superseded — confirm.
test_X, test_y = test_grouped[:, 1:52], test_grouped[:, 53];


## Specify treatments

In [26]:
# Names of the six treatment columns: "med_count_1" through "med_count_6".
treatment_cols = ["med_count_$i" for i in 1:6];

# Columns kept out of the feature table: every treatment column (as a Symbol)
# followed by :readmitted.
excluded_cols = [Symbol.(treatment_cols); :readmitted];

In [27]:
# Feature tables: a copy of each dataset containing every column except the
# ones listed in excluded_cols.
train_X = train_grouped[:, Not(excluded_cols)]
test_X = test_grouped[:, Not(excluded_cols)];

In [28]:
# Treatment tables: a copy of each dataset restricted to the columns named in
# treatment_cols.
treatments_train = train_grouped[:, treatment_cols]
treatments_test = test_grouped[:, treatment_cols];

In [29]:
# Turn each row's six med_count_* values into a single label string of the
# form "[a,b,c,d,e,f]", giving one label per treatment combination.
function treatment_label(t)
    return string.(
        "[", t.med_count_1, ",", t.med_count_2, ",", t.med_count_3, ",",
        t.med_count_4, ",", t.med_count_5, ",", t.med_count_6, "]"
    )
end

train_T_categorical = treatment_label(treatments_train)

test_T_categorical = treatment_label(treatments_test);


In [30]:
# Outcome vectors: a copy of the "readmitted" column from each dataset.
train_y = train_grouped[:, "readmitted"]
test_y = test_grouped[:, "readmitted"];

## Optimal Policy Trees

In [31]:
# One seed shared by the reward estimator and both of its inner forests.
seed = 42

# Separate random-forest classifiers for the propensity and outcome models.
propensity_forest = IAI.RandomForestClassifier(random_seed=seed)
outcome_forest = IAI.RandomForestClassifier(random_seed=seed)

# Reward estimator for categorical treatments, configured with the
# :doubly_robust reward estimation method.
categorical_reward_lnr = IAI.CategoricalClassificationRewardEstimator(
    propensity_estimator=propensity_forest,
    outcome_estimator=outcome_forest,
    reward_estimator=:doubly_robust,
    random_seed=seed,
)

Unfitted CategoricalClassificationRewardEstimator:
  propensity_estimator: Unfitted RandomForestClassifier:
    random_seed: 42
  outcome_estimator:    Unfitted RandomForestClassifier:
    random_seed: 42
  reward_estimator:     doubly_robust
  random_seed:          42

### Training Set

In [32]:
# Fit the reward estimator on the training features, treatment labels and
# outcomes; the call returns the predictions together with the scores.
train_predictions, train_reward_score = IAI.fit_predict!(
    categorical_reward_lnr,
    train_X,
    train_T_categorical,
    train_y,
)

# Keep the estimated rewards, then show the per-treatment outcome scores.
train_rewards = train_predictions[:reward]
train_reward_score[:outcome]


[33m[1m└ [22m[39m27f100b463b00d81bd66a054a921d6c889740dc7a5d39175d51c63ff2de208f5


Dict{String, Float64} with 10 entries:
  "[0,1,0,0,0,1]" => 0.0537097
  "[0,0,0,0,0,0]" => 0.0896993
  "[0,0,0,1,0,1]" => 0.0438627
  "[1,0,0,0,0,1]" => 0.0702976
  "[0,1,0,0,0,0]" => 0.0558494
  "[0,0,0,1,0,0]" => 0.0519101
  "[1,1,0,0,0,1]" => 0.0544617
  "[1,1,0,0,0,0]" => 0.0426775
  "[0,0,0,0,0,1]" => 0.07718
  "[1,0,0,0,0,0]" => 0.0865948

In [33]:
# Show the propensity score recorded for the training-set fit.
train_reward_score[:propensity]
# If the outcome scores are good but the propensity score is poor, we can still go ahead.

0.23646724931033775

### Testing Set

In [34]:
# Run fit_predict! again on the same estimator object, this time with the
# testing features, treatment labels and outcomes.
test_predictions, test_reward_score = IAI.fit_predict!(
    categorical_reward_lnr,
    test_X,
    test_T_categorical,
    test_y,
)

# Keep the estimated rewards, then show the per-treatment outcome scores.
test_rewards = test_predictions[:reward]
test_reward_score[:outcome]

Dict{String, Float64} with 10 entries:
  "[0,1,0,0,0,1]" => 0.0507943
  "[0,0,0,0,0,0]" => 0.0851707
  "[0,0,0,1,0,1]" => 0.0514181
  "[1,0,0,0,0,1]" => 0.0608501
  "[0,1,0,0,0,0]" => 0.0672094
  "[0,0,0,1,0,0]" => 0.0480996
  "[1,1,0,0,0,1]" => 0.0437438
  "[1,1,0,0,0,0]" => 0.0410799
  "[0,0,0,0,0,1]" => 0.0811204
  "[1,0,0,0,0,0]" => 0.0544569

In [35]:
# Show the propensity score recorded for the testing-set fit.
test_reward_score[:propensity]


0.23640397561997198