In [None]:
import os 
# Move to the project root, three levels above the directory the kernel
# started in. The relative "./broward/..." paths used further down are
# resolved against this directory.
#
# The starting directory is recorded the first time this cell runs, so
# re-running the cell lands in the same place instead of climbing another
# three levels each time.
try:
    NOTEBOOK_START_DIR
except NameError:
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, '../../../'))
print("Current working directory is now: ", os.getcwd())

import pandas as pd 
import numpy as np
import csv
import utils.baseline_functions as base

#### Load Data

In [2]:
### train data
# Read the Broward table, then order the rows by person_id. x and y are both
# taken from the sorted frame, so they stay row-aligned.
data = pd.read_csv("./broward/data/broward_data.csv")
data = data.sort_values(by='person_id')

# Label-based column slice: every column from the first one up to and
# including 'five_year'.
# NOTE(review): if 'person_id' or 'drug_six_month' sit at or before
# 'five_year' in the file's column order they end up in x -- confirm.
x = data.loc[:, :'five_year']

# Target column as a plain array.
y = data.loc[:, 'drug_six_month'].values

#### Modeling

In [None]:
#### Logistic
# Grid of C values: 100 evenly spaced points from 1e-5 to 1e-2, converted to a
# plain Python list. (Presumably the regularization grid searched by
# base.Logistic -- confirm against utils.baseline_functions.)
c = np.linspace(start=1e-5, stop=1e-2, num=100).tolist()
logistic_summary = base.Logistic(X=x, Y=y, C=c, seed=816)

#### Lasso
# Fresh list of 100 evenly spaced C values from 1e-5 to 1e-2 for this model.
c = np.linspace(start=1e-5, stop=1e-2, num=100).tolist()
lasso_summary = base.Lasso(X=x, Y=y, C=c, seed=816)

#### LinearSVM
# Fresh list of 100 evenly spaced C values from 1e-5 to 1e-2 for this model.
c = np.linspace(start=1e-5, stop=1e-2, num=100).tolist()
svm_summary = base.LinearSVM(X=x, Y=y, C=c, seed=816)

#### Random Forest
# Hyperparameter grids handed to base.RF.
depth = list(range(1, 4))                       # 1, 2, 3
n_estimators = [50, 100, 200, 400, 600]
# 0.001, 0.002, ..., 0.01 -- k / 1000 is correctly rounded, so each entry is
# the same float as the corresponding decimal literal.
impurity = [k / 1000 for k in range(1, 11)]

rf_summary = base.RF(X=x, Y=y,
                     depth=depth, estimators=n_estimators, impurity=impurity,
                     seed=816)

#### XGBoost
# Hyperparameter grids handed to base.XGB. learning_rate and subsample are
# single-value grids; gamma and child_weight share the same values but are
# kept as separate lists.
learning_rate = [0.05]
subsample = [0.5]
depth = list(range(1, 4))                # 1, 2, 3
n_estimators = [50, 100, 200, 400, 600]
gamma = list(range(6, 13, 2))            # 6, 8, 10, 12
child_weight = list(range(6, 13, 2))     # 6, 8, 10, 12

xgb_summary = base.XGB(X=x, Y=y,
                       learning_rate=learning_rate, depth=depth,
                       estimators=n_estimators, gamma=gamma,
                       child_weight=child_weight, subsample=subsample,
                       seed=816)

#### collect the per-model summaries (in memory only; nothing is written here)
# Keyed by display name, in the order the models were fitted above.
summary_drug6_FL = dict([
    ("Logistic", logistic_summary),
    ("Lasso", lasso_summary),
    ("LinearSVM", svm_summary),
    ("RF", rf_summary),
    ("XGBoost", xgb_summary),
])

#### Results

In [None]:
# Build one row per model -- [name, mean 'holdout_test_auc', mean 'auc_diffs'] --
# and, in parallel, the list of mean 'holdout_test_auc' values on its own.
results = []
auc = []
for model_name, model_summary in summary_drug6_FL.items():
    mean_test_auc = np.mean(model_summary['holdout_test_auc'])
    mean_auc_diff = np.mean(model_summary['auc_diffs'])
    results.append([model_name, mean_test_auc, mean_auc_diff])
    auc.append(mean_test_auc)
# Bare last expression so the notebook displays the table.
results

#### Save Results

In [5]:
path = "./broward/logs/baselines/"


def _mean_std_cell(summary):
    """Format summary['holdout_test_auc'] as "mean (std)".

    Both numbers are rounded to 3 decimals and converted with the builtin
    ``str``. ``np.str`` was only an alias for it and no longer exists in
    current NumPy releases.
    """
    test_auc = summary['holdout_test_auc']
    return str(round(np.mean(test_auc), 3)) + " (" + str(round(np.std(test_auc), 3)) + ")"


# Column order of the output row: Logistic, Lasso, LinearSVM, RF, XGBoost.
baseline_summaries = [logistic_summary, lasso_summary, svm_summary,
                      rf_summary, xgb_summary]

# Mean 'holdout_test_auc' per model, recomputed here so this cell does not
# depend on the `auc` list built in an earlier cell.
mean_test_aucs = [np.mean(s['holdout_test_auc']) for s in baseline_summaries]

# One row: label, five "mean (std)" cells, then the spread (max - min) of the
# per-model mean AUCs rounded to 3 decimals.
results = [["Drug"]
           + [_mean_std_cell(s) for s in baseline_summaries]
           + [round(np.max(mean_test_aucs) - np.min(mean_test_aucs), 3)]]

# Mode 'a' appends: every run of this cell adds another row to the file.
# newline='' lets the csv module control line endings, which avoids blank
# lines between rows on Windows.
with open(path + 'FL-six-month-baseline-summary.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)