In [2]:
import os 
# Capture the kernel's starting directory the first time this cell runs and always
# resolve the four-level climb from there. A bare relative chdir would move a
# further four levels up on every re-run of the cell and break the imports below.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, '../../../../'))
print("Current working directory is now: ", os.getcwd())

import pandas as pd 
import numpy as np
import csv
import utils.baseline_functions as base

In [3]:
### train data
# Read the Broward CSV, then split it into a feature frame and a label vector.
data = pd.read_csv(
    "~/Documents/Duke/Cynthia Research/KY-analysis-mytrials/broward/data/broward_data.csv"
)
# Features: label-based column slice, i.e. every column up to and including 'five_year'.
x = data.loc[:, :'five_year']
# Target: the 'recid_F2' column, extracted with .values.
y = data['recid_F2'].values

In [None]:
#### Logistic
# Candidate values passed as C to base.Logistic (presumably a regularisation grid;
# confirm in utils.baseline_functions). The returned summary is indexed later in
# this notebook by 'holdout_test_auc', 'auc_diffs' and 'best_param'.
c = [1e-5, 1e-4, 1e-3, 1e-2]
logistic_summary = base.Logistic(X=x, Y=y, C=c, seed=816)

#### Lasso
# Candidate values passed as C to base.Lasso; same seed as the other baselines.
# The returned summary is indexed later by 'holdout_test_auc', 'auc_diffs' and
# 'best_param'.
c = [1e-2, 0.1, 1]
lasso_summary = base.Lasso(X=x, Y=y, C=c, seed=816)

#### LinearSVM
# Candidate values passed as C to base.LinearSVM; same seed as the other baselines.
# The returned summary is indexed later by 'holdout_test_auc', 'auc_diffs' and
# 'best_param'.
c = [1e-5, 1e-4, 1e-3]
svm_summary = base.LinearSVM(X=x, Y=y, C=c, seed=816)

#### CART
# Candidate values for the three arguments base.CART takes besides the data:
# depth, split and impurity (presumably tree depth, minimum samples to split and
# minimum impurity decrease; confirm in utils.baseline_functions).
depth = [1,2]
impurity = [0.001, 0.003]
split = [2,3,4]
# The seed must be passed by keyword: a bare positional 816 after keyword
# arguments is a SyntaxError and prevents the whole cell from compiling.
# `seed=` matches every other baseline call in this notebook.
cart_summary = base.CART(X=x,
                         Y=y,
                         depth=depth,
                         split=split,
                         impurity=impurity,
                         seed=816)

#### Random Forest
# Candidate values passed to base.RF as estimators, depth and impurity.
# The returned summary is indexed later by 'holdout_test_auc', 'auc_diffs' and
# 'best_param'.
n_estimators = [10, 30]
depth = [1]
impurity = [0.003, 0.005]
rf_summary = base.RF(
    X=x, Y=y,
    depth=depth, estimators=n_estimators, impurity=impurity,
    seed=816,
)

#### XGBoost
# Candidate values passed to base.XGB, one list per keyword argument of the call.
# The returned summary is indexed later by 'holdout_test_auc', 'auc_diffs' and
# 'best_param'.
learning_rate = [0.05, 0.07]
depth = [2, 3]
n_estimators = [20]
gamma = [12, 14, 16]
child_weight = [12, 14, 16]
subsample = [0.3, 0.5]
xgb_summary = base.XGB(
    X=x, Y=y,
    learning_rate=learning_rate, depth=depth, estimators=n_estimators,
    gamma=gamma, child_weight=child_weight, subsample=subsample,
    seed=816,
)

In [7]:
# One row per baseline: [label, mean of 'holdout_test_auc', mean of 'auc_diffs'].
results = [
    [label, np.mean(summary['holdout_test_auc']), np.mean(summary['auc_diffs'])]
    for label, summary in (
        ("Logistic", logistic_summary),
        ("Lasso", lasso_summary),
        ("LinearSVM", svm_summary),
        ("CART", cart_summary),
        ("RF", rf_summary),
        ("XGBoost", xgb_summary),
    )
]

In [8]:
# Show the per-model rows assembled in the previous cell.
# NOTE(review): the saved output below carries a fourth, best-parameter entry per
# row that the current `results` construction does not produce; the output looks
# stale, so re-run to confirm.
results

[['Logistic', 0.6614746768019618, 0.011377324404064693, {'C': 0.001}],
 ['Lasso', 0.6549675472938596, 0.02305068856439374, {'alpha': 0.01}],
 ['LinearSVM', 0.660909065394619, 0.011403899367495973, {'C': 0.0001}],
 ['CART',
  0.6032644173165788,
  0.021635960336564564,
  {'max_depth': 2, 'min_impurity_decrease': 0.007, 'min_samples_split': 2}],
 ['RF',
  0.6451308051050336,
  0.021481710522960884,
  {'max_depth': 1, 'min_impurity_decrease': 0.01, 'n_estimators': 50}],
 ['XGBoost',
  0.6512014814269436,
  0.029307127832094215,
  {'gamma': 12,
   'learning_rate': 0.05,
   'max_depth': 3,
   'min_child_weight': 14,
   'n_estimators': 20,
   'subsample': 0.5}]]

In [None]:
# Gather each baseline's 'best_param' entry, in the same model order used for the
# results table (Logistic, Lasso, LinearSVM, CART, RF, XGBoost).
params = [
    summary['best_param']
    for summary in (
        logistic_summary,
        lasso_summary,
        svm_summary,
        cart_summary,
        rf_summary,
        xgb_summary,
    )
]

In [None]:
# Show the 'best_param' entries collected in the previous cell.
params

In [34]:
# NOTE(review): absolute, machine-specific Windows path. Consider deriving it
# from a configurable results directory instead.
path = "C:\\Users\\binha\\Documents\\Duke\\Cynthia Research\\KY-analysis-mytrials\\broward\\broward models\\model results\\Baselines\\Two Year\\"


def _mean_std_label(summary):
    """Format a summary's 'holdout_test_auc' values as 'mean (std)', rounded to 3 decimals.

    Uses the builtin ``str``: the ``np.str`` alias was deprecated in NumPy 1.20
    and removed in 1.24, where it raises AttributeError.
    """
    aucs = summary['holdout_test_auc']
    return str(round(np.mean(aucs), 3)) + " (" + str(round(np.std(aucs), 3)) + ")"


# A single CSV row: the "Felony" label followed by one "mean (std)" cell per
# baseline, in the order Logistic, Lasso, LinearSVM, CART, RF, XGBoost.
results = [["Felony"] + [
    _mean_std_label(summary)
    for summary in (
        logistic_summary,
        lasso_summary,
        svm_summary,
        cart_summary,
        rf_summary,
        xgb_summary,
    )
]]

# Append so rows already in the summary file are kept. newline='' lets csv.writer
# control line endings; without it Windows output gets a blank line after each row.
with open(path + 'Two Year Baseline Summary.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)