In [1]:
import os 
# Move three directory levels up from the notebook's location so that the
# relative paths used below (e.g. "kentucky/data/processed/...") resolve.
# Presumably this lands on the repository root and also lets the `utils`
# package be imported — TODO confirm.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel climbs three more levels each time.
os.chdir('../../../')
# Echo the resulting directory so a wrong starting location is visible at once.
print("Current working directory is now: ", os.getcwd())

import pandas as pd 
import numpy as np
import csv
import utils.baseline_functions as base

In [2]:
# Columns removed from both splits immediately after loading.
# NOTE(review): these look like existing risk-score outputs that should not be
# used as model inputs — confirm against the data dictionary.
dropped_columns = ['fta_risk_score_raw', 'nca_risk_score_raw', 'pvf_risk_score_raw',
                   'fta_calc', 'nca_calc', 'pvf_calc']

### train data
# Features are every column from the first one through 'current_violence'
# (label-based slice, inclusive); the target is 'recid_F_six_month'.
train_data = pd.read_csv("kentucky/data/processed/train.csv").drop(columns=dropped_columns)
train_X = train_data.loc[:, :'current_violence']
train_Y = train_data['recid_F_six_month']

### test data
# Same preparation as the training split, applied to the held-out file.
test_data = pd.read_csv("kentucky/data/processed/test.csv").drop(columns=dropped_columns)
test_X = test_data.loc[:, :'current_violence']
test_Y = test_data['recid_F_six_month']

In [None]:
# Every baseline below receives the same train/test splits and the same seed;
# only the hyperparameter grid differs. Each call returns a summary that later
# cells index by keys such as 'best_validation_auc' and 'best_param'.
# NOTE(review): presumably each helper runs a cross-validated grid search —
# confirm in utils.baseline_functions.
split_args = {
    'train_x': train_X,
    'train_y': train_Y,
    'test_x': test_X,
    'test_y': test_Y,
}
fixed_seed = 816

#### Logistic
c = [1e-4, 1e-3, 0.01, 0.1, 1]
logistic_summary = base.Logistic(**split_args, C=c, seed=fixed_seed)

#### Lasso
alpha = [1e-4, 1e-3, 0.01, 0.1, 1]
lasso_summary = base.Lasso(**split_args, alpha=alpha, seed=fixed_seed)

#### LinearSVM
c = [1e-4, 1e-3, 0.01, 0.1, 1]
svm_summary = base.LinearSVM(**split_args, C=c, seed=fixed_seed)

#### CART
depth = [7,8,9]
cart_summary = base.CART(**split_args, depth=depth, seed=fixed_seed)

#### Random Forest
n_estimators =  [40,50]
depth = [6,7]
rf_summary = base.RF(**split_args,
                     depth=depth,
                     estimators=n_estimators,
                     seed=fixed_seed)

#### XGBoost
learning_rate = [0.01]
depth = [6]
n_estimators = [200]
xgb_summary = base.XGB(**split_args,
                       learning_rate=learning_rate,
                       depth=depth,
                       estimators=n_estimators,
                       seed=fixed_seed)

In [14]:
# One row per baseline, in display order:
# [model name, best validation AUC, best validation AUC diff, best params].
named_summaries = [
    ("Logistic", logistic_summary),
    ("Lasso", lasso_summary),
    ("LinearSVM", svm_summary),
    ("CART", cart_summary),
    ("RF", rf_summary),
    ("XGBoost", xgb_summary),
]
results = [
    [model_name,
     summary['best_validation_auc'],
     summary['best_validation_auc_diff'],
     summary['best_param']]
    for model_name, summary in named_summaries
]

In [15]:
# Display the per-model rows: [name, best validation AUC, AUC diff, best params].
# NOTE(review): the recorded output below reports RF max_depth=10 and XGBoost
# learning_rate=0.03 / max_depth=7, none of which are in the grids defined in
# the model-fitting cell (RF depth [6,7]; XGBoost learning_rate [0.01], depth [6]).
# The execution counts are also out of order (None, 14, 15, 7), so this output
# appears to come from an earlier run — restart the kernel and run all cells
# before relying on these numbers.
results

[['Logistic', 0.7262262304280555, 0.0003779573528246427, {'C': 0.1}],
 ['Lasso', 0.7224235099905708, 0.00038163202991159384, {'alpha': 0.0001}],
 ['LinearSVM', 0.7256526577303598, 0.000327777740225188, {'C': 0.1}],
 ['CART', 0.7308200304942606, 0.013414227454195182, {'max_depth': 8}],
 ['RF',
  0.7382671661372947,
  0.01736400516544112,
  {'max_depth': 10, 'n_estimators': 50}],
 ['XGBoost',
  0.743203919672335,
  0.020142671817128255,
  {'learning_rate': 0.03, 'max_depth': 7, 'n_estimators': 200}]]

In [7]:
# Append one "Felony" row to the six-month baseline summary CSV.
# Row layout: label, then (best validation AUC, best validation std) rounded to
# three decimals for each model in the order
# Logistic, Lasso, LinearSVM, CART, RF, XGBoost.
# NOTE(review): absolute, machine-specific output directory; consider moving it
# to a configurable location relative to the project so others can run this.
path = "C:\\Users\\binha\\Documents\\Duke\\Cynthia Research\\KY-analysis-mytrials\\KY Recidivism\\KY Results\\Baselines\\Six Month\\"

summaries_in_column_order = [logistic_summary, lasso_summary, svm_summary,
                             cart_summary, rf_summary, xgb_summary]

# Build the row from each model's own summary. The previous hand-written row
# paired the LinearSVM AUC with the Lasso std (copy-paste slip); iterating over
# the summaries guarantees every AUC is followed by the matching model's std.
felony_row = ["Felony"]
for summary in summaries_in_column_order:
    felony_row.append(round(summary['best_validation_auc'], 3))
    felony_row.append(round(summary['best_validation_std'], 3))
results = [felony_row]

# newline='' is required when handing a file to csv.writer; without it an
# extra blank line is written after every row on Windows.
# Mode 'a' appends, so re-running this cell adds another row to the file.
with open(path + 'Six Month Baseline Summary.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)