In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import csv

from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle

from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show
from interpret.perf import ROC

#### Load Data -- Continuous

In [2]:
### load data
# NOTE(review): this is an absolute, user-specific path; consider a configurable DATA_DIR.
data = pd.read_csv("~/Documents/Duke/Cynthia Research/KY-analysis-mytrials/KY Recidivism/KY data/kentucky_data.csv")
# Drop the person identifier, the screening date and the *_risk_score_raw / *_calc columns.
data = data.drop(['PersonID', 'screening_date','fta_risk_score_raw','nca_risk_score_raw',
                  'pvf_risk_score_raw', 'fta_calc', 'nca_calc', 'pvf_calc'], axis=1)
# Keep only the rows whose `p_charges` value is at most 20.
data = data[data['p_charges'] <= 20]

# Select the feature columns ONCE (everything up to and including 'current_violence')
# and derive both the matrix and its column names from that same selection. The
# previous positional `data.columns[:-14]` could silently drift out of sync with the
# label-based slice used for X if the column layout of the CSV ever changed.
feature_frame = data.loc[:, :'current_violence']
X = feature_frame.values
Y = data['recid_two_year'].values
cols = feature_frame.columns

### Holdout Test Set

In [3]:
ebm = ExplainableBoostingClassifier(feature_names=cols, 
                                    n_estimators=10, 
                                    max_tree_splits=3, 
                                    learning_rate=1, 
                                    holdout_split=0.15, 
                                    random_state=816).fit(X, Y)

In [4]:
Y_pred = ebm.predict_proba(X)[:,1]
fpr, tpr, thresholds = roc_curve(Y, Y_pred)
heldout_test_auc = auc(fpr, tpr)
heldout_test_auc

0.7367962066696017

In [5]:
# Ask the fitted EBM for its global explanation object and hand it to interpret's
# `show` for rendering in the notebook.
ebm_global = ebm.explain_global()
show(ebm_global)

In [46]:
# Log model results to the model performance folder, as per standards.
# NOTE(review): `train_auc`, `test_auc` and `heldout_test_acc` are not defined in any
# cell visible above, and this cell's execution count is out of sequence -- confirm the
# cells that produce them still exist so the notebook survives Restart & Run All.
# NOTE(review): the destination folder is named "Six Month" while the label loaded
# above is `recid_two_year` -- confirm this is the intended location.
path = "C:\\Users\\binha\\Documents\\Duke\\Cynthia Research\\KY-analysis-mytrials\\KY Recidivism\\KY Results\\Models\\Six Month\\"

# Summarise the per-fold AUCs as mean / standard deviation.
train_auc_mean, train_auc_std = np.mean(train_auc), np.std(train_auc)
test_auc_mean, test_auc_std = np.mean(test_auc), np.std(test_auc)

# The row label previously read "Logistic Regression" (copy-paste leftover); the model
# fitted in this notebook is an EBM and the output file is GAM.csv.
results = [["Model", "train_auc_mean", "train_auc_std", "validation_auc_mean", "validation_auc_std", "holdout_test_auc", "holdout_test_acc"],
    ["GAM", train_auc_mean, train_auc_std, test_auc_mean, test_auc_std, heldout_test_auc, heldout_test_acc ]]

# newline='' is required by the csv module: without it the writer's "\r\n" row
# terminator is translated a second time on Windows, leaving a blank line after each row.
with open(path + 'GAM.csv', 'w', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)