In [11]:
import pandas as pd 
import numpy as np
import csv
import advance_functions as advance
import RiskSLIM as slim
import stumps
from sklearn.linear_model import LogisticRegression
from riskslim.helper_functions import load_data_from_csv, print_model

# Restore the summary dict persisted by an earlier run through IPython's %store.
# NOTE(review): the captured output for this cell reports that no such variable
# was stored, and the name is rebuilt from scratch in the "save results" cell,
# so this restore looks redundant — confirm before removing.
%store -r summary_violent2_ky_inter_model

no stored variable summary_violent2_ky_inter_model


### EBM & CART

In [12]:
# Read the KY_cleaned and FL_cleaned tables.
KY_data = pd.read_csv("~/Documents/Duke/Cynthia Research/psa-analysis - test/kentucky/models/ky_fl_combined/data preparation/KY_cleaned")
FL_data = pd.read_csv("~/Documents/Duke/Cynthia Research/psa-analysis - test/kentucky/models/ky_fl_combined/data preparation/FL_cleaned")

## split x and y
# Features: every column from the first one through 'current_violence'
# (label-based .loc slices include the end label).
KY_X, FL_X = (table.loc[:, :'current_violence'] for table in (KY_data, FL_data))
# Target: the 'recid_violence_two_year' column, taken as its underlying array.
KY_Y, FL_Y = (table['recid_violence_two_year'].values for table in (KY_data, FL_data))

In [13]:
#### CART
# Candidate depths handed to advance.CART as its fifth positional argument.
depth = [8, 9, 10]
cart_summary = advance.CART(
    KY_X, KY_Y,
    FL_X, FL_Y,
    depth,
    seed=816,
)

### EBM
# Single-value grids for advance.EBM. Note that `depth` is rebound here and
# no longer holds the CART depths after this cell runs.
estimators = [20]
depth = [2]
learning_rate = [0.5]
ebm_grid = {
    'learning_rate': learning_rate,
    'depth': depth,
    'estimators': estimators,
}
ebm_summary = advance.EBM(KY_X, KY_Y, FL_X, FL_Y, **ebm_grid, seed=816)

In [14]:
# Show both summaries side by side as a tuple (CART first, then EBM).
(cart_summary, ebm_summary)

({'best_auc': 0.7988550758281007,
  'auc_diff': 0.02229662457957915,
  'best_param': {'max_depth': 8},
  'FL_score': 0.6141025843548591},
 {'best_auc': 0.8081345640712394,
  'auc_diff': 0.004205608620147849,
  'best_param': {'learning_rate': 0.5,
   'max_tree_splits': 2,
   'n_estimators': 20},
  'FL_score': 0.5640796790092254})

### Lasso Stumps

In [15]:
# Read the KY_stumps and FL_stumps tables.
KY_stumps = pd.read_csv("~/Documents/Duke/Cynthia Research/psa-analysis - test/kentucky/models/ky_fl_combined/data preparation/KY_stumps")
FL_stumps = pd.read_csv("~/Documents/Duke/Cynthia Research/psa-analysis - test/kentucky/models/ky_fl_combined/data preparation/FL_stumps")

## split x and y
# KY_X / KY_Y / FL_X / FL_Y are rebound here and replace the frames built from
# the cleaned tables earlier in the notebook.
# Features: every column from the first one through 'current_violence>=1'
# (label-based .loc slices include the end label).
KY_X, FL_X = (table.loc[:, :'current_violence>=1'] for table in (KY_stumps, FL_stumps))
# Target: the 'recid_violence_two_year' column, taken as its underlying array.
KY_Y, FL_Y = (table['recid_violence_two_year'].values for table in (KY_stumps, FL_stumps))

## columns
# Column labels of the KY feature frame, used later to map coefficients to names.
cols = KY_X.columns

In [16]:
# Candidate values of C handed to stumps.stump_cv.
c_grid = dict(C=[0.03, 0.05, 0.07])
stumps_summary = stumps.stump_cv(
    KY_X, KY_Y,
    FL_X, FL_Y,
    cols,
    c_grid,
    seed=816,
)

In [17]:
# Display the summary dict returned by stumps.stump_cv.
stumps_summary

{'best_auc': 0.8095436457263578,
 'best_params': {'C': 0.05},
 'auc_diffs': 0.0024946687247843213,
 'FL_score': 0.6178677492733476}

### RiskSLIM

In [18]:
## train on best param chosen by Lasso Stumps from above
# Read C from the cross-validation summary rather than repeating the literal,
# so this refit keeps following the grid search if the grid or the data change.
# (With the results recorded in this notebook the value is 0.05.)
best_c = stumps_summary['best_params']['C']
lasso = LogisticRegression(class_weight='balanced',
                           solver='liblinear',
                           penalty='l1',
                           C=best_c,
                           random_state=816).fit(KY_X, KY_Y)
# Keep only the columns whose L1-penalised coefficient is nonzero.
selected_features = cols[lasso.coef_[0] != 0].tolist()
# Last expression: number of retained features, shown as the cell output.
len(selected_features)

65

In [19]:
### Subset features
# Restrict both stump tables to the columns kept by the L1 model.
sub_KY_X, sub_FL_X = (
    table.loc[:, selected_features] for table in (KY_stumps, FL_stumps)
)
# Prepend a constant '(Intercept)' column to the KY frame.
# NOTE(review): only the KY frame gets this column; the FL frame is passed on
# without it — confirm slim.risk_cv accounts for that.
sub_KY_X.insert(loc=0, column='(Intercept)', value=1)

In [22]:
# Settings forwarded as keyword arguments to slim.risk_cv.
# NOTE(review): y_label is 'general2' although this notebook models
# 'recid_violence_two_year' — confirm the label is intended.
riskslim_settings = {
    'y_label': 'general2',
    'max_coef': 20,
    'max_coef_number': 10,
    'max_runtime': 200,
    'c': 1e-2,
    'seed': 816,
}
riskslim_summary = slim.risk_cv(sub_KY_X, KY_Y, sub_FL_X, FL_Y, **riskslim_settings)

setting c0 = 0.0 to ensure that intercept is not penalized
09/17/19 @ 04:14 PM | 2701 rows in lookup table
09/17/19 @ 04:14 PM | ------------------------------------------------------------
09/17/19 @ 04:14 PM | runnning initialization procedure
09/17/19 @ 04:14 PM | ------------------------------------------------------------
09/17/19 @ 04:14 PM | CPA produced 2 cuts
09/17/19 @ 04:14 PM | running naive rounding on 89 solutions
09/17/19 @ 04:14 PM | best objective value: 0.2406
09/17/19 @ 04:14 PM | rounding produced 5 integer solutions
09/17/19 @ 04:14 PM | best objective value is 0.2748
09/17/19 @ 04:14 PM | running sequential rounding on 89 solutions
09/17/19 @ 04:14 PM | best objective value: 0.2406
09/17/19 @ 04:14 PM | sequential rounding produced 5 integer solutions
09/17/19 @ 04:14 PM | best objective value: 0.2657
09/17/19 @ 04:14 PM | polishing 10 solutions
09/17/19 @ 04:14 PM | best objective value: 0.2657
09/17/19 @ 04:14 PM | polishing produced 2 integer solutions
09/17/19



Lazy constraint(s) or lazy constraint callback is present.
    Disabling dual reductions (CPX_PARAM_REDUCE) in presolve.
    Disabling non-linear reductions (CPX_PARAM_PRELINEAR) in presolve.
         Disabling repeat represolve because of lazy constraint/incumbent callback.
09/17/19 @ 04:14 PM | adding 251 initial cuts
1 of 1 MIP starts provided solutions.
MIP start 'mip_start_0' defined initial solution with objective 0.2657.
Tried aggregator 1 time.
Reduced MIP has 67 rows, 134 columns, and 263 nonzeros.
Reduced MIP has 65 binaries, 67 generals, 0 SOSs, and 0 indicators.
Presolve time = 0.02 sec. (0.16 ticks)
Probing time = 0.00 sec. (0.07 ticks)
MIP emphasis: balance optimality and feasibility.
MIP search method: traditional branch-and-cut.
Parallel mode: none, using 1 thread.
Root relaxation solution time = 0.00 sec. (0.08 ticks)

        Nodes                                         Cuts/
   Node  Left     Objective  IInf  Best Integer    Best Bound    ItCnt     Gap         Varia

In [23]:
# Display the summary dict returned by slim.risk_cv.
riskslim_summary

{'FL_Score': 0.5850688740048022}

In [24]:
#### save results
# Collect the four per-model summaries under one dict, keyed by model name,
# then persist it with IPython's %store so other sessions can `%store -r` it.
summary_violent2_ky_inter_model = {"CART": cart_summary,
                                    "EBM": ebm_summary,
                                    "Stumps": stumps_summary,
                                    "RiskSLIM": riskslim_summary}
%store summary_violent2_ky_inter_model

Stored 'summary_violent2_ky_inter_model' (dict)


In [26]:
# Destination folder for the results table.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where this folder exists.
path = "C:\\Users\\binha\\Documents\\Duke\\Cynthia Research\\KY-analysis-mytrials\\KY Recidivism\\KY Results\\KY_FL\\KY Model\\interpretable\\Two Year\\"
# One row: a label followed by each model's FL score rounded to 3 decimals.
# The RiskSLIM summary spells its key 'FL_Score' (capital S), unlike the others.
results = [["Violent",
            round(cart_summary['FL_score'], 3),
            round(ebm_summary['FL_score'], 3),
            round(stumps_summary['FL_score'], 3),
            round(riskslim_summary['FL_Score'], 3)]]
# Append to the shared CSV. newline='' is required when handing a file to
# csv.writer: without it, Windows text mode turns the writer's '\r\n' into
# '\r\r\n', which shows up as a blank line after every appended row.
with open(path + 'Two Year KY_inter_model.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)