In [None]:
import os 
# Move two directories up from where the notebook starts (presumably the
# project root -- the relative "./results/..." paths used later resolve from
# here). The resolved location is remembered on the first run so that
# re-running this cell returns to the same directory instead of climbing two
# more levels every time.
if "PROJECT_ROOT" not in globals():
    os.chdir('../../')
    PROJECT_ROOT = os.getcwd()
else:
    os.chdir(PROJECT_ROOT)
print("Current working directory is now: ", os.getcwd())

import pandas as pd 
import numpy as np
import csv
from sklearn.linear_model import LogisticRegression

import ky_utils.ky_interpretable_functions as interpret
import ky_utils.ky_riskslim as slim
import ky_utils.ky_stumps as stumps

from pprint import pprint
from riskslim.helper_functions import load_data_from_csv, print_model
from sklearn.metrics import roc_auc_score

In [None]:
### EBM & CART

In [None]:
## Read the cleaned Kentucky and Florida tables, each ordered by person_id
KY_data = pd.read_csv("~/Documents/Duke/Cynthia Research/data/ky-fl-data/KY-cleaned.csv")
KY_data = KY_data.sort_values('person_id')
FL_data = pd.read_csv("~/Documents/Duke/Cynthia Research/data/ky-fl-data/FL-cleaned.csv")
FL_data = FL_data.sort_values('person_id')

## Features: every column up to and including 'current_violence20'
## Label: the 'drug_six_month' column, taken as a NumPy array
KY_X, KY_Y = KY_data.loc[:, :'current_violence20'], KY_data['drug_six_month'].values
FL_X, FL_Y = FL_data.loc[:, :'current_violence20'], FL_data['drug_six_month'].values

In [None]:
#### CART -- candidate `depth` values 5 through 10
depth = list(range(5, 11))
cart_summary = interpret.CART(KY_X, KY_Y, FL_X, FL_Y, depth, seed=816)

### EBM -- a single candidate value for each setting
## note: `depth` is rebound here, replacing the CART list above
estimators, depth, learning_rate = [60], [2], [0.1]
ebm_summary = interpret.EBM(KY_X, KY_Y, FL_X, FL_Y, learning_rate, depth, estimators, seed=816)

In [None]:
## Mean FL score, then mean KY score, for each model fitted so far.
## (The second label previously read "EMB", a typo for EBM.)
for _label, _summary in (("CART: ", cart_summary),
                         ("EBM: ", ebm_summary)):
    print(_label, np.mean(_summary['FL_score']), np.mean(_summary['KY_score']))

In [None]:
---

In [None]:
### Lasso Stumps

In [None]:
## Read the stumps tables for Kentucky and Florida, each ordered by person_id
KY_stumps = pd.read_csv("~/Documents/Duke/Cynthia Research/data/ky-fl-data/KY-stumps.csv")
KY_stumps = KY_stumps.sort_values('person_id')
FL_stumps = pd.read_csv("~/Documents/Duke/Cynthia Research/data/ky-fl-data/FL-stumps.csv")
FL_stumps = FL_stumps.sort_values('person_id')

## Features: every column up to and including 'current_violence201'
## Label: the 'drug_six_month' column, taken as a NumPy array
## NOTE: this rebinds KY_X / KY_Y / FL_X / FL_Y -- from here on they refer to
## the stumps tables, not the cleaned tables loaded earlier.
KY_X, KY_Y = KY_stumps.loc[:, :'current_violence201'], KY_stumps['drug_six_month'].values
FL_X, FL_Y = FL_stumps.loc[:, :'current_violence201'], FL_stumps['drug_six_month'].values

## All feature columns except the first one
## (presumably the person_id identifier -- TODO confirm)
cols = KY_X.columns[1:]

In [None]:
## Fit one stump model. 0.0018 is presumably the regularization value C
## (it matches the last entry of the c_grid used for cross-validation) -- TODO confirm.
single_stump_model = stumps.stump_model(KY_X, KY_Y, FL_X, FL_Y, 0.0018, cols, 816)

## Strip the digit characters from every reported feature name, then print
## how many distinct names remain.
unique_stumps = [
    ''.join(ch for ch in feature if not ch.isdigit())
    for feature in single_stump_model['features']
]
print(len(np.unique(unique_stumps)))

In [None]:
## Candidate values for 'C', passed to the stump cross-validation helper
c_grid = dict(C=[0.001, 0.0015, 0.0018])
stumps_summary = stumps.stump_cv(KY_X, KY_Y, FL_X, FL_Y, cols, c_grid, seed=816)

In [None]:
## Mean FL score, then mean KY score, for each model fitted so far.
## (The second label previously read "EMB", a typo for EBM.)
for _label, _summary in (("CART: ", cart_summary),
                         ("EBM: ", ebm_summary),
                         ("Additive: ", stumps_summary)):
    print(_label, np.mean(_summary['FL_score']), np.mean(_summary['KY_score']))

In [None]:
---

In [None]:
### RiskSLIM

In [None]:
## Refit the stump model and keep the features it reports; the RiskSLIM
## cells below are restricted to these columns.
## NOTE(review): this was described as using the best parameter chosen by the
## Lasso Stumps search above, but 0.0006 is not one of the c_grid values
## [0.001, 0.0015, 0.0018] -- confirm which value is intended.
## This also rebinds `single_stump_model` from the earlier fit.
single_stump_model = stumps.stump_model(KY_X, KY_Y, FL_X, FL_Y, 0.0006, cols, 816)
selected_features = single_stump_model['features']
len(selected_features)

In [None]:
### Subset features
sub_FL_X = FL_stumps.loc[:, selected_features]
sub_KY_X = KY_stumps.loc[:, selected_features]
sub_KY_X.insert(0, '(Intercept)', 1)

In [None]:
## Keyword settings forwarded unchanged to slim.risk_cv
riskslim_params = {
    'y_label': 'drug_six_month',
    'max_coef': 5,
    'max_coef_number': 5,
    'max_runtime': 1000,
    'max_offset': 100,
    'c': 1e-6,
    'seed': 816,
}
riskslim_summary = slim.risk_cv(sub_KY_X, KY_Y, sub_FL_X, FL_Y, **riskslim_params)

In [None]:
## Mean FL score, then mean KY score, for all four models.
## (The second label previously read "EMB", a typo for EBM.)
for _label, _summary in (("CART: ", cart_summary),
                         ("EBM: ", ebm_summary),
                         ("Additive: ", stumps_summary),
                         ("RiskSLIM: ", riskslim_summary)):
    print(_label, np.mean(_summary['FL_score']), np.mean(_summary['KY_score']))

In [None]:
#### save results
summary_drug6_ky_inter_model = {"cart": cart_summary,
                                "ebm": ebm_summary,
                                "stumps": stumps_summary,
                                "riskslim": riskslim_summary}

In [None]:
## Append one "Drug" row to the FL-score table: for each model, the mean and
## standard deviation of its FL scores, formatted as "mean (std)" with both
## values rounded to 3 decimals.
path = "./results/interpretable/six-month/"
## The builtin str() replaces np.str, a deprecated alias for it that was
## removed in NumPy 1.24 and now raises AttributeError.
results = [["Drug"] + [
    str(round(np.mean(_summary['FL_score']), 3)) + " (" + str(round(np.std(_summary['FL_score']), 3)) + ")"
    for _summary in (cart_summary, ebm_summary, stumps_summary, riskslim_summary)
]]
## Mode 'a' appends, so every run of this cell adds another row.
## newline='' lets the csv module control line endings (otherwise blank rows
## appear between records on Windows).
with open(path + 'six-month-ky-interpretable-fl-score.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)

In [None]:
## Append one "Drug" row to the KY-score table: for each model, the mean and
## standard deviation of its KY scores, formatted as "mean (std)" with both
## values rounded to 3 decimals.
path = "./results/interpretable/six-month/"
## The builtin str() replaces np.str, a deprecated alias for it that was
## removed in NumPy 1.24 and now raises AttributeError.
results = [["Drug"] + [
    str(round(np.mean(_summary['KY_score']), 3)) + " (" + str(round(np.std(_summary['KY_score']), 3)) + ")"
    for _summary in (cart_summary, ebm_summary, stumps_summary, riskslim_summary)
]]
## Mode 'a' appends, so every run of this cell adds another row.
## newline='' lets the csv module control line endings (otherwise blank rows
## appear between records on Windows).
with open(path + 'six-month-ky-interpretable-ky-score.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)