In [1]:
import os 
# Move two directory levels up before anything else runs; presumably this is
# what lets the `fl_utils` import and the relative "./results/..." paths
# used later resolve -- confirm against the repository layout.
os.chdir('../../')
working_dir = os.getcwd()
print("Current working directory is now: ", working_dir)

import pandas as pd
import numpy as np
import csv
import fl_utils.fl_baseline_functions as base

Current working directory is now:  /home/jovyan/work/cynthia/interpretable-machine-learning/ky-fl-combined/fl-model


In [2]:
## load the cleaned KY and FL tables, each ordered by person_id
KY_data = pd.read_csv("/home/jovyan/work/cynthia/data/ky-fl-data/KY-cleaned.csv")
KY_data = KY_data.sort_values('person_id')
FL_data = pd.read_csv("/home/jovyan/work/cynthia/data/ky-fl-data/FL-cleaned.csv")
FL_data = FL_data.sort_values('person_id')

## split x and y
## x: every column up to and including 'current_violence20' (label slices are inclusive)
## y: the 'violent_six_month' column, taken out of pandas via .values
KY_X, KY_Y = KY_data.loc[:, :'current_violence20'], KY_data['violent_six_month'].values
FL_X, FL_Y = FL_data.loc[:, :'current_violence20'], FL_data['violent_six_month'].values

### Model

In [3]:
# Every baseline receives the same four positional inputs, in this order.
ky_fl_inputs = (KY_X, KY_Y, FL_X, FL_Y)

#### Logistic
# 100 evenly spaced C values between 1e-5 and 1e-2; rebuilt before each
# linear model so that every call gets its own fresh list.
c = np.linspace(1e-5, 1e-2, 100).tolist()
logistic_summary = base.Logistic(*ky_fl_inputs, C=c, seed=816)

#### Lasso
c = np.linspace(1e-5, 1e-2, 100).tolist()
lasso_summary = base.Lasso(*ky_fl_inputs, C=c, seed=816)

#### LinearSVM
c = np.linspace(1e-5, 1e-2, 100).tolist()
svm_summary = base.LinearSVM(*ky_fl_inputs, C=c, seed=816)

#### Random Forest
n_estimators = [50, 100, 200, 400, 600]
depth = [1, 2, 3]
impurity = [
    0.001, 0.002, 0.003, 0.004, 0.005,
    0.006, 0.007, 0.008, 0.009, 0.01,
]
rf_summary = base.RF(
    *ky_fl_inputs,
    depth=depth,
    estimators=n_estimators,
    impurity=impurity,
    seed=816,
)

#### XGBoost
learning_rate = [0.05]
depth = [1, 2, 3]
n_estimators = [50, 100, 200, 400, 600]
gamma = [6, 8, 10, 12]
child_weight = [6, 8, 10, 12]
subsample = [0.5]
xgb_summary = base.XGB(
    *ky_fl_inputs,
    learning_rate=learning_rate,
    depth=depth,
    estimators=n_estimators,
    gamma=gamma,
    child_weight=child_weight,
    subsample=subsample,
    seed=816,
)

#### save results
# Collect every model's summary under its display name, in fitting order.
summary_violent6_fl_model = dict(
    Logistic=logistic_summary,
    Lasso=lasso_summary,
    LinearSVM=svm_summary,
    RF=rf_summary,
    XGBoost=xgb_summary,
)

In [5]:
# One row per model: [name, mean 'FL_validation', mean 'auc_diff', mean 'KY_score'].
results = [
    [
        model_name,
        np.mean(model_summary['FL_validation']),
        np.mean(model_summary['auc_diff']),
        np.mean(model_summary['KY_score']),
    ]
    for model_name, model_summary in summary_violent6_fl_model.items()
]
results

[['Logistic', 0.6695413959307145, 0.04723925066910843, 0.6529234237983536],
 ['Lasso', 0.6501390620957653, 0.021981988136144204, 0.6621880538612469],
 ['LinearSVM', 0.6052509167077185, 0.07509230381270451, 0.533294297427899],
 ['RF', 0.6767600425411044, 0.05397182904437212, 0.7615852973439919],
 ['XGBoost', 0.687977490971216, 0.04320706179981004, 0.7734188450528892]]

In [6]:
# Append one "Violent" row to the KY-score baseline table: a "mean (std)" cell
# (both rounded to 3 decimals) for each model, in the order
# Logistic, Lasso, LinearSVM, RF, XGBoost.
path = "./results/baselines/six-month/"
score_key = 'KY_score'
model_summaries = [logistic_summary, lasso_summary, svm_summary, rf_summary, xgb_summary]
results = [["Violent"] + [
    # Builtin str(): the np.str alias was deprecated in NumPy 1.20 and removed
    # in 1.24, where calling it raises AttributeError.
    str(round(np.mean(summary[score_key]), 3)) + " (" + str(round(np.std(summary[score_key]), 3)) + ")"
    for summary in model_summaries
]]
# Mode 'a' appends, so each run of this cell adds another row to the file.
# newline='' is what the csv module requires so the writer alone controls
# line endings (otherwise an extra \r appears on platforms that use \r\n).
with open(path + 'six-month-fl-baseline-ky-score.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)

In [7]:
# Append one "Violent" row to the FL-score baseline table: a "mean (std)" cell
# (both rounded to 3 decimals) for each model, in the order
# Logistic, Lasso, LinearSVM, RF, XGBoost.
path = "./results/baselines/six-month/"
score_key = 'FL_score'
model_summaries = [logistic_summary, lasso_summary, svm_summary, rf_summary, xgb_summary]
results = [["Violent"] + [
    # Builtin str(): the np.str alias was deprecated in NumPy 1.20 and removed
    # in 1.24, where calling it raises AttributeError.
    str(round(np.mean(summary[score_key]), 3)) + " (" + str(round(np.std(summary[score_key]), 3)) + ")"
    for summary in model_summaries
]]
# Mode 'a' appends, so each run of this cell adds another row to the file.
# newline='' is what the csv module requires so the writer alone controls
# line endings (otherwise an extra \r appears on platforms that use \r\n).
with open(path + 'six-month-fl-baseline-fl-score.csv', 'a', newline='') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerows(results)