In [1]:
#Basic Tools
import os, sys, glob
import pandas as pd
import numpy as np
import random
import pickle 

#import logistic regression tools
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

#import other classifier tools
from sklearn.metrics import confusion_matrix, mean_squared_error, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve

import warnings
warnings.filterwarnings("ignore")

In [2]:
#Directory of radiomics results
classification_dir = "./Final_ResampledClassificationData.csv"
SelectedFeat_dir = "./results/LASSO_SelectedFeatures.csv"
outdir = "./results/model_selection"
if not os.path.exists(outdir):
    os.mkdir(outdir)

In [3]:
#Set Data
classify_df = pd.read_csv(classification_dir)
features = pd.read_csv(SelectedFeat_dir)

In [4]:
#initialize feature set and gt labels
ft = features["Feature"].values.tolist()
num_ft = len(ft)

ft = ["File", "NE_Score", "NE_Status", "NE_Class", "Group"] + ft
print(ft)

['File', 'NE_Score', 'NE_Status', 'NE_Class', 'Group', 'original_shape_MajorAxisLength', 'original_shape_Maximum2DDiameterRow', 'original_shape_MeshVolume', 'original_shape_MinorAxisLength', 'original_shape_SurfaceArea', 'original_shape_VoxelVolume', 'original_firstorder_Energy', 'original_firstorder_TotalEnergy', 'original_firstorder_Variance', 'original_gldm_DependenceNonUniformity', 'original_gldm_GrayLevelNonUniformity', 'original_gldm_LargeDependenceEmphasis', 'original_gldm_LargeDependenceHighGrayLevelEmphasis', 'original_glrlm_GrayLevelNonUniformity', 'original_glrlm_RunLengthNonUniformity', 'original_glszm_GrayLevelNonUniformity', 'original_glszm_LargeAreaEmphasis', 'original_glszm_LargeAreaHighGrayLevelEmphasis', 'original_glszm_LargeAreaLowGrayLevelEmphasis', 'original_glszm_ZoneVariance']


In [5]:
# select features of interest from main df
feat_data = classify_df[ft]
#print(feat_data) 
#print(feat_data.columns[-num_ft:].values)

In [6]:
#Get Group DataFrames
train_df = feat_data[feat_data["Group"] == "train"]
val_df = feat_data[feat_data["Group"] == "val"]
test_df = feat_data[feat_data["Group"] == "test"]
merge_train = feat_data[feat_data["Group"] != "test"]

In [7]:
#Get X and Y data
X_train = train_df[train_df.columns[-num_ft:].values]
Y_train = train_df[train_df.columns[3]]

X_val = val_df[val_df.columns[-num_ft:].values]
Y_val = val_df[val_df.columns[3]]

X_test = test_df[test_df.columns[-num_ft:].values]
Y_test = test_df[test_df.columns[3]]
Y_names = test_df["File"].tolist() 


In [8]:
# Weighted Training Hyperparameter search

In [9]:
## Check Data Distribution
Y_testNE = len (test_df[test_df["NE_Class"] == 1])
Y_testNonNE = len(test_df[test_df["NE_Class"] == 0])
ytest_frac = Y_testNE / Y_testNonNE

Y_mergeNE = len(merge_train[merge_train["NE_Class"] == 1])
Y_mergeNonNE = len(merge_train[merge_train["NE_Class"] == 0])
ymerge_frac = Y_mergeNE / Y_mergeNonNE

# calculate the data distribution ratios to help with class weighting
Y_allNE = len(classify_df[classify_df["NE_Class"] == 1])
Y_allNonNE = len(classify_df[classify_df["NE_Class"] == 0])
y_all_frac = Y_allNE / Y_allNonNE

# print(ytest_frac)
# print(ymerge_frac)
# print(y_all_frac)

#Do the inverse because of flipping operation (XGBoost)
inv_yall_frac = 1/y_all_frac
inv_ytrain_frac = 1/ymerge_frac

print("\nAll Data Distribution: NEneg, NEpos, ratio")
print(Y_allNonNE,Y_allNE,inv_yall_frac)

print("\nTraining Data Distribution: NEneg, NEpos, ratio")
print(Y_mergeNonNE,Y_mergeNE,inv_ytrain_frac)


All Data Distribution: NEneg, NEpos, ratio
55 172 0.3197674418604651

Training Data Distribution: NEneg, NEpos, ratio
43 134 0.3208955223880597


In [10]:
# evaluate multiple iterations of class weighting schemes varying from class distribution ratios
increment = np.arange(0.05, 10.05, 0.05)
temp_scale = {"class_weight":[], "neg_scale":[], "pos_scale":[]}

#to be consistent across all models, keep negative class weight=1 and vary positive class weight
for p, pos in enumerate(increment): 
    temp_scale["class_weight"].append([{0:1, 1:pos}])
    temp_scale["neg_scale"].append(1)
    temp_scale["pos_scale"].append(pos)

# print(temp_scale)

In [11]:
# save all scaling/weights evaluated
# print(temp_scale['class_weight'])
scale_df = pd.DataFrame(temp_scale)
scale_df.to_csv(os.path.join(outdir, "ScalingQuantities.csv"), index = False)

## Weighted Logistic Regression

In [12]:
# initialize model
val_dict = {"Params":[], "Accuracy":[], "PPV":[], "Sensitivity":[], "Specificity":[], "F1_Score":[],"AUC":[], "CM":[], "Pred_Value":[]}
test_dict = {"Params":[], "Accuracy":[], "PPV":[], "Sensitivity":[], "Specificity":[], "F1_Score":[],"AUC":[], "CM":[], "Pred_Value":[]}

#iterate params search
print("starting parameter search...")
for i, pair in enumerate(temp_scale['class_weight']):
    param_grid = {'class_weight': temp_scale['class_weight'][i]}
    #print(param_grid)

    for key, space in param_grid.items():
        # print("-"*50)
        # print("\nWeighing Scheme for Training: ")
        value = param_grid["class_weight"][0]
        # print(value)

        clf_log2 = LogisticRegression(random_state=2024, C = 0.5, solver = 'newton-cg',  max_iter = 1000, class_weight = value).fit(X_train, Y_train)

        #Validation Metrics
        ypred_val = clf_log2.predict(X_val)
        ypred_prob = clf_log2.predict_proba(X_val)[:,1]
        acc = accuracy_score(Y_val, ypred_val)
        sens = recall_score(Y_val, ypred_val)
        ppv = precision_score(Y_val, ypred_val)
        f1 = f1_score(Y_val, ypred_val)
        CM = confusion_matrix(Y_val, ypred_val)
        tn, fp, fn, tp = confusion_matrix(Y_val, ypred_val).ravel()
        spec = tn / (tn+fp)
        auc = roc_auc_score(Y_val, ypred_prob)

        # print("Saving Validation Results:")
        val_dict["Params"].append(value)
        val_dict["Accuracy"].append(acc)
        val_dict["PPV"].append(ppv)
        val_dict["Sensitivity"].append(sens)
        val_dict["Specificity"].append(spec)
        val_dict["F1_Score"].append(f1)
        val_dict["AUC"].append(auc)
        val_dict["CM"].append(CM)
        val_dict["Pred_Value"].append(ypred_val)
        
        #Test Metrics 
        ypred_test = clf_log2.predict(X_test)
        ypred_prob = clf_log2.predict_proba(X_test)[:,1]
        acc = accuracy_score(Y_test, ypred_test)
        sens = recall_score(Y_test, ypred_test)
        ppv = precision_score(Y_test, ypred_test)
        f1 = f1_score(Y_test, ypred_test)
        CM = confusion_matrix(Y_test, ypred_test)
        tn, fp, fn, tp = confusion_matrix(Y_test, ypred_test).ravel()
        spec = tn / (tn+fp)
        auc = roc_auc_score(Y_test, ypred_prob)

        # print("Saving Test Results:")
        test_dict["Params"].append(value)
        test_dict["Accuracy"].append(acc)
        test_dict["PPV"].append(ppv)
        test_dict["Sensitivity"].append(sens)
        test_dict["Specificity"].append(spec)
        test_dict["F1_Score"].append(f1)
        test_dict["AUC"].append(auc)
        test_dict["CM"].append(CM)
        test_dict["Pred_Value"].append(ypred_test)

print("done, saving") 

#save validation results
val_results = pd.DataFrame(val_dict)
val_results.to_csv(os.path.join(outdir, "LogisticRegression_ValResults.csv"), index = False)

#save test results
test_results = pd.DataFrame(test_dict)
test_results.to_csv(os.path.join(outdir, "LogisticRegression_TestResults.csv"), index = False)

starting parameter search...
done, saving


In [13]:
for z in ["Accuracy", "F1_Score","AUC"]:
    best_by_z = val_results.loc[val_results[z].idxmax()]
    print(f"Best Params by {z}:")
    print(best_by_z['Params'])


Best Params by Accuracy:
{0: 1, 1: np.float64(4.9)}
Best Params by F1_Score:
{0: 1, 1: np.float64(4.9)}
Best Params by AUC:
{0: 1, 1: np.float64(0.25)}


## Random Forest

In [14]:
# initialize model
val_dict = {"Params":[], "Accuracy":[], "PPV":[], "Sensitivity":[], "Specificity":[], "F1_Score":[],"AUC":[], "CM":[], "Pred_Value":[]}
test_dict = {"Params":[], "Accuracy":[], "PPV":[], "Sensitivity":[], "Specificity":[], "F1_Score":[],"AUC":[], "CM":[], "Pred_Value":[]}

#iterate params search
print("starting parameter search...")
for i, pair in enumerate(temp_scale['class_weight']):
    param_grid = {'class_weight': temp_scale['class_weight'][i]}
    #print(param_grid)

    for key, space in param_grid.items():
        # print("-"*50)
        # print("\nWeighing Scheme for Training: ")
        value = param_grid["class_weight"][0]
        # print(value)

        clf = RandomForestClassifier(n_estimators = 1000, random_state=200, criterion = "gini", min_samples_leaf = 2, class_weight = value).fit(X_train, Y_train)

        #Validation Metrics
        ypred_val = clf.predict(X_val)
        ypred_prob = clf.predict_proba(X_val)[:,1]
        acc = accuracy_score(Y_val, ypred_val)
        sens = recall_score(Y_val, ypred_val)
        ppv = precision_score(Y_val, ypred_val)
        f1 = f1_score(Y_val, ypred_val)
        CM = confusion_matrix(Y_val, ypred_val)
        tn, fp, fn, tp = confusion_matrix(Y_val, ypred_val).ravel()
        spec = tn / (tn+fp)
        auc = roc_auc_score(Y_val, ypred_prob)

        # print("Saving Validation Results:")
        val_dict["Params"].append(value)
        val_dict["Accuracy"].append(acc)
        val_dict["PPV"].append(ppv)
        val_dict["Sensitivity"].append(sens)
        val_dict["Specificity"].append(spec)
        val_dict["F1_Score"].append(f1)
        val_dict["AUC"].append(auc)
        val_dict["CM"].append(CM)
        val_dict["Pred_Value"].append(ypred_val)
        
        #Test Metrics 
        ypred_test = clf.predict(X_test)
        ypred_prob = clf.predict_proba(X_test)[:,1]
        acc = accuracy_score(Y_test, ypred_test)
        sens = recall_score(Y_test, ypred_test)
        ppv = precision_score(Y_test, ypred_test)
        f1 = f1_score(Y_test, ypred_test)
        CM = confusion_matrix(Y_test, ypred_test)
        tn, fp, fn, tp = confusion_matrix(Y_test, ypred_test).ravel()
        spec = tn / (tn+fp)
        auc = roc_auc_score(Y_test, ypred_prob)

        # print("Saving Test Results:")
        test_dict["Params"].append(value)
        test_dict["Accuracy"].append(acc)
        test_dict["PPV"].append(ppv)
        test_dict["Sensitivity"].append(sens)
        test_dict["Specificity"].append(spec)
        test_dict["F1_Score"].append(f1)
        test_dict["AUC"].append(auc)
        test_dict["CM"].append(CM)
        test_dict["Pred_Value"].append(ypred_test)

print("done, saving") 
#save validation results
val_results = pd.DataFrame(val_dict)
val_results.to_csv(os.path.join(outdir, "RandomForest_ValResults.csv"), index = False)

#save test results
test_results = pd.DataFrame(test_dict)
test_results.to_csv(os.path.join(outdir, "RandomForest_TestResults.csv"), index = False)

starting parameter search...
done, saving


Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 0.8488805970149255, 1: 1.123134328358209}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 0.8488805970149255, 1: 1.2033582089552237}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 0.8488805970149255, 1: 1.2835820895522387}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 0.8488805970149255, 1: 1.3638059701492538}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 0.8488805970149255, 1: 1.4440298507462686}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Sc

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.0186567164179106, 1: 1.9253731343283582}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.0186567164179106, 1: 2.0055970149253732}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.0186567164179106, 1: 2.085820895522388}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.0186567164179106, 1: 2.166044776119403}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.0186567164179106, 1: 2.246268656716418}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Sche

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.1884328358208958, 1: 2.7276119402985075}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.1884328358208958, 1: 2.807835820895522}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.1884328358208958, 1: 2.888059701492537}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.1884328358208958, 1: 2.968283582089552}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.1884328358208958, 1: 3.048507462686567}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Schem

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.5279850746268657, 1: 0.5615671641791045}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.5279850746268657, 1: 0.6417910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.5279850746268657, 1: 0.7220149253731343}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.5279850746268657, 1: 0.8022388059701492}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.5279850746268657, 1: 0.8824626865671641}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing S

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.697761194029851, 1: 1.3638059701492538}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.697761194029851, 1: 1.4440298507462686}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.697761194029851, 1: 1.5242537313432836}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.697761194029851, 1: 1.6044776119402984}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.697761194029851, 1: 1.6847014925373134}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.8675373134328361, 1: 2.166044776119403}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.8675373134328361, 1: 2.246268656716418}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.8675373134328361, 1: 2.326492537313433}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.8675373134328361, 1: 2.4067164179104474}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 1.8675373134328361, 1: 2.4869402985074625}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Sche

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.037313432835821, 1: 2.968283582089552}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.037313432835821, 1: 3.048507462686567}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.037313432835821, 1: 3.1287313432835817}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.037313432835821, 1: 3.2089552238805967}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.207089552238806, 1: 0.3208955223880597}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme f

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.3768656716417915, 1: 0.8022388059701492}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.3768656716417915, 1: 0.8824626865671641}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.3768656716417915, 1: 0.9626865671641791}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.3768656716417915, 1: 1.042910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.3768656716417915, 1: 1.123134328358209}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Sch

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.5466417910447765, 1: 1.6044776119402984}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.5466417910447765, 1: 1.6847014925373134}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.5466417910447765, 1: 1.7649253731343282}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.5466417910447765, 1: 1.8451492537313432}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.5466417910447765, 1: 1.9253731343283582}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing S

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.7164179104477615, 1: 2.4067164179104474}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.7164179104477615, 1: 2.4869402985074625}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.7164179104477615, 1: 2.5671641791044775}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.7164179104477615, 1: 2.6473880597014925}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.7164179104477615, 1: 2.7276119402985075}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing S

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 2.8861940298507465, 1: 3.2089552238805967}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.0559701492537314, 1: 0.3208955223880597}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.0559701492537314, 1: 0.4011194029850746}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.0559701492537314, 1: 0.48134328358208955}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.0559701492537314, 1: 0.5615671641791045}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing 

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.225746268656717, 1: 1.042910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.225746268656717, 1: 1.123134328358209}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.225746268656717, 1: 1.2033582089552237}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.225746268656717, 1: 1.2835820895522387}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.225746268656717, 1: 1.3638059701492538}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme f

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.395522388059702, 1: 1.8451492537313432}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.395522388059702, 1: 1.9253731343283582}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.395522388059702, 1: 2.0055970149253732}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.395522388059702, 1: 2.085820895522388}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.395522388059702, 1: 2.166044776119403}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme f

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.565298507462687, 1: 2.6473880597014925}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.565298507462687, 1: 2.7276119402985075}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.565298507462687, 1: 2.807835820895522}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.565298507462687, 1: 2.888059701492537}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.565298507462687, 1: 2.968283582089552}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme fo

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.9048507462686572, 1: 0.48134328358208955}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.9048507462686572, 1: 0.5615671641791045}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.9048507462686572, 1: 0.6417910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.9048507462686572, 1: 0.7220149253731343}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 3.9048507462686572, 1: 0.8022388059701492}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing 

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.074626865671642, 1: 1.2835820895522387}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.074626865671642, 1: 1.3638059701492538}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.074626865671642, 1: 1.4440298507462686}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.074626865671642, 1: 1.5242537313432836}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.074626865671642, 1: 1.6044776119402984}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.244402985074627, 1: 2.085820895522388}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.244402985074627, 1: 2.166044776119403}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.244402985074627, 1: 2.246268656716418}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.244402985074627, 1: 2.326492537313433}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.244402985074627, 1: 2.4067164179104474}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.414179104477612, 1: 2.888059701492537}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.414179104477612, 1: 2.968283582089552}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.414179104477612, 1: 3.048507462686567}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.414179104477612, 1: 3.1287313432835817}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.414179104477612, 1: 3.2089552238805967}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme fo

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.753731343283583, 1: 0.7220149253731343}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.753731343283583, 1: 0.8022388059701492}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.753731343283583, 1: 0.8824626865671641}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.753731343283583, 1: 0.9626865671641791}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.753731343283583, 1: 1.042910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme 

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.923507462686568, 1: 1.5242537313432836}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.923507462686568, 1: 1.6044776119402984}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.923507462686568, 1: 1.6847014925373134}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.923507462686568, 1: 1.7649253731343282}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 4.923507462686568, 1: 1.8451492537313432}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.093283582089553, 1: 2.326492537313433}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.093283582089553, 1: 2.4067164179104474}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.093283582089553, 1: 2.4869402985074625}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.093283582089553, 1: 2.5671641791044775}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.093283582089553, 1: 2.6473880597014925}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme 

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.263059701492538, 1: 3.1287313432835817}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.263059701492538, 1: 3.2089552238805967}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.432835820895523, 1: 0.3208955223880597}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.432835820895523, 1: 0.4011194029850746}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.432835820895523, 1: 0.48134328358208955}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Schem

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.602611940298508, 1: 0.9626865671641791}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.602611940298508, 1: 1.042910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.602611940298508, 1: 1.123134328358209}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.602611940298508, 1: 1.2033582089552237}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.602611940298508, 1: 1.2835820895522387}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme f

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.772388059701493, 1: 1.7649253731343282}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.772388059701493, 1: 1.8451492537313432}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.772388059701493, 1: 1.9253731343283582}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.772388059701493, 1: 2.0055970149253732}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.772388059701493, 1: 2.085820895522388}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme 

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.942164179104478, 1: 2.5671641791044775}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.942164179104478, 1: 2.6473880597014925}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.942164179104478, 1: 2.7276119402985075}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.942164179104478, 1: 2.807835820895522}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 5.942164179104478, 1: 2.888059701492537}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme f

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.281716417910449, 1: 0.4011194029850746}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.281716417910449, 1: 0.48134328358208955}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.281716417910449, 1: 0.5615671641791045}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.281716417910449, 1: 0.6417910447761194}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.281716417910449, 1: 0.7220149253731343}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Schem

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.451492537313434, 1: 1.2033582089552237}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.451492537313434, 1: 1.2835820895522387}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.451492537313434, 1: 1.3638059701492538}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.451492537313434, 1: 1.4440298507462686}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.451492537313434, 1: 1.5242537313432836}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.621268656716419, 1: 2.0055970149253732}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.621268656716419, 1: 2.085820895522388}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.621268656716419, 1: 2.166044776119403}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.621268656716419, 1: 2.246268656716418}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.621268656716419, 1: 2.326492537313433}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for

Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.791044776119404, 1: 2.807835820895522}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.791044776119404, 1: 2.888059701492537}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.791044776119404, 1: 2.968283582089552}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.791044776119404, 1: 3.048507462686567}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for Training: 
{0: 6.791044776119404, 1: 3.1287313432835817}
Saving Validation Results:
Saving Test Results:
--------------------------------------------------

Weighing Scheme for

In [15]:
for z in ["Accuracy", "F1_Score","AUC"]:
    best_by_z = val_results.loc[val_results[z].idxmax()]
    print(f"Best Params by {z}:")
    print(best_by_z['Params'])


Best Params by Accuracy:
{0: 1, 1: np.float64(8.600000000000001)}
Best Params by F1_Score:
{0: 1, 1: np.float64(8.600000000000001)}
Best Params by AUC:
{0: 1, 1: np.float64(0.3)}


## Xgboost

In [16]:
# initialize model
val_dict = {"Params":[], "Accuracy":[], "PPV":[], "Sensitivity":[], "Specificity":[], "F1_Score":[],"AUC":[], "CM":[], "Pred_Value":[]}
test_dict = {"Params":[], "Accuracy":[], "PPV":[], "Sensitivity":[], "Specificity":[], "F1_Score":[],"AUC":[], "CM":[], "Pred_Value":[]}

#iterate params search
print("starting parameter search...")
for i, pair in enumerate(temp_scale['class_weight']):
    param_grid = {'class_weight': temp_scale['class_weight'][i]}
    #print(param_grid)

    for key, space in param_grid.items():
        # print("-"*50)
        # print("\nWeighing Scheme for Training: ")
        value = param_grid["class_weight"][0]
        # print(value)

        clf = xgb.XGBClassifier(tree_method="hist", seed=100, random_state = 100, booster = 'gblinear', scale_pos_weight = value[1]).fit(X_train, Y_train)

        #Validation Metrics
        ypred_val = clf.predict(X_val)
        ypred_prob = clf.predict_proba(X_val)[:,1]
        acc = accuracy_score(Y_val, ypred_val)
        sens = recall_score(Y_val, ypred_val)
        ppv = precision_score(Y_val, ypred_val)
        f1 = f1_score(Y_val, ypred_val)
        CM = confusion_matrix(Y_val, ypred_val)
        tn, fp, fn, tp = confusion_matrix(Y_val, ypred_val).ravel()
        spec = tn / (tn+fp)
        auc = roc_auc_score(Y_val, ypred_prob)

        # print("Saving Validation Results:")
        val_dict["Params"].append(value)
        val_dict["Accuracy"].append(acc)
        val_dict["PPV"].append(ppv)
        val_dict["Sensitivity"].append(sens)
        val_dict["Specificity"].append(spec)
        val_dict["F1_Score"].append(f1)
        val_dict["AUC"].append(auc)
        val_dict["CM"].append(CM)
        val_dict["Pred_Value"].append(ypred_val)
        
        #Test Metrics 
        ypred_test = clf.predict(X_test)
        ypred_prob = clf.predict_proba(X_test)[:,1]
        acc = accuracy_score(Y_test, ypred_test)
        sens = recall_score(Y_test, ypred_test)
        ppv = precision_score(Y_test, ypred_test)
        f1 = f1_score(Y_test, ypred_test)
        CM = confusion_matrix(Y_test, ypred_test)
        tn, fp, fn, tp = confusion_matrix(Y_test, ypred_test).ravel()
        spec = tn / (tn+fp)
        auc = roc_auc_score(Y_test, ypred_prob)

        # print("Saving Test Results:")
        test_dict["Params"].append(value)
        test_dict["Accuracy"].append(acc)
        test_dict["PPV"].append(ppv)
        test_dict["Sensitivity"].append(sens)
        test_dict["Specificity"].append(spec)
        test_dict["F1_Score"].append(f1)
        test_dict["AUC"].append(auc)
        test_dict["CM"].append(CM)
        test_dict["Pred_Value"].append(ypred_test)

print("done, saving") 
#save validation results
val_results = pd.DataFrame(val_dict)
val_results.to_csv(os.path.join(outdir, "XGBoost_ValResults.csv"), index = False)

#save test results
test_results = pd.DataFrame(test_dict)
test_results.to_csv(os.path.join(outdir, "XGBoost_TestResults.csv"), index = False)   

starting parameter search...
done, saving


In [17]:
for z in ["Accuracy", "F1_Score","AUC"]:
    best_by_z = val_results.loc[val_results[z].idxmax()]
    print(f"Best Params by {z}:")
    print(best_by_z['Params'])


Best Params by Accuracy:
{0: 1, 1: np.float64(0.8)}
Best Params by F1_Score:
{0: 1, 1: np.float64(0.8)}
Best Params by AUC:
{0: 1, 1: np.float64(0.05)}
