In [1]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.cm as cm
import numpy as np
import random

In [71]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier

In [3]:
import warnings
# Install a blanket 'ignore' filter: no category or module is given, so every
# warning raised after this cell is suppressed.
# NOTE(review): this would also hide library notices such as convergence or
# deprecation warnings from the models fitted below - consider narrowing it.
warnings.filterwarnings('ignore')

In [4]:
# Load the pickled object holding 100% of the features from the working directory.
# NOTE(review): pickle.load can run arbitrary code while unpickling, so this
# file must come from a trusted source.
with open('big_cards', 'rb') as f: # 100% Of features
    big_cards = pickle.load(f)
    
# Load the pickled object holding the top 50% of the features from the working directory.
# NOTE(review): pickle.load can run arbitrary code while unpickling, so this
# file must come from a trusted source.
with open('big_top_50', 'rb') as f: # Top 50% of features
    big_top_50 = pickle.load(f)

In [35]:
def quick_test(model, X, y, test_size=0.2, val_size=0.25, random_state=None):
    """Fit ``model`` on a train/validation/test split and collect its scores.

    ``test_size`` of the rows are held out as the test set, then ``val_size``
    of the remaining rows become the validation set.  With the defaults
    (0.2, then 0.25 of the remaining 80%) this yields 60% train, 20%
    validation and 20% test.

    Parameters
    ----------
    model : object
        Estimator providing ``fit``, ``score``, ``predict`` and
        ``predict_proba``.  It is fitted twice: first on the training rows,
        then on the training + validation rows.
    X, y
        Features and target, forwarded unchanged to ``train_test_split``.
    test_size, val_size : float, optional
        Fractions passed as ``test_size`` to the first and second split.
    random_state : optional
        Forwarded to both ``train_test_split`` calls.  The default ``None``
        keeps the original unseeded behaviour; pass a fixed value to get the
        same rows on every call so that different models can be compared.

    Returns
    -------
    list
        ``[val_score, train_score, overfit_score, test_score, y_true, y_pred,
        y_score, model_80]``.  The three ``*_score`` values come from
        ``model.score`` rounded to 3 decimals, ``overfit_score`` is
        ``|val_score - train_score|`` rounded to 4 decimals, ``y_true`` is the
        held-out target, ``y_pred`` / ``y_score`` are ``predict`` /
        ``predict_proba`` on the test rows, and ``model_80`` is whatever the
        second ``fit`` call returned.
    """
    # First carve off the test set, then split the remainder into train / validation.
    xtrain_val, xtest, ytrain_val, ytest = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    xtrain, xval, ytrain, yval = train_test_split(
        xtrain_val, ytrain_val, test_size=val_size, random_state=random_state)

    # Validation and train scores must be taken before the refit below:
    # presumably ``fit`` returns the estimator itself (scikit-learn convention),
    # in which case model_60 and model_80 are the same object.
    model_60 = model.fit(xtrain, ytrain)
    val_score = np.round(model_60.score(xval, yval), 3)
    train_score = np.round(model_60.score(xtrain, ytrain), 3)

    # Gap between train and validation score, used as an overfitting indicator.
    overfit_score = np.round(abs(val_score - train_score), 4)

    # Refit on train + validation and evaluate once on the untouched test rows.
    model_80 = model.fit(xtrain_val, ytrain_val)
    test_score = np.round(model_80.score(xtest, ytest), 3)

    # Raw material for F1 (hard predictions) and ROC AUC (probabilities).
    y_true = ytest
    y_pred = model_80.predict(xtest)
    y_score = model_80.predict_proba(xtest)

    return [val_score, train_score, overfit_score, test_score, y_true, y_pred, y_score, model_80]

In [6]:
def quick_predict(model, X, y):
    """Fit ``model`` on 70% of the rows and predict the remaining 30%.

    Returns a ``(predictions, held_out_target)`` pair.  The split is unseeded,
    so the rows chosen differ from call to call.
    """
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(X, y, test_size=0.3)
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_holdout)
    return predictions, y_holdout

def quick_probas(model, X, y):
    """Fit ``model`` on 70% of the rows and return ``predict_proba`` for the other 30%.

    Returns a ``(probabilities, held_out_target)`` pair.  The split is
    unseeded, so the rows chosen differ from call to call.
    """
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(X, y, test_size=0.3)
    model.fit(X_fit, y_fit)
    probabilities = model.predict_proba(X_holdout)
    return probabilities, y_holdout

In [7]:
# Full feature set: `Binary_Rank` is the target, every other column is a predictor
# (big_cards is presumably a pandas DataFrame - it is used via .drop / column lookup).
y_100 = big_cards['Binary_Rank']
X_100 = big_cards.drop(columns=['Binary_Rank'])

In [8]:
# Top-50% feature set: `Binary_Rank` is the target, every other column is a predictor
# (big_top_50 is presumably a pandas DataFrame - it is used via .drop / column lookup).
y_50 = big_top_50['Binary_Rank']
X_50 = big_top_50.drop(columns=['Binary_Rank'])

In [9]:
#xtrain_100, xtest_100, ytrain_100, ytest_100 = train_test_split(X_100, y_100, test_size=0.3)

In [10]:
#xtrain_50, xtest_50, ytrain_50, ytest_50 = train_test_split(X_50, y_50, test_size=0.3)

In [11]:
# Three logistic-regression candidates that differ only in the `C`
# hyper-parameter (0.1, 1, 10); each one is evaluated on both feature sets below.
logreg_C_tenth = LogisticRegression(C = 0.1)
logreg_C_one = LogisticRegression(C = 1)
logreg_C_ten = LogisticRegression(C = 10)

In [12]:
# Logistic regression, C = 0.1, 100% of the features: held-out F1 and ROC AUC.
test_results = quick_test(logreg_C_tenth, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.795
ROC AUC Score:... 0.85


In [13]:
# Logistic regression, C = 1, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the C = 1 scores.
test_results = quick_test(logreg_C_one, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.806
ROC AUC Score:... 0.859


In [14]:
# Logistic regression, C = 10, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the C = 10 scores.
test_results = quick_test(logreg_C_ten, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.789
ROC AUC Score:... 0.846


In [15]:
# Logistic regression, C = 0.1, top 50% of the features: held-out F1 and ROC AUC.
test_results = quick_test(logreg_C_tenth, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.809
ROC AUC Score:... 0.869


In [16]:
# Logistic regression, C = 1, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the C = 1 scores.
test_results = quick_test(logreg_C_one, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.811
ROC AUC Score:... 0.874


In [18]:
# Logistic regression, C = 10, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the C = 10 scores.
test_results = quick_test(logreg_C_ten, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.814
ROC AUC Score:... 0.874


In [20]:
# Decision Trees:
# Three candidates differing only in max_depth: unlimited (None), 16 and 4.
# `dt_depth_16` is also reused further down as the weak learner for AdaBoost.
dt_depth_None = DecisionTreeClassifier(max_depth=None)
dt_depth_16 = DecisionTreeClassifier(max_depth=16)
dt_depth_4 = DecisionTreeClassifier(max_depth=4)

In [21]:
# Decision tree, unlimited depth, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this tree's scores.
test_results = quick_test(dt_depth_None, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.843
ROC AUC Score:... 0.825


In [22]:
# Decision tree, max depth 16, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this tree's scores.
test_results = quick_test(dt_depth_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.847
ROC AUC Score:... 0.882


In [23]:
# Decision tree, max depth 4, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this tree's scores.
test_results = quick_test(dt_depth_4, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.772
ROC AUC Score:... 0.799


In [24]:
# Decision tree, unlimited depth, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this tree's scores.
test_results = quick_test(dt_depth_None, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.823
ROC AUC Score:... 0.858


In [25]:
# Decision tree, max depth 16, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this tree's scores.
test_results = quick_test(dt_depth_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.82
ROC AUC Score:... 0.871


In [26]:
# Decision tree, max depth 4, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this tree's scores.
test_results = quick_test(dt_depth_4, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.789
ROC AUC Score:... 0.777


In [27]:
# Random-forest candidates: max_depth fixed at 16, with 10, 100 and 1000 trees.
# Each one is evaluated on both feature sets in the cells that follow.
rf_n_10_depth_16 = RandomForestClassifier(n_estimators=10, max_depth=16)
rf_n_100_depth_16 = RandomForestClassifier(n_estimators=100, max_depth=16)
rf_n_1000_depth_16 = RandomForestClassifier(n_estimators=1000, max_depth=16)

In [28]:
# Random forest, 10 trees, max depth 16, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this forest's scores.
test_results = quick_test(rf_n_10_depth_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.831
ROC AUC Score:... 0.886


In [29]:
# Random forest, 100 trees, max depth 16, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this forest's scores.
test_results = quick_test(rf_n_100_depth_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.837
ROC AUC Score:... 0.891


In [30]:
# Random forest, 1000 trees, max depth 16, 100% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this forest's scores.
test_results = quick_test(rf_n_1000_depth_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.836
ROC AUC Score:... 0.894


In [31]:
# Random forest, 10 trees, max depth 16, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this forest's scores.
test_results = quick_test(rf_n_10_depth_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.814
ROC AUC Score:... 0.866


In [32]:
# Random forest, 100 trees, max depth 16, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this forest's scores.
test_results = quick_test(rf_n_100_depth_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.819
ROC AUC Score:... 0.875


In [33]:
# Random forest, 1000 trees, max depth 16, top 50% of the features: held-out F1 and ROC AUC.
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this forest's scores.
test_results = quick_test(rf_n_1000_depth_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
print('F1 Score:........', F1_C_tenth)
print('ROC AUC Score:...', ROC_AUC_C_tenth)

F1 Score:........ 0.823
ROC AUC Score:... 0.876


In [52]:
# Visualizing Trees
# results = quick_test(dt_depth_16, X_50, y_50)
# tree.export_graphviz(results[7], out_file='dtree_16_50.dot', feature_names = X_50.columns,
#                 class_names = 'Binary_Rank',
#                 rounded = True, proportion = False, 
#                 precision = 2, filled = True)
# tree.plot_tree(results[7])

In [50]:
# call(['dot', '-Tpng', 'dtree_16_50.dot', '-o', 'dtree_16_50.png', '-Gdpi=600'])

In [55]:
# AdaBoost over the depth-16 decision tree, 100 boosting rounds, 100% of the features.
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16 = AdaBoostClassifier(dt_depth_16, n_estimators=100)
test_results = quick_test(adaboost_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the AdaBoost scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

F1 Score:........ 0.864
ROC AUC Score:... 0.915


In [59]:
# AdaBoost over the depth-16 decision tree, 50 boosting rounds, 100% of the features.
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16 = AdaBoostClassifier(dt_depth_16, n_estimators=50)
test_results = quick_test(adaboost_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the AdaBoost scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.837
Train Score:........ 0.993
Overfit Score:...... 0.156
Test Score:......... 0.848
F1 Score:........... 0.867
ROC AUC Score:...... 0.921


In [58]:
# AdaBoost over the depth-16 decision tree, 10 boosting rounds, 100% of the features.
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16 = AdaBoostClassifier(dt_depth_16, n_estimators=10)
test_results = quick_test(adaboost_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the AdaBoost scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.839
Train Score:........ 0.92
Overfit Score:...... 0.081
Test Score:......... 0.849
F1 Score:........... 0.867
ROC AUC Score:...... 0.905


In [66]:
# AdaBoost over the depth-16 decision tree, 100 boosting rounds, top 50% of the features.
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16 = AdaBoostClassifier(dt_depth_16, n_estimators=100)
test_results = quick_test(adaboost_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the AdaBoost scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.81
Train Score:........ 0.898
Overfit Score:...... 0.088
Test Score:......... 0.816
F1 Score:........... 0.839
ROC AUC Score:...... 0.884


In [65]:
# AdaBoost over the depth-16 decision tree, 50 boosting rounds, top 50% of the features.
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16 = AdaBoostClassifier(dt_depth_16, n_estimators=50)
test_results = quick_test(adaboost_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the AdaBoost scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.807
Train Score:........ 0.899
Overfit Score:...... 0.092
Test Score:......... 0.821
F1 Score:........... 0.843
ROC AUC Score:...... 0.894


In [64]:
# AdaBoost over the depth-16 decision tree, 10 boosting rounds, top 50% of the features.
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16 = AdaBoostClassifier(dt_depth_16, n_estimators=10)
test_results = quick_test(adaboost_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the AdaBoost scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.81
Train Score:........ 0.861
Overfit Score:...... 0.051
Test Score:......... 0.815
F1 Score:........... 0.839
ROC AUC Score:...... 0.895


In [63]:
# Gradient boosting, 100 estimators, 100% of the features.
# No max_depth is passed, so the `_16` in the variable name is not a depth setting here.
gradboost_16 = GradientBoostingClassifier(n_estimators=100)
test_results = quick_test(gradboost_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this model's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.805
Train Score:........ 0.81
Overfit Score:...... 0.005
Test Score:......... 0.813
F1 Score:........... 0.837
ROC AUC Score:...... 0.894


In [62]:
# Gradient boosting, 50 estimators, 100% of the features.
# No max_depth is passed, so the `_16` in the variable name is not a depth setting here.
gradboost_16 = GradientBoostingClassifier(n_estimators=50)
test_results = quick_test(gradboost_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this model's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.793
Train Score:........ 0.795
Overfit Score:...... 0.002
Test Score:......... 0.8
F1 Score:........... 0.827
ROC AUC Score:...... 0.882


In [61]:
# Gradient boosting, 10 estimators, 100% of the features.
# No max_depth is passed, so the `_16` in the variable name is not a depth setting here.
gradboost_16 = GradientBoostingClassifier(n_estimators=10)
test_results = quick_test(gradboost_16, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this model's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.74
Train Score:........ 0.738
Overfit Score:...... 0.002
Test Score:......... 0.737
F1 Score:........... 0.791
ROC AUC Score:...... 0.835


In [67]:
# Gradient boosting, 100 estimators, top 50% of the features.
# No max_depth is passed, so the `_16` in the variable name is not a depth setting here.
gradboost_16 = GradientBoostingClassifier(n_estimators=100)
test_results = quick_test(gradboost_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this model's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.781
Train Score:........ 0.782
Overfit Score:...... 0.001
Test Score:......... 0.786
F1 Score:........... 0.818
ROC AUC Score:...... 0.87


In [68]:
# Gradient boosting, 50 estimators, top 50% of the features.
# No max_depth is passed, so the `_16` in the variable name is not a depth setting here.
gradboost_16 = GradientBoostingClassifier(n_estimators=50)
test_results = quick_test(gradboost_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this model's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.76
Train Score:........ 0.76
Overfit Score:...... 0.0
Test Score:......... 0.771
F1 Score:........... 0.804
ROC AUC Score:...... 0.856


In [69]:
# Gradient boosting, 10 estimators, top 50% of the features.
# No max_depth is passed, so the `_16` in the variable name is not a depth setting here.
gradboost_16 = GradientBoostingClassifier(n_estimators=10)
test_results = quick_test(gradboost_16, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold this model's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.721
Train Score:........ 0.725
Overfit Score:...... 0.004
Test Score:......... 0.722
F1 Score:........... 0.779
ROC AUC Score:...... 0.808


In [72]:
# Stacking:
# Base learners are two AdaBoost ensembles (50 and 10 rounds over the depth-16
# tree) plus a 100-estimator gradient-boosting model; a default
# LogisticRegression combines their outputs.
# The AdaBoost weak learner is passed positionally: its keyword was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaboost_16_50 = AdaBoostClassifier(dt_depth_16, n_estimators=50)
adaboost_16_10 = AdaBoostClassifier(dt_depth_16, n_estimators=10)
gradboost_16_100 = GradientBoostingClassifier(n_estimators=100)
model_list = [('ada_50', adaboost_16_50), ('ada_10', adaboost_16_10), ('grad_100', gradboost_16_100)]
best_metrics_stack = StackingClassifier(estimators=model_list, final_estimator=LogisticRegression())

In [73]:
# Evaluate the current `best_metrics_stack` on 100% of the features.
test_results = quick_test(best_metrics_stack, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the stack's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.854
Train Score:........ 0.973
Overfit Score:...... 0.119
Test Score:......... 0.863
F1 Score:........... 0.88
ROC AUC Score:...... 0.939


In [74]:
# A more diverse stack: logistic regression (C = 1), a 1000-tree random forest
# (max depth 16), AdaBoost (10 rounds over the depth-16 tree) and gradient
# boosting (100 estimators), combined by a default LogisticRegression.
# NOTE: this rebinds `best_metrics_stack`, replacing the earlier boosters-only stack.
logReg = LogisticRegression(C=1)
randForst = RandomForestClassifier(n_estimators=1000, max_depth=16)
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaBoost = AdaBoostClassifier(dt_depth_16, n_estimators=10)
gradBoost = GradientBoostingClassifier(n_estimators=100)

diverse_model_list = [('logReg', logReg), ('randForst', randForst), ('adaBoost', adaBoost), ('gradBoost', gradBoost)]
best_metrics_stack = StackingClassifier(estimators=diverse_model_list, final_estimator=LogisticRegression())

In [75]:
# Evaluate the current `best_metrics_stack` on 100% of the features.
test_results = quick_test(best_metrics_stack, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the stack's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results[0])
print('Train Score:........', test_results[1])
print('Overfit Score:......', test_results[2])
print('Test Score:.........', test_results[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.835
Train Score:........ 0.892
Overfit Score:...... 0.057
Test Score:......... 0.842
F1 Score:........... 0.861
ROC AUC Score:...... 0.922


In [76]:
 # Best 100% Features Stack
# Five base learners for the 100%-feature stack: logistic regression (C = 1),
# a depth-16 decision tree, a 100-tree random forest (max depth 16), AdaBoost
# (10 rounds over the depth-16 tree) and gradient boosting (100 estimators),
# combined by a default LogisticRegression.
logReg = LogisticRegression(C=1)
dTree = DecisionTreeClassifier(max_depth=16)
randForst = RandomForestClassifier(n_estimators=100, max_depth=16)
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaBoost = AdaBoostClassifier(dt_depth_16, n_estimators=10)
gradBoost = GradientBoostingClassifier(n_estimators=100)

diverse_model_list = [('logReg', logReg), ('dTree', dTree), ('randForst', randForst), ('adaBoost', adaBoost), ('gradBoost', gradBoost)]
best_100_stack = StackingClassifier(estimators=diverse_model_list, final_estimator=LogisticRegression())

In [78]:
# Evaluate `best_100_stack` on 100% of the features; results are kept under their own name.
test_results_best_100_stack = quick_test(best_100_stack, X_100, y_100)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results_best_100_stack[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results_best_100_stack[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the stack's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results_best_100_stack[0])
print('Train Score:........', test_results_best_100_stack[1])
print('Overfit Score:......', test_results_best_100_stack[2])
print('Test Score:.........', test_results_best_100_stack[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.84
Train Score:........ 0.897
Overfit Score:...... 0.057
Test Score:......... 0.839
F1 Score:........... 0.859
ROC AUC Score:...... 0.92


In [79]:
# Best 50% Features Stack
# Five base learners: logistic regression (C = 10), a depth-16 decision tree,
# a 100-tree random forest (max depth 16), AdaBoost (10 rounds over the
# depth-16 tree) and gradient boosting (100 estimators), combined by a
# default LogisticRegression.
logReg = LogisticRegression(C=10)
dTree = DecisionTreeClassifier(max_depth=16)
randForst = RandomForestClassifier(n_estimators=100, max_depth=16)
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form is accepted by both old and new versions.
adaBoost = AdaBoostClassifier(dt_depth_16, n_estimators=10)
gradBoost = GradientBoostingClassifier(n_estimators=100)

diverse_model_list = [('logReg', logReg), ('dTree', dTree), ('randForst', randForst), ('adaBoost', adaBoost),
                      ('gradBoost', gradBoost)]
best_50_stack = StackingClassifier(estimators=diverse_model_list, final_estimator=LogisticRegression())

In [80]:
# Evaluate `best_50_stack` on the top 50% of the features; results are kept under their own name.
test_results_best_50_stack = quick_test(best_50_stack, X_50, y_50)
# Slots 4-5 of the result list: held-out labels and hard predictions.
y_true, y_pred = test_results_best_50_stack[4:6]
# Slot 6 is the predict_proba output; column 1 is used as the ranking score.
y_score = test_results_best_50_stack[6][:, 1]
# NOTE: the metric variables keep the `_C_tenth` suffix; here they hold the stack's scores.
F1_C_tenth = np.round(f1_score(y_true, y_pred), 3)
ROC_AUC_C_tenth = np.round(roc_auc_score(y_true, y_score), 3)
# Slots 0-3: validation, train, |validation - train| and test scores from quick_test.
print('Validation Score:...', test_results_best_50_stack[0])
print('Train Score:........', test_results_best_50_stack[1])
print('Overfit Score:......', test_results_best_50_stack[2])
print('Test Score:.........', test_results_best_50_stack[3])
print('F1 Score:...........', F1_C_tenth)
print('ROC AUC Score:......', ROC_AUC_C_tenth)

Validation Score:... 0.814
Train Score:........ 0.848
Overfit Score:...... 0.034
Test Score:......... 0.819
F1 Score:........... 0.844
ROC AUC Score:...... 0.905
