In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from scipy import stats
%config InlineBackend.figure_format = 'retina'

In [2]:
## Read model training results
## NOTE: pickle.load can execute arbitrary code while unpickling -- only load
## files that were produced by this project's own training run.
with open('cv_results_3.p', 'rb') as cv_results_file:
    cv_results = pickle.load(cv_results_file)

## np.load on an .npz returns a lazy archive object; the context manager makes
## sure the underlying file handle is closed once the array has been read.
with np.load('results_final_3.npz') as results_archive:
    results_final = results_archive['arr_0']

### Use results_final to construct tables ###

In [3]:
## Inspect the raw layout; the last axis is split into per-dataset groups of
## 5 trials by the reshape cell that follows.
results_final.shape

(2, 7, 3, 20)

In [4]:
## Reshape results_final to make "trials" a separate dimension
## Resulting dimensions:
##  1D -> 2 {train = 0, test = 1}
##  2D -> 7 models
##  3D -> 3 metrics
##  4D -> 5 trials
##  5D -> 4 datasets
##
## The raw last axis is dataset-major: entry 5 * dataset + trial. Reshaping it
## to (dataset, trial) and swapping those two axes yields the same layout as
## slicing each block of 5 trials and concatenating the blocks on a new axis.

N_DATASETS = 4
N_TRIALS = 5

## Guard on ndim so that re-running this cell is a no-op instead of failing on
## the already-reshaped 5D array.
if results_final.ndim == 4:
    by_dataset_then_trial = results_final.reshape(*results_final.shape[:3], N_DATASETS, N_TRIALS)
    ## ascontiguousarray materialises a fresh C-ordered array rather than a view
    results_final = np.ascontiguousarray(by_dataset_then_trial.swapaxes(3, 4))
results_final.shape

(2, 7, 3, 5, 4)

In [None]:
## Collapse the trial axis (axis 3) by averaging, leaving a 2x7x3x4 array:
## (train/test, models, metrics, datasets)
results_final_trial_avg = results_final.mean(axis=3)
np.shape(results_final_trial_avg)

In [None]:
## Table 2: mean test set performance for each algorithm/metric combo,
##   averaged across trials and datasets
##   shape = 7x(3+1) last column is the means
##   rows -> models
##   cols -> metrics
table_2 = results_final_trial_avg[1].mean(axis=2)  ## index 1 = test; average over datasets
table_2_mean = table_2.mean(axis=1, keepdims=True)  ## Mean across metrics, kept as a 7x1 column
table_2 = np.hstack((table_2, table_2_mean))

print(table_2.round(3))

In [None]:
## Table 3: mean test set performance for each algorithm/dataset combo,
##   averaged across trials and metrics
##   shape = 7x(4+1) last column is the means
##   rows -> models
##   cols -> datasets
table_3 = results_final_trial_avg[1].mean(axis=1)  ## index 1 = test; average over metrics
table_3_mean = table_3.mean(axis=1, keepdims=True)  ## Mean across datasets, kept as a 7x1 column
table_3 = np.hstack((table_3, table_3_mean))

print(table_3.round(3))

In [None]:
## Table 4 (Appendix): mean training set performance for each algorithm/metric
##   combo, averaged across trials and datasets
##   shape = 7x(3+1) last column is the means
##   rows -> models
##   cols -> metrics
table_4 = results_final_trial_avg[0].mean(axis=2)  ## index 0 = train; average over datasets
table_4_mean = table_4.mean(axis=1, keepdims=True)  ## Mean across metrics, kept as a 7x1 column
table_4 = np.hstack((table_4, table_4_mean))

print(table_4.round(3))

In [None]:
## Table 5 (Appendix): trial-averaged testing set performance per dataset
##   shape = 7x3x4
##   rows (1D) -> models
##   cols (2D) -> metrics
##        (3D) -> datasets
table_5 = results_final_trial_avg[1]

## One 7x3 block per dataset, printed in the order
## Adult, Occupancy, HTRU2, Activity
for dataset in range(4):
    print(np.around(table_5[..., dataset], decimals=3))

In [7]:
## Table 5 (Appendix): raw testing set performance
##   Column 1: Adult dataset (dataset index 0)
##   One models x metrics block is printed per trial, trials 1 to 5 in order
for trial in range(5):
    print(np.around(results_final[1, :, :, trial, 0], decimals=3))

[[0.829 0.598 0.726]
 [0.649 0.602 0.745]
 [0.828 0.595 0.724]
 [0.828 0.601 0.729]
 [0.797 0.564 0.708]
 [0.812 0.568 0.711]
 [0.828 0.601 0.73 ]]
[[0.828 0.602 0.73 ]
 [0.787 0.531 0.693]
 [0.829 0.612 0.74 ]
 [0.828 0.61  0.736]
 [0.803 0.578 0.724]
 [0.809 0.577 0.718]
 [0.828 0.594 0.723]]
[[0.828 0.602 0.729]
 [0.771 0.412 0.62 ]
 [0.826 0.571 0.711]
 [0.826 0.599 0.727]
 [0.799 0.564 0.7  ]
 [0.813 0.58  0.715]
 [0.829 0.574 0.722]]
[[0.829 0.605 0.731]
 [0.766 0.585 0.735]
 [0.822 0.587 0.721]
 [0.826 0.6   0.729]
 [0.799 0.547 0.716]
 [0.814 0.573 0.712]
 [0.83  0.597 0.729]]
[[0.826 0.591 0.722]
 [0.792 0.414 0.627]
 [0.826 0.564 0.705]
 [0.827 0.566 0.707]
 [0.796 0.579 0.724]
 [0.812 0.572 0.714]
 [0.826 0.579 0.717]]


In [8]:
##   Column 2: Occupancy dataset (dataset index 1)
##   One models x metrics block is printed per trial, trials 1 to 5 in order
for trial in range(5):
    print(np.around(results_final[1, :, :, trial, 1], decimals=3))

[[0.988 0.976 0.991]
 [0.985 0.968 0.989]
 [0.988 0.975 0.991]
 [0.99  0.978 0.993]
 [0.989 0.976 0.991]
 [0.991 0.981 0.991]
 [0.99  0.979 0.992]]
[[0.989 0.977 0.992]
 [0.988 0.975 0.988]
 [0.989 0.976 0.991]
 [0.99  0.979 0.992]
 [0.989 0.977 0.991]
 [0.99  0.98  0.99 ]
 [0.989 0.977 0.987]]
[[0.988 0.974 0.99 ]
 [0.913 0.777 0.822]
 [0.988 0.975 0.99 ]
 [0.988 0.974 0.992]
 [0.988 0.975 0.99 ]
 [0.991 0.981 0.99 ]
 [0.99  0.979 0.99 ]]
[[0.989 0.976 0.991]
 [0.902 0.825 0.936]
 [0.988 0.975 0.991]
 [0.988 0.976 0.991]
 [0.989 0.975 0.991]
 [0.992 0.981 0.991]
 [0.991 0.981 0.991]]
[[0.99  0.978 0.992]
 [0.978 0.894 0.985]
 [0.989 0.976 0.991]
 [0.991 0.98  0.993]
 [0.99  0.978 0.991]
 [0.992 0.982 0.991]
 [0.991 0.981 0.991]]


In [9]:
##   Column 3: HTRU2 dataset (dataset index 2)
##   One models x metrics block is printed per trial, trials 1 to 5 in order
for trial in range(5):
    print(np.around(results_final[1, :, :, trial, 2], decimals=3))

[[0.98  0.882 0.912]
 [0.976 0.855 0.891]
 [0.979 0.876 0.907]
 [0.98  0.886 0.915]
 [0.978 0.874 0.91 ]
 [0.98  0.886 0.922]
 [0.98  0.885 0.923]]
[[0.98  0.885 0.916]
 [0.977 0.868 0.762]
 [0.98  0.888 0.918]
 [0.979 0.877 0.91 ]
 [0.977 0.87  0.906]
 [0.98  0.882 0.918]
 [0.98  0.886 0.923]]
[[0.98  0.882 0.914]
 [0.975 0.852 0.856]
 [0.98  0.883 0.915]
 [0.979 0.875 0.906]
 [0.977 0.869 0.911]
 [0.979 0.88  0.91 ]
 [0.98  0.883 0.917]]
[[0.98  0.883 0.912]
 [0.947 0.598 0.714]
 [0.98  0.88  0.908]
 [0.98  0.891 0.917]
 [0.978 0.873 0.916]
 [0.98  0.886 0.917]
 [0.979 0.879 0.912]]
[[0.98  0.883 0.914]
 [0.967 0.783 0.826]
 [0.98  0.882 0.912]
 [0.98  0.882 0.916]
 [0.978 0.873 0.916]
 [0.979 0.883 0.92 ]
 [0.98  0.886 0.926]]


In [10]:
##   Column 4: Activity dataset (dataset index 3)
##   One models x metrics block is printed per trial, trials 1 to 5 in order
for trial in range(5):
    print(np.around(results_final[1, :, :, trial, 3], decimals=3))

[[0.976 0.554 0.698]
 [0.885 0.025 0.506]
 [0.979 0.618 0.726]
 [0.98  0.667 0.784]
 [0.979 0.666 0.826]
 [0.983 0.731 0.801]
 [0.983 0.744 0.824]]
[[0.977 0.571 0.706]
 [0.963 0.058 0.515]
 [0.979 0.624 0.729]
 [0.979 0.683 0.79 ]
 [0.978 0.675 0.852]
 [0.984 0.771 0.829]
 [0.985 0.787 0.854]]
[[0.978 0.582 0.71 ]
 [0.963 0.353 0.69 ]
 [0.98  0.628 0.732]
 [0.98  0.662 0.782]
 [0.981 0.706 0.833]
 [0.986 0.771 0.822]
 [0.984 0.762 0.847]]
[[0.977 0.572 0.706]
 [0.97  0.13  0.736]
 [0.979 0.623 0.728]
 [0.979 0.677 0.801]
 [0.981 0.705 0.848]
 [0.986 0.774 0.81 ]
 [0.985 0.763 0.841]]
[[0.977 0.563 0.699]
 [0.967 0.018 0.5  ]
 [0.979 0.621 0.727]
 [0.978 0.657 0.801]
 [0.977 0.637 0.834]
 [0.983 0.747 0.821]
 [0.982 0.722 0.826]]


In [None]:
## Table 6 (Appendix): pair-wise t-test for table 2 across 5 trials
##   shape = 7x3
##   rows -> models
##   cols -> metrics
##   Each entry is the paired t-test p-value of a model against the model with
##   the highest Table 2 score for that metric, paired over the 5 trials.
##   NOTE(review): the best model is also tested against itself; that entry is
##   presumably NaN rather than a meaningful p-value -- confirm.

table_6 = np.zeros((7, 3))
table_6_prep = results_final[1].mean(axis=3)  ## test scores averaged over datasets

for metric in range(3):  ## Loop through metrics

    best_model = np.argmax(table_2[:, metric])
    best_trials = table_6_prep[best_model, metric, :]

    for model in range(7):  ## Loop through models

        test_result = stats.ttest_rel(table_6_prep[model, metric, :], best_trials)
        table_6[model, metric] = test_result.pvalue

print(table_6.round(3))

In [None]:
## Check which values we need to *
## True marks entries of table_6 whose p-value is above 0.05.
## NOTE(review): a NaN entry compares as False here -- confirm that is the
## intended treatment for the best model's own row.
table_6 > 0.05

In [None]:
## Table 7 (Appendix): pair-wise t-test for table 3 across 5 trials
##   shape = 7x4
##   rows -> models
##   cols -> datasets
##   Each entry is the paired t-test p-value of a model against the model with
##   the highest Table 3 score for that dataset, paired over the 5 trials.
##   NOTE(review): the best model is also tested against itself; that entry is
##   presumably NaN rather than a meaningful p-value -- confirm.

table_7 = np.zeros((7, 4))
table_7_prep = results_final[1].mean(axis=1)  ## test scores averaged over metrics

for dataset in range(4):  ## Loop through datasets

    best_model = np.argmax(table_3[:, dataset])
    best_trials = table_7_prep[best_model, :, dataset]

    for model in range(7):  ## Loop through models

        test_result = stats.ttest_rel(table_7_prep[model, :, dataset], best_trials)
        table_7[model, dataset] = test_result.pvalue

print(table_7.round(3))

In [None]:
## Check which values we need to *
## True marks entries of table_7 whose p-value is above 0.05.
## NOTE(review): a NaN entry compares as False here -- confirm that is the
## intended treatment for the best model's own row.
table_7 > 0.05

### Use cv_results to draw heatmaps ###
420 = 20 x 7 (models) x 3 (metrics)<br>
20 = 4 (datasets) x 5 (trials)<br>

0 to 104:   Adult dataset<br>
105 to 209:  Occupancy dataset<br>
210 to 314:  HTRU2 dataset<br>
315 to 419:  Activity dataset

Every 21st entry of cv_results has the same model and the same metric (21 = 7 models x 3 metrics)

In [None]:
## Number of stored search results; the index lists built in the next cell
## assume this is 420 -- TODO confirm against the displayed value.
len(cv_results) 

In [None]:
## Positions of each (model, metric) search inside cv_results.
## A given combination sits at a fixed offset (0..20) and repeats every 21
## entries up to 420, so each list below holds 20 positions.
def _every_21st(offset):
    """Return the positions offset, offset + 21, ... that are below 420."""
    return list(np.arange(offset, 420, 21))


## Logistic regression
logreg_acc_idx = _every_21st(0)
logreg_f1_idx = _every_21st(1)
logreg_rocauc_idx = _every_21st(2)

## Perceptron
percep_acc_idx = _every_21st(3)
percep_f1_idx = _every_21st(4)
percep_rocauc_idx = _every_21st(5)

## Linear SVM
linsvm_acc_idx = _every_21st(6)
linsvm_f1_idx = _every_21st(7)
linsvm_rocauc_idx = _every_21st(8)

## RBF SVM
rbfsvm_acc_idx = _every_21st(9)
rbfsvm_f1_idx = _every_21st(10)
rbfsvm_rocauc_idx = _every_21st(11)

## Decision tree
dectre_acc_idx = _every_21st(12)
dectre_f1_idx = _every_21st(13)
dectre_rocauc_idx = _every_21st(14)

## Random forest
ranfor_acc_idx = _every_21st(15)
ranfor_f1_idx = _every_21st(16)
ranfor_rocauc_idx = _every_21st(17)

## Gradient boosting
graboo_acc_idx = _every_21st(18)
graboo_f1_idx = _every_21st(19)
graboo_rocauc_idx = _every_21st(20)

#### Logistic Regression ####

In [None]:
## Accuracy
## Pool the logistic-regression accuracy searches (every index in
## logreg_acc_idx) and average mean_test_score per value of C.
## The frames are collected first and concatenated once, instead of growing a
## DataFrame with pd.concat inside the loop.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in logreg_acc_idx
]
param_search_results_logreg_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C'], as_index=True)
    .mean()
)

sns.heatmap(param_search_results_logreg_acc, annot=True, fmt='.4f')
plt.title('Logistic Regression Accuracy Score')
plt.savefig('results/param_search_results_logreg_acc.png')
plt.show()

In [None]:
## F1
## Pool the logistic-regression F1 searches (every index in logreg_f1_idx)
## and average mean_test_score per value of C.
## The frames are collected first and concatenated once, instead of growing a
## DataFrame with pd.concat inside the loop.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in logreg_f1_idx
]
param_search_results_logreg_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C'], as_index=True)
    .mean()
)

sns.heatmap(param_search_results_logreg_f1, annot=True, fmt='.4f')
plt.title('Logistic Regression F1 Score')
plt.savefig('results/param_search_results_logreg_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the logistic-regression ROC AUC searches (every index in
## logreg_rocauc_idx) and average mean_test_score per value of C.
## The frames are collected first and concatenated once, instead of growing a
## DataFrame with pd.concat inside the loop.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in logreg_rocauc_idx
]
param_search_results_logreg_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C'], as_index=True)
    .mean()
)

sns.heatmap(param_search_results_logreg_rocauc, annot=True, fmt='.4f')
plt.title('Logistic Regression ROC AUC Score')
plt.savefig('results/param_search_results_logreg_rocauc.png')
plt.show()

#### Perceptron ####

In [None]:
## Accuracy
## Pool the perceptron accuracy searches (every index in percep_acc_idx),
## average mean_test_score per (penalty, alpha) and draw an alpha x penalty
## heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in percep_acc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_percep_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['penalty', 'alpha'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_percep_acc.pivot(index='alpha', columns='penalty', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.title('Perceptron Accuracy Score')
plt.xlabel('penalty')
plt.savefig('results/param_search_results_percep_acc.png')
plt.show()

In [None]:
## F1
## Pool the perceptron F1 searches (every index in percep_f1_idx), average
## mean_test_score per (penalty, alpha) and draw an alpha x penalty heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in percep_f1_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_percep_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['penalty', 'alpha'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_percep_f1.pivot(index='alpha', columns='penalty', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.title('Perceptron F1 Score')
plt.xlabel('penalty')
plt.savefig('results/param_search_results_percep_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the perceptron ROC AUC searches (every index in percep_rocauc_idx),
## average mean_test_score per (penalty, alpha) and draw an alpha x penalty
## heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in percep_rocauc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_percep_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['penalty', 'alpha'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_percep_rocauc.pivot(index='alpha', columns='penalty', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.title('Perceptron ROC AUC Score')
plt.xlabel('penalty')
plt.savefig('results/param_search_results_percep_rocauc.png')
plt.show()

#### Linear SVM ####

In [None]:
## Accuracy
## Pool the linear-SVM accuracy searches (every index in linsvm_acc_idx) and
## average mean_test_score per value of C.
## The frames are collected first and concatenated once, instead of growing a
## DataFrame with pd.concat inside the loop.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in linsvm_acc_idx
]
param_search_results_linsvm_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C'], as_index=True)
    .mean()
)

sns.heatmap(param_search_results_linsvm_acc, annot=True, fmt='.4f')
plt.title('Linear SVM Accuracy Score')
plt.savefig('results/param_search_results_linsvm_acc.png')
plt.show()

In [None]:
## F1
## Pool the linear-SVM F1 searches (every index in linsvm_f1_idx) and average
## mean_test_score per value of C.
## The frames are collected first and concatenated once, instead of growing a
## DataFrame with pd.concat inside the loop.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in linsvm_f1_idx
]
param_search_results_linsvm_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C'], as_index=True)
    .mean()
)

sns.heatmap(param_search_results_linsvm_f1, annot=True, fmt='.4f')
plt.title('Linear SVM F1 Score')
plt.savefig('results/param_search_results_linsvm_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the linear-SVM ROC AUC searches (every index in linsvm_rocauc_idx) and
## average mean_test_score per value of C.
## The frames are collected first and concatenated once, instead of growing a
## DataFrame with pd.concat inside the loop.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in linsvm_rocauc_idx
]
param_search_results_linsvm_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C'], as_index=True)
    .mean()
)

sns.heatmap(param_search_results_linsvm_rocauc, annot=True, fmt='.4f')
plt.title('Linear SVM ROC AUC Score')
plt.savefig('results/param_search_results_linsvm_rocauc.png')
plt.show()

#### RBF SVM ####

In [None]:
## Accuracy
## Pool the RBF-SVM accuracy searches (every index in rbfsvm_acc_idx), average
## mean_test_score per (C, gamma) and draw a C x gamma heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in rbfsvm_acc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_rbfsvm_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C', 'gamma'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_rbfsvm_acc.pivot(index='C', columns='gamma', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.xlabel('gamma')
plt.title('RBF SVM Accuracy Score')
plt.savefig('results/param_search_results_rbfsvm_acc.png')
plt.show()

In [None]:
## F1
## Pool the RBF-SVM F1 searches (every index in rbfsvm_f1_idx), average
## mean_test_score per (C, gamma) and draw a C x gamma heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in rbfsvm_f1_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_rbfsvm_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C', 'gamma'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_rbfsvm_f1.pivot(index='C', columns='gamma', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.xlabel('gamma')
plt.title('RBF SVM F1 Score')
plt.savefig('results/param_search_results_rbfsvm_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the RBF-SVM ROC AUC searches (every index in rbfsvm_rocauc_idx),
## average mean_test_score per (C, gamma) and draw a C x gamma heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in rbfsvm_rocauc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_rbfsvm_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['C', 'gamma'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_rbfsvm_rocauc.pivot(index='C', columns='gamma', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.xlabel('gamma')
plt.title('RBF SVM ROC AUC Score')
plt.savefig('results/param_search_results_rbfsvm_rocauc.png')
plt.show()

#### Decision Tree ####

In [None]:
## Accuracy
## Pool the decision-tree accuracy searches (every index in dectre_acc_idx),
## average mean_test_score per (max_depth, ccp_alpha, criterion) and draw one
## max_depth x ccp_alpha heatmap per split criterion.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in dectre_acc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_dectre_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['max_depth', 'ccp_alpha', 'criterion'], as_index=True)
    .mean()
    .reset_index()
)

## Entropy criterion
is_entropy = param_search_results_dectre_acc['criterion'] == 'entropy'
param_search_results_dectre_acc_entropy = param_search_results_dectre_acc[is_entropy].drop(columns=['criterion'])

plt.figure()
## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_dectre_acc_entropy.pivot(index='max_depth', columns='ccp_alpha', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Decision Tree (Entropy) Accuracy Score')
plt.savefig('results/param_search_results_dectre_entropy_acc.png')
plt.show()

## Gini criterion
is_gini = param_search_results_dectre_acc['criterion'] == 'gini'
param_search_results_dectre_acc_gini = param_search_results_dectre_acc[is_gini].drop(columns=['criterion'])

plt.figure()
sns.heatmap(
    param_search_results_dectre_acc_gini.pivot(index='max_depth', columns='ccp_alpha', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Decision Tree (Gini) Accuracy Score')
plt.savefig('results/param_search_results_dectre_gini_acc.png')
plt.show()

In [None]:
## F1
## Pool the decision-tree F1 searches (every index in dectre_f1_idx), average
## mean_test_score per (max_depth, ccp_alpha, criterion) and draw one
## max_depth x ccp_alpha heatmap per split criterion.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in dectre_f1_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_dectre_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['max_depth', 'ccp_alpha', 'criterion'], as_index=True)
    .mean()
    .reset_index()
)

## Entropy criterion
is_entropy = param_search_results_dectre_f1['criterion'] == 'entropy'
param_search_results_dectre_f1_entropy = param_search_results_dectre_f1[is_entropy].drop(columns=['criterion'])

plt.figure()
## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_dectre_f1_entropy.pivot(index='max_depth', columns='ccp_alpha', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Decision Tree (Entropy) F1 Score')
plt.savefig('results/param_search_results_dectre_entropy_f1.png')
plt.show()

## Gini criterion
is_gini = param_search_results_dectre_f1['criterion'] == 'gini'
param_search_results_dectre_f1_gini = param_search_results_dectre_f1[is_gini].drop(columns=['criterion'])

plt.figure()
sns.heatmap(
    param_search_results_dectre_f1_gini.pivot(index='max_depth', columns='ccp_alpha', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Decision Tree (Gini) F1 Score')
plt.savefig('results/param_search_results_dectre_gini_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the decision-tree ROC AUC searches (every index in dectre_rocauc_idx),
## average mean_test_score per (max_depth, ccp_alpha, criterion) and draw one
## max_depth x ccp_alpha heatmap per split criterion.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in dectre_rocauc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_dectre_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['max_depth', 'ccp_alpha', 'criterion'], as_index=True)
    .mean()
    .reset_index()
)

## Entropy criterion
is_entropy = param_search_results_dectre_rocauc['criterion'] == 'entropy'
param_search_results_dectre_rocauc_entropy = param_search_results_dectre_rocauc[is_entropy].drop(columns=['criterion'])

plt.figure()
## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_dectre_rocauc_entropy.pivot(index='max_depth', columns='ccp_alpha', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Decision Tree (Entropy) ROC AUC Score')
plt.savefig('results/param_search_results_dectre_entropy_rocauc.png')
plt.show()

## Gini criterion
is_gini = param_search_results_dectre_rocauc['criterion'] == 'gini'
param_search_results_dectre_rocauc_gini = param_search_results_dectre_rocauc[is_gini].drop(columns=['criterion'])

plt.figure()
sns.heatmap(
    param_search_results_dectre_rocauc_gini.pivot(index='max_depth', columns='ccp_alpha', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Decision Tree (Gini) ROC AUC Score')
plt.savefig('results/param_search_results_dectre_gini_rocauc.png')
plt.show()

#### Random Forest ####

In [None]:
## Accuracy
## Pool the random-forest accuracy searches (every index in ranfor_acc_idx),
## average mean_test_score per (n_estimators, ccp_alpha, criterion) and draw
## one n_estimators x ccp_alpha heatmap per split criterion.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in ranfor_acc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_ranfor_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['n_estimators', 'ccp_alpha', 'criterion'], as_index=True)
    .mean()
    .reset_index()
)

## Entropy criterion
is_entropy = param_search_results_ranfor_acc['criterion'] == 'entropy'
param_search_results_ranfor_acc_entropy = param_search_results_ranfor_acc[is_entropy].drop(columns=['criterion'])

plt.figure()
## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_ranfor_acc_entropy.pivot(index='n_estimators', columns='ccp_alpha', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Random Forest (Entropy) Accuracy Score')
plt.savefig('results/param_search_results_ranfor_entropy_acc.png')
plt.show()

## Gini criterion
is_gini = param_search_results_ranfor_acc['criterion'] == 'gini'
param_search_results_ranfor_acc_gini = param_search_results_ranfor_acc[is_gini].drop(columns=['criterion'])

plt.figure()
sns.heatmap(
    param_search_results_ranfor_acc_gini.pivot(index='n_estimators', columns='ccp_alpha', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Random Forest (Gini) Accuracy Score')
plt.savefig('results/param_search_results_ranfor_gini_acc.png')
plt.show()

In [None]:
## F1
## Pool the random-forest F1 searches (every index in ranfor_f1_idx), average
## mean_test_score per (n_estimators, ccp_alpha, criterion) and draw one
## n_estimators x ccp_alpha heatmap per split criterion.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in ranfor_f1_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_ranfor_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['n_estimators', 'ccp_alpha', 'criterion'], as_index=True)
    .mean()
    .reset_index()
)

## Entropy criterion
is_entropy = param_search_results_ranfor_f1['criterion'] == 'entropy'
param_search_results_ranfor_f1_entropy = param_search_results_ranfor_f1[is_entropy].drop(columns=['criterion'])

plt.figure()
## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_ranfor_f1_entropy.pivot(index='n_estimators', columns='ccp_alpha', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Random Forest (Entropy) F1 Score')
plt.savefig('results/param_search_results_ranfor_entropy_f1.png')
plt.show()

## Gini criterion
is_gini = param_search_results_ranfor_f1['criterion'] == 'gini'
param_search_results_ranfor_f1_gini = param_search_results_ranfor_f1[is_gini].drop(columns=['criterion'])

plt.figure()
sns.heatmap(
    param_search_results_ranfor_f1_gini.pivot(index='n_estimators', columns='ccp_alpha', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Random Forest (Gini) F1 Score')
plt.savefig('results/param_search_results_ranfor_gini_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the random-forest ROC AUC searches (every index in ranfor_rocauc_idx),
## average mean_test_score per (n_estimators, ccp_alpha, criterion) and draw
## one n_estimators x ccp_alpha heatmap per split criterion.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in ranfor_rocauc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_ranfor_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['n_estimators', 'ccp_alpha', 'criterion'], as_index=True)
    .mean()
    .reset_index()
)

## Entropy criterion
is_entropy = param_search_results_ranfor_rocauc['criterion'] == 'entropy'
param_search_results_ranfor_rocauc_entropy = param_search_results_ranfor_rocauc[is_entropy].drop(columns=['criterion'])

plt.figure()
## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_ranfor_rocauc_entropy.pivot(index='n_estimators', columns='ccp_alpha', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Random Forest (Entropy) ROC AUC Score')
plt.savefig('results/param_search_results_ranfor_entropy_rocauc.png')
plt.show()

## Gini criterion
is_gini = param_search_results_ranfor_rocauc['criterion'] == 'gini'
param_search_results_ranfor_rocauc_gini = param_search_results_ranfor_rocauc[is_gini].drop(columns=['criterion'])

plt.figure()
sns.heatmap(
    param_search_results_ranfor_rocauc_gini.pivot(index='n_estimators', columns='ccp_alpha', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.xlabel('ccp_alpha')
plt.title('Random Forest (Gini) ROC AUC Score')
plt.savefig('results/param_search_results_ranfor_gini_rocauc.png')
plt.show()

#### Gradient Boosting Classifier ####

In [None]:
## Accuracy
## Pool the gradient-boosting accuracy searches (every index in
## graboo_acc_idx), average mean_test_score per (learning_rate, n_estimators)
## and draw a learning_rate x n_estimators heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_acc=cv_results[i]['mean_test_score'])
    for i in graboo_acc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_graboo_acc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['learning_rate', 'n_estimators'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_graboo_acc.pivot(index='learning_rate', columns='n_estimators', values='score_acc'),
    annot=True, fmt='.4f',
)
plt.xlabel('n_estimators')
plt.title('Gradient Boosting Accuracy Score')
plt.savefig('results/param_search_results_graboo_acc.png')
plt.show()

In [None]:
## F1
## Pool the gradient-boosting F1 searches (every index in graboo_f1_idx),
## average mean_test_score per (learning_rate, n_estimators) and draw a
## learning_rate x n_estimators heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_f1=cv_results[i]['mean_test_score'])
    for i in graboo_f1_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_graboo_f1 = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['learning_rate', 'n_estimators'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_graboo_f1.pivot(index='learning_rate', columns='n_estimators', values='score_f1'),
    annot=True, fmt='.4f',
)
plt.xlabel('n_estimators')
plt.title('Gradient Boosting F1 Score')
plt.savefig('results/param_search_results_graboo_f1.png')
plt.show()

In [None]:
## ROC AUC
## Pool the gradient-boosting ROC AUC searches (every index in
## graboo_rocauc_idx), average mean_test_score per
## (learning_rate, n_estimators) and draw a learning_rate x n_estimators
## heatmap.
search_frames = [
    pd.DataFrame(cv_results[i]['params']).assign(score_rocauc=cv_results[i]['mean_test_score'])
    for i in graboo_rocauc_idx
]
## Single concat instead of growing a DataFrame inside the loop
param_search_results_graboo_rocauc = (
    pd.concat(search_frames, ignore_index=True)
    .groupby(['learning_rate', 'n_estimators'], as_index=True)
    .mean()
    .reset_index()
)

## pivot() only accepts keyword arguments in pandas >= 2.0
sns.heatmap(
    param_search_results_graboo_rocauc.pivot(index='learning_rate', columns='n_estimators', values='score_rocauc'),
    annot=True, fmt='.4f',
)
plt.xlabel('n_estimators')
plt.title('Gradient Boosting ROC AUC Score')
plt.savefig('results/param_search_results_graboo_rocauc.png')
plt.show()