### Import libraries

In [24]:
# `display` comes from the public IPython.display module; importing it from
# IPython.core.display is deprecated.
from IPython.display import display
from src.utils.preprocessing import classic_preprocessing,standardize
from src.utils.get_data import import_data, split_experts
from src.utils.train import hyperparameter_tuning_cv
# NOTE(review): the star import is presumably where the *_PARAMS grids used
# further down come from — confirm and consider importing them explicitly.
from src.utils.config import *
import pandas as pd

In [4]:
# Enable the autoreload extension in mode 2 so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [5]:
# Data directory, given relative to this notebook.
DATA_PATH = '../../data'

# Pooled dataset with coarse segmentation; per the drop_* flags the user
# features are kept and the expert column is dropped.
X_coarse, y_coarse = import_data(
    DATA_PATH,
    segmentation_type='coarse',
    drop_user_features=False,
    drop_expert=True,
)

In [25]:
# Same coarse-segmentation load, but with drop_expert=False so the data can
# later be split per expert.
X_e, y_e = import_data(
    DATA_PATH,
    segmentation_type='coarse',
    drop_user_features=False,
    drop_expert=False,
)

In [None]:
# Quick look at the first rows of the pooled features and labels.
display(X_coarse.head(), y_coarse.head())

### Preprocessing

In [6]:
# Run the shared preprocessing helper on the pooled features.
# NOTE(review): the result is bound back to X_coarse, so re-running this cell
# passes already-preprocessed data through classic_preprocessing again.
X_coarse = classic_preprocessing(X_coarse)

In [26]:
# Set the raw Expert column aside so it can be restored after preprocessing
# and used for the per-expert split. Only the column is copied, not the frame.
expert = X_e['Expert'].copy()
X_e = classic_preprocessing(X_e)
# Restore it with item assignment: attribute assignment (X_e.Expert = ...)
# only updates an existing column and, if preprocessing removed the column,
# would set a plain Python attribute instead of creating the column.
# NOTE(review): `.values` restores by position, so this assumes preprocessing
# keeps the row count and order — confirm.
X_e['Expert'] = expert.values

### Split expert models

In [27]:
# Unpack the six objects returned by split_experts; presumably one
# (features, labels) pair per expert — confirm against the helper.
(
    X_e_1, y_e_1,
    X_e_2, y_e_2,
    X_e_3, y_e_3,
) = split_experts(X_e, y_e)

### Grid search

#### 1. Logistic Regression

In [7]:
# Cross-validated grid search (cv_k=5) for logistic regression on the pooled data.
log_results = hyperparameter_tuning_cv(
    model='logistic',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=LOGISTIC_PARAMS,
)
display(log_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.422494,0.592608,0.636434
10000,False,0.187728,0.53433,0.728295
100000,True,0.422494,0.592608,0.636434
100000,False,0.187728,0.53433,0.728295


In [8]:
# Keep the single grid row with the highest F1 score (first one on ties).
# The metric column is re-indexed 0..n-1 so idxmax yields a position for iloc.
best_log = log_results.iloc[
    [log_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_log)


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.422494,0.592608,0.636434


In [28]:
# Same logistic grid, run once per expert subset (in order 1, 2, 3).
log_results_1, log_results_2, log_results_3 = (
    hyperparameter_tuning_cv(model='logistic', data=X_expert, labels=y_expert,
                             cv_k=5, params=LOGISTIC_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)

In [29]:
# Per expert, keep the single grid row with the highest ROC AUC.
# NOTE(review): the pooled model is ranked by f1_score while the per-expert
# models are ranked by roc_auc_score — confirm this difference is intentional.
best_log_1, best_log_2, best_log_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (log_results_1, log_results_2, log_results_3)
)
display(best_log_1, best_log_2, best_log_3)


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.377304,0.647086,0.769381


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.281876,0.541837,0.645213


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.641064,0.651386,0.650857


#### 2. Linear Discriminant Analysis

In [9]:
# Cross-validated grid search (cv_k=5) for LDA on the pooled data.
lda_results = hyperparameter_tuning_cv(
    model='lda',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=LDA_PARAMS,
)
display(lda_results)

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.420431,0.591335,0.636822
False,0.1934,0.536121,0.728876


In [10]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_lda = lda_results.iloc[
    [lda_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_lda)

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.420431,0.591335,0.636822


In [30]:
# Same LDA grid, run once per expert subset (in order 1, 2, 3).
lda_results_1, lda_results_2, lda_results_3 = (
    hyperparameter_tuning_cv(model='lda', data=X_expert, labels=y_expert,
                             cv_k=5, params=LDA_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)


In [31]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_lda_1, best_lda_2, best_lda_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (lda_results_1, lda_results_2, lda_results_3)
)
display(best_lda_1, best_lda_2, best_lda_3)

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.3901,0.664183,0.753746


Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.273949,0.535893,0.644681


Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.633242,0.644654,0.644571


#### 3. K-Nearest Neighbors

In [11]:
# Cross-validated grid search (cv_k=5) for k-nearest neighbours on the pooled data.
knn_results = hyperparameter_tuning_cv(
    model='knn',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=KNN_PARAMS,
)
display(knn_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,True,0.459767,0.61416,0.60969
1,False,0.436942,0.61506,0.692054
2,True,0.434702,0.604448,0.649612
2,False,0.233164,0.54503,0.726163
3,True,0.454197,0.598091,0.546124
3,False,0.348949,0.571339,0.694186
4,True,0.443779,0.597513,0.587791
4,False,0.221902,0.541596,0.725388
5,True,0.45273,0.590081,0.511434
5,False,0.291581,0.552416,0.702132


In [12]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_knn = knn_results.iloc[
    [knn_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_knn)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,True,0.459767,0.61416,0.60969


In [32]:
# Same KNN grid, run once per expert subset (in order 1, 2, 3).
knn_results_1, knn_results_2, knn_results_3 = (
    hyperparameter_tuning_cv(model='knn', data=X_expert, labels=y_expert,
                             cv_k=5, params=KNN_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)

In [33]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_knn_1, best_knn_2, best_knn_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (knn_results_1, knn_results_2, knn_results_3)
)
display(best_knn_1, best_knn_2, best_knn_3)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,True,0.379099,0.659479,0.74202


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,True,0.328256,0.575158,0.619681


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,False,0.648406,0.663485,0.664


#### 4. Support Vector Classifier

In [13]:
# Cross-validated grid search (cv_k=5) for the support vector classifier on the pooled data.
svc_results = hyperparameter_tuning_cv(
    model='svc',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=SVC_PARAMS,
)
display(svc_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
linear,0.1,True,0.412603,0.585334,0.632558
linear,0.1,False,0.0,0.5,0.73314
rbf,0.1,True,0.132269,0.529001,0.740891
rbf,0.1,False,0.017336,0.503735,0.734496
linear,0.01,True,0.412603,0.585334,0.632558
linear,0.01,False,0.0,0.5,0.73314
rbf,0.01,True,0.423993,0.599318,0.658333
rbf,0.01,False,0.01016,0.502426,0.734302


In [14]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_svc = svc_results.iloc[
    [svc_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_svc)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rbf,0.01,True,0.423993,0.599318,0.658333


In [34]:
# Same SVC grid, run once per expert subset (in order 1, 2, 3).
svc_results_1, svc_results_2, svc_results_3 = (
    hyperparameter_tuning_cv(model='svc', data=X_expert, labels=y_expert,
                             cv_k=5, params=SVC_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)

In [35]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_svc_1, best_svc_2, best_svc_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (svc_results_1, svc_results_2, svc_results_3)
)
display(best_svc_1, best_svc_2, best_svc_3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rbf,0.01,True,0.423142,0.668896,0.816938


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
linear,0.1,True,0.272192,0.534233,0.640957


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rbf,0.01,False,0.644609,0.671299,0.671429


#### 5. Naive Bayes Classifier

In [15]:
# Cross-validated grid search (cv_k=5) for naive Bayes on the pooled data.
nb_results = hyperparameter_tuning_cv(
    model='naive_bayes',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=NAIVE_BAYES_PARAMS,
)
display(nb_results)

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.012715,0.498968,0.728488
False,0.018182,0.498831,0.726938


In [16]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_nb = nb_results.iloc[
    [nb_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_nb)


Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.018182,0.498831,0.726938


In [36]:
# Same naive Bayes grid, run once per expert subset (in order 1, 2, 3).
nb_results_1, nb_results_2, nb_results_3 = (
    hyperparameter_tuning_cv(model='naive_bayes', data=X_expert, labels=y_expert,
                             cv_k=5, params=NAIVE_BAYES_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)

In [37]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_nb_1, best_nb_2, best_nb_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (nb_results_1, nb_results_2, nb_results_3)
)
display(best_nb_1, best_nb_2, best_nb_3)

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.325545,0.610851,0.731596


Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.300389,0.53382,0.513298


Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.254557,0.529126,0.542857


#### 6. Decision Tree

In [17]:
# Cross-validated grid search (cv_k=5) for the decision tree on the pooled data.
dt_results = hyperparameter_tuning_cv(
    model='decision_tree',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=DECISION_TREE_PARAMS,
)
display(dt_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,True,0.359113,0.527987,0.544961
3,False,0.09893,0.514578,0.726744
5,True,0.348168,0.549841,0.632752
5,False,0.238326,0.541775,0.713566
7,True,0.371938,0.55773,0.621124
7,False,0.268333,0.541888,0.693798


In [18]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_dt = dt_results.iloc[
    [dt_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_dt)


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,True,0.371938,0.55773,0.621124


In [38]:
# Same decision-tree grid, run once per expert subset (in order 1, 2, 3).
dt_results_1, dt_results_2, dt_results_3 = (
    hyperparameter_tuning_cv(model='decision_tree', data=X_expert, labels=y_expert,
                             cv_k=5, params=DECISION_TREE_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)

In [39]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_dt_1, best_dt_2, best_dt_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (dt_results_1, dt_results_2, dt_results_3)
)
display(best_dt_1, best_dt_2, best_dt_3)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,True,0.348854,0.634479,0.708795


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,True,0.226904,0.515438,0.671277


Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,True,0.54426,0.609622,0.611429


#### 7. Random Forest

In [19]:
# Cross-validated grid search (cv_k=5) for the random forest on the pooled data.
rf_results = hyperparameter_tuning_cv(
    model='random_forest',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=RANDOM_FOREST_PARAMS,
)
display(rf_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,3,True,0.394905,0.558527,0.57907
3,3,False,0.028014,0.502418,0.729845
3,5,True,0.38967,0.55324,0.572674
3,5,False,0.005933,0.500432,0.732364
3,7,True,0.397271,0.564976,0.593605
3,7,False,0.007466,0.501369,0.733333
5,3,True,0.386112,0.554904,0.585465
5,3,False,0.118938,0.516386,0.724225
5,5,True,0.397387,0.572481,0.619961
5,5,False,0.091582,0.514257,0.730233


In [20]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_rf = rf_results.iloc[
    [rf_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_rf)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,5,True,0.411106,0.581917,0.622093


In [40]:
# Same random-forest grid, run once per expert subset (in order 1, 2, 3).
rf_results_1, rf_results_2, rf_results_3 = (
    hyperparameter_tuning_cv(model='random_forest', data=X_expert, labels=y_expert,
                             cv_k=5, params=RANDOM_FOREST_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)


In [41]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_rf_1, best_rf_2, best_rf_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (rf_results_1, rf_results_2, rf_results_3)
)
display(best_rf_1, best_rf_2, best_rf_3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,7,True,0.379682,0.65996,0.732248


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,5,True,0.29768,0.546118,0.605319


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,7,False,0.609003,0.636512,0.635429


#### 8. Gradient Boosting

In [21]:
# Cross-validated grid search (cv_k=5) for gradient boosting on the pooled data.
gb_results = hyperparameter_tuning_cv(
    model='gradient_boosting',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=GRADIENT_BOOSTING_PARAMS,
)
display(gb_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,3,True,0.35181,0.540515,0.602907
3,3,False,0.0,0.5,0.73314
3,5,True,0.36829,0.551696,0.611434
3,5,False,0.0,0.5,0.73314
3,7,True,0.375941,0.558889,0.618992
3,7,False,0.0,0.5,0.73314
5,3,True,0.354346,0.563117,0.658915
5,3,False,0.0,0.499871,0.732946
5,5,True,0.368552,0.568675,0.657558
5,5,False,0.00295,0.500341,0.732946


In [22]:
# Keep the single grid row with the highest F1 score (first one on ties).
best_gb = gb_results.iloc[
    [gb_results['f1_score'].reset_index(drop=True).idxmax()]
]
display(best_gb)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,7,True,0.38823,0.584842,0.677907


In [42]:
# Same gradient-boosting grid, run once per expert subset (in order 1, 2, 3).
gb_results_1, gb_results_2, gb_results_3 = (
    hyperparameter_tuning_cv(model='gradient_boosting', data=X_expert, labels=y_expert,
                             cv_k=5, params=GRADIENT_BOOSTING_PARAMS)
    for X_expert, y_expert in ((X_e_1, y_e_1), (X_e_2, y_e_2), (X_e_3, y_e_3))
)

In [43]:
# Per expert, keep the grid row with the highest ROC AUC
# (the pooled counterpart is ranked by F1 instead).
best_gb_1, best_gb_2, best_gb_3 = (
    results.iloc[[results['roc_auc_score'].reset_index(drop=True).idxmax()]]
    for results in (gb_results_1, gb_results_2, gb_results_3)
)
display(best_gb_1, best_gb_2, best_gb_3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,3,True,0.3914,0.677856,0.711401


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,7,True,0.245231,0.543916,0.73883


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,3,True,0.611196,0.639485,0.641143


### Results

In [23]:
# Recap of the best pooled configuration per model family; display() renders
# its arguments one after another, so each label precedes its table.
display(
    'logistic', best_log,
    'lda', best_lda,
    'knn', best_knn,
    'svc', best_svc,
    'naive_bayes', best_nb,
    'decision_tree', best_dt,
    'random_forest', best_rf,
    'gradient_boosting', best_gb,
)

'logistic'

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.422494,0.592608,0.636434


'lda'

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.420431,0.591335,0.636822


'knn'

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,True,0.459767,0.61416,0.60969


'svc'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rbf,0.01,True,0.423993,0.599318,0.658333


'naive_bayes'

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.018182,0.498831,0.726938


'decision_tree'

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,True,0.371938,0.55773,0.621124


'random_forest'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,5,True,0.411106,0.581917,0.622093


'gradient_boosting'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,7,True,0.38823,0.584842,0.677907


In [44]:
# Recap of the best per-expert configuration for every model family.
# Each row is labelled with its expert through `keys`, so the tables can be
# read without relying on row order (rows stay in order expert 1, 2, 3).
EXPERT_KEYS = ['expert_1', 'expert_2', 'expert_3']
display('logistic', pd.concat([best_log_1, best_log_2, best_log_3], keys=EXPERT_KEYS))
display('lda', pd.concat([best_lda_1, best_lda_2, best_lda_3], keys=EXPERT_KEYS))
display('knn', pd.concat([best_knn_1, best_knn_2, best_knn_3], keys=EXPERT_KEYS))
display('svc', pd.concat([best_svc_1, best_svc_2, best_svc_3], keys=EXPERT_KEYS))
display('naive_bayes', pd.concat([best_nb_1, best_nb_2, best_nb_3], keys=EXPERT_KEYS))
display('decision_tree', pd.concat([best_dt_1, best_dt_2, best_dt_3], keys=EXPERT_KEYS))
display('random_forest', pd.concat([best_rf_1, best_rf_2, best_rf_3], keys=EXPERT_KEYS))
display('gradient_boosting', pd.concat([best_gb_1, best_gb_2, best_gb_3], keys=EXPERT_KEYS))

'logistic'

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_iter,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000,True,0.377304,0.647086,0.769381
10000,True,0.281876,0.541837,0.645213
10000,True,0.641064,0.651386,0.650857


'lda'

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.3901,0.664183,0.753746
True,0.273949,0.535893,0.644681
True,0.633242,0.644654,0.644571


'knn'

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
n_neighbors,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,True,0.379099,0.659479,0.74202
1,True,0.328256,0.575158,0.619681
1,False,0.648406,0.663485,0.664


'svc'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
kernel,gamma,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rbf,0.01,True,0.423142,0.668896,0.816938
linear,0.1,True,0.272192,0.534233,0.640957
rbf,0.01,False,0.644609,0.671299,0.671429


'naive_bayes'

Unnamed: 0_level_0,f1_score,roc_auc_score,accuracy_score
oversampling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
True,0.325545,0.610851,0.731596
False,0.300389,0.53382,0.513298
False,0.254557,0.529126,0.542857


'decision_tree'

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,oversampling,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,True,0.348854,0.634479,0.708795
5,True,0.226904,0.515438,0.671277
3,True,0.54426,0.609622,0.611429


'random_forest'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,7,True,0.379682,0.65996,0.732248
3,5,True,0.29768,0.546118,0.605319
7,7,False,0.609003,0.636512,0.635429


'gradient_boosting'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_score,roc_auc_score,accuracy_score
max_depth,n_estimators,oversampling,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,3,True,0.3914,0.677856,0.711401
7,7,True,0.245231,0.543916,0.73883
3,3,True,0.611196,0.639485,0.641143


### Conclusions

TODO