### Import libraries

In [None]:
import pandas as pd
# `display` is imported from the public `IPython.display` module: importing it
# from `IPython.core.display` has been deprecated since IPython 7.14 and is no
# longer possible on newer IPython releases.
from IPython.display import display
from src.utils.preprocessing import classic_preprocessing
from src.utils.get_data import import_data
from src.utils.train import hyperparameter_tuning_cv
# NOTE(review): star import kept last on purpose so name-shadowing order is
# unchanged; it presumably supplies the *_PARAMS grids used below - confirm.
from src.utils.config import *

In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell is executed, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Load Data

In [None]:
# Data directory, given as a relative path (resolved against the current
# working directory of the kernel).
DATA_PATH = '../../data'

# Load features and labels for the 'coarse' segmentation.
# NOTE(review): the two drop_* flags presumably keep the user features and
# discard the expert data - confirm against import_data.
X_coarse, y_coarse = import_data(
    DATA_PATH,
    segmentation_type='coarse',
    drop_user_features=False,
    drop_expert=True,
)

In [None]:
# Show the first rows of the features, then of the labels, as a quick check.
for preview_frame in (X_coarse, y_coarse):
    display(preview_frame.head())

### Preprocessing

In [None]:
# Replace the raw features with the output of the project's preprocessing step.
# NOTE: X_coarse is rebound in place, so re-running this cell feeds already
# preprocessed data back into classic_preprocessing; re-run the loading cell
# first unless that function is known to be idempotent.
X_coarse = classic_preprocessing(X_coarse)

### Grid search

#### 1. Logistic Regression

In [None]:
# Hyperparameter search for the 'logistic' model over LOGISTIC_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
log_results = hyperparameter_tuning_cv(
    model='logistic',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=LOGISTIC_PARAMS,
)

display(log_results)

In [None]:
# Pick the logistic-regression search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_log_position = log_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_log = log_results.iloc[[best_log_position]]

display(best_log)


#### 2. Linear Discriminant Analysis

In [None]:
# Hyperparameter search for the 'lda' model over LDA_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
lda_results = hyperparameter_tuning_cv(
    model='lda',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=LDA_PARAMS,
)

display(lda_results)

In [None]:
# Pick the LDA search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_lda_position = lda_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_lda = lda_results.iloc[[best_lda_position]]

display(best_lda)

#### 3. K-Nearest Neighbors

In [None]:
# Hyperparameter search for the 'knn' model over KNN_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
knn_results = hyperparameter_tuning_cv(
    model='knn',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=KNN_PARAMS,
)

display(knn_results)

In [None]:
# Pick the k-nearest-neighbours search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_knn_position = knn_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_knn = knn_results.iloc[[best_knn_position]]

display(best_knn)

#### 4. Support Vector Classifier

In [None]:
# Hyperparameter search for the 'svc' model over SVC_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
svc_results = hyperparameter_tuning_cv(
    model='svc',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=SVC_PARAMS,
)

display(svc_results)

In [None]:
# Pick the support-vector-classifier search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_svc_position = svc_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_svc = svc_results.iloc[[best_svc_position]]

display(best_svc)

#### 5. Naive Bayes Classifier

In [None]:
# Hyperparameter search for the 'naive_bayes' model over NAIVE_BAYES_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
nb_results = hyperparameter_tuning_cv(
    model='naive_bayes',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=NAIVE_BAYES_PARAMS,
)

display(nb_results)

In [None]:
# Pick the naive-Bayes search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_nb_position = nb_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_nb = nb_results.iloc[[best_nb_position]]

display(best_nb)


#### 6. Decision Tree

In [None]:
# Hyperparameter search for the 'decision_tree' model over DECISION_TREE_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
dt_results = hyperparameter_tuning_cv(
    model='decision_tree',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=DECISION_TREE_PARAMS,
)

display(dt_results)

In [None]:
# Pick the decision-tree search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_dt_position = dt_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_dt = dt_results.iloc[[best_dt_position]]

display(best_dt)


#### 7. Random Forest

In [None]:
# Hyperparameter search for the 'random_forest' model over RANDOM_FOREST_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
rf_results = hyperparameter_tuning_cv(
    model='random_forest',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=RANDOM_FOREST_PARAMS,
)

display(rf_results)

In [None]:
# Pick the random-forest search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_rf_position = rf_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_rf = rf_results.iloc[[best_rf_position]]

display(best_rf)

#### 8. Gradient Boosting

In [None]:
# Hyperparameter search for the 'gradient_boosting' model over
# GRADIENT_BOOSTING_PARAMS.
# cv_k=5 is presumably the number of cross-validation folds - confirm.
gb_results = hyperparameter_tuning_cv(
    model='gradient_boosting',
    data=X_coarse,
    labels=y_coarse.Label,
    cv_k=5,
    params=GRADIENT_BOOSTING_PARAMS,
)

display(gb_results)

In [None]:
# Pick the gradient-boosting search result with the highest F1 score.

# reset_index() yields a fresh 0..n-1 index, so idxmax() returns a row
# position usable with .iloc (and 'f1_score' is reachable even if it was an
# index level).
best_gb_position = gb_results.reset_index()['f1_score'].idxmax()
# A list indexer keeps the selection as a one-row table.
best_gb = gb_results.iloc[[best_gb_position]]

display(best_gb)

### Results

In [None]:
# Show each model's label followed by its best-scoring search row, in the
# same order the models were tuned above.
best_rows_by_model = (
    ('logistic', best_log),
    ('lda', best_lda),
    ('knn', best_knn),
    ('svc', best_svc),
    ('naive_bayes', best_nb),
    ('decision_tree', best_dt),
    ('random_forest', best_rf),
    ('gradient_boosting', best_gb),
)
for model_label, best_row in best_rows_by_model:
    display(model_label, best_row)

### Conclusions

TODO