# 04-100 : Features - Random Forest Baseline - [0.664] :: 001

Attempt to replicate the results from `0100_random-forest-baseline-0-664.ipynb`.

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `competition` package) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import logging
from typing import Any, Dict, List, Tuple, Callable

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mlflow
from tqdm.auto import tqdm

import tensorflow_addons as tfa
import keras as k
from keras import optimizers
import keras_tuner
import keras_tuner as kt

from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.metrics import f1_score

from competition import data_preparation as dp
from competition import feature_engineering as fe
from competition import model_data as md
from competition import source_data as sd
import competition.models.simple_dense as sd_model
from competition.models.heatmap_covnet import HeatmapCovnetModel

from competition.model_training import mprint, mflush, mclear
from competition.predict import PredictionBase, Baseline, HeatmapPredictor

2023-04-23 14:07:51.037317: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Configure Logging

In [3]:
# Route INFO-and-above records from the root logger to stdout so they show up
# in the cell output, prefixed with a timestamp and a padded level name.
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# emit one record immediately to confirm the configuration is active
logging.info("Started")

2023-04-23 14:07:53 INFO     Started


## Data Collection

In [4]:
# Load the gzip-compressed source training set through the project's
# `sd.read_csv` helper, using the column dtypes declared in `sd.source_dtype`.
# The path is relative to the notebook's working directory.
# NOTE(review): the shape/preview shown below the cell is presumably printed
# by the helper itself — confirm in `competition.source_data`.
df_source = sd.read_csv('../data/train.csv.gz',
                        compression='gzip',
                        dtype=sd.source_dtype)

(13174211, 20)


Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
0,20090312431273200,0,0,cutscene_click,basic,0,,-413.991394,-159.314682,380.0,494.0,,undefined,intro,tunic.historicalsociety.closet,tunic.historicalsociety.closet.intro,,,,0-4
1,20090312431273200,1,1323,person_click,basic,0,,-413.991394,-159.314682,380.0,494.0,,"Whatcha doing over there, Jo?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
2,20090312431273200,2,831,person_click,basic,0,,-413.991394,-159.314682,380.0,494.0,,Just talking to Teddy.,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4


In [5]:
# Load the source training labels (per the preview below: a `session_id`
# column of the form "<session>_q<n>" and a binary `correct` column).
df_source_labels = sd.read_csv('../data/train_labels.csv')

(212022, 2)


Unnamed: 0,session_id,correct
0,20090312431273200_q1,1
1,20090312433251036_q1,0
2,20090314121766812_q1,1


## Data Preparation & Cleaning

In [6]:
# prepare the main dataset
# NOTE(review): `df_source` is rebound to the prepared frame, so re-running
# this cell feeds already-prepared data back into `prepare_main_dataset` —
# confirm the helper tolerates that, or restart the kernel before re-running.
df_source = dp.prepare_main_dataset(df_source)

# remove sessions with problems
# `problem_sessions` is reused by the label-cleaning cell that follows.
problem_sessions = dp.find_problem_sessions(df_source)
df_source = df_source[~df_source['session_id'].isin(problem_sessions)]

In [7]:
# prepare the label dataset
# NOTE(review): `df_source_labels` is rebound in place, so this cell is not
# idempotent — confirm `prepare_label_dataset` accepts already-prepared input.
df_source_labels = dp.prepare_label_dataset(df_source_labels)

# remove sessions with problems
# Uses `problem_sessions` computed in the previous cell, so the events and the
# labels drop exactly the same sessions.
df_source_labels = df_source_labels[~df_source_labels['session_id'].isin(problem_sessions)]

## Feature Engineering

In [8]:
# create the initial features
# Builds the base feature frame from the cleaned events and labels; the
# following cells each add one family of columns to it.
df_features = fe.create_initial_features(df_source, df_source_labels)

In [9]:
# add the elapsed time feature to the features dataset
# (the `elapsed_time_*` columns visible in the combined frame later on;
# `df_features` is rebound to the extended frame)
df_features = fe.add_elapsed_time_features(
    features=df_features,
    X=df_source)

In [10]:
# add the total count features to the features dataset
# (the `count_total_*` columns; `df_features` is rebound to the extended frame)
df_features = fe.add_count_total_features(
    features=df_features,
    X=df_source)

In [11]:
# add the unique count features to the features dataset
# (the `count_unique_*` columns; `df_features` is rebound to the extended frame)
df_features = fe.add_count_unique_features(
    features=df_features,
    X=df_source)

In [12]:
# add the numeric features to the features dataset
# (per the combined frame shown later: sum/max/min/mean/mode/std columns for
# level, page, room/screen coordinates and hover duration)
df_features = fe.add_numeric_features(
    features=df_features,
    X=df_source)

In [13]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(pd.DataFrame(df_features.dtypes))

## Data Selection

In [14]:
# Seed shared by the session split (`md.select_sessions`) and by the PyCaret
# experiment (`setup(session_id=...)`) so both are repeatable.
random_state = 51

In [15]:
# split the dataset into train, validation and test sets
# The split is made over sessions. Per the printed counts (3495 / 1165 / 6988),
# `test_size=0.60` holds out 60% of the sessions and `train_size=0.75` splits
# the remaining 40% into 75% train and 25% validation.
train, val, test = md.select_sessions(
    y=df_source_labels,
    random_state=random_state,
    test_size=0.60,
    train_size=0.75)

Train: 3495
Validation: 1165
Test: 6988


## Model Training

### Functions

In [None]:
# declare the hyperparameter search space on the tuner's `hp` object
def define_tune_parameters(hp):
    """
    Declare every tunable hyperparameter on the supplied `hp` container.

    The declared names are the same string keys that are handed to
    `sd_model.tune_model` (dense layer count/units/activation, L1/L2
    regularization, dropout and learning rate). Nothing is returned; the
    function exists only for the declarations it makes on `hp`.
    """
    # (hp method, positional arguments, keyword arguments) — one row per parameter
    search_space = (
        ('Int', ('dense_layer_count',), {'min_value': 1, 'max_value': 6, 'step': 1}),
        ('Int', ('dense_units',), {'min_value': 512, 'max_value': 1700, 'step': 32}),
        ('Choice', ('dense_activation',), {'values': ['relu', 'tanh', 'LeakyReLU']}),
        ('Float', ('dense_l1_regularization',), {'min_value': 0.0, 'max_value': 0.0005, 'step': 0.00001}),
        ('Float', ('dense_l2_regularization',), {'min_value': 0.0, 'max_value': 0.001, 'step': 0.0001}),
        ('Float', ('dense_dropout',), {'min_value': 0.005, 'max_value': 0.1, 'step': 0.005}),
        ('Choice', ('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5, 1e-6]), {}),
    )

    for method_name, positional, keyword in search_space:
        getattr(hp, method_name)(*positional, **keyword)

In [None]:
def perform_experiment(experiment_name:str,
                       train:np.ndarray,
                       val:np.ndarray,
                       test:np.ndarray,
                       labels:pd.DataFrame,
                       features:pd.DataFrame,
                       feature_list:List[str],
                       define_tune_parameters:Callable,
                       max_trials:int=50,
                       tune_patience:int=10,
                       batch_sizes:List[int]=None,
                       optimizer_classes:List[Callable]=None,
                       train_epochs:int=2000) -> None:
    """
    Performs an experiment with the given features and hyperparameter tuning.

    A feature dataset is built from `features`/`labels` for the selected
    columns, its labels are converted with `md.labels_to_categorical`, and
    `sd_model.tune_model` is then run once for every combination of batch
    size and optimizer, under the MLflow experiment `experiment_name`.

    Parameters
    ----------
    experiment_name : name passed to `mlflow.set_experiment`.
    train, val, test : session selections forwarded to
        `md.get_feature_dataset` (presumably the arrays returned by
        `md.select_sessions` — verify against the caller).
    labels : label frame forwarded as `y`.
    features : feature frame to select columns from.
        NOTE(review): no NaN filling happens here, whereas the manual cells
        in this notebook call `fillna(0)` first — confirm the chosen
        columns are NaN-free or fill before calling.
    feature_list : names of the feature columns to use.
    define_tune_parameters : callable that declares the search space on the
        tuner's `hp` object; forwarded to `sd_model.tune_model`.
    max_trials : maximum tuner trials per (batch size, optimizer) run.
    tune_patience : patience value forwarded to `sd_model.tune_model`.
    batch_sizes : batch sizes to sweep; defaults to
        [500, 1000, 2000, 3000, 4000].
    optimizer_classes : optimizer classes to sweep; defaults to
        [optimizers.Adam, optimizers.RMSprop].
    train_epochs : epoch budget forwarded to `sd_model.tune_model`.

    Raises
    ------
    ValueError : if `batch_sizes` or `optimizer_classes` is empty, because
        the sweep would otherwise finish without training anything.
    """
    # Resolve the sweep defaults inside the body so the default lists are
    # never shared between calls, and fail fast on an empty sweep.
    if batch_sizes is None:
        batch_sizes = [500, 1000, 2000, 3000, 4000]
    batch_sizes = list(batch_sizes)
    if len(batch_sizes) == 0:
        raise ValueError('batch_sizes must contain at least one batch size')

    if optimizer_classes is None:
        optimizer_classes = [optimizers.Adam, optimizers.RMSprop]
    optimizer_classes = list(optimizer_classes)
    if len(optimizer_classes) == 0:
        raise ValueError('optimizer_classes must contain at least one optimizer')

    # create the simple model dataset
    simple_model_dataset = md.get_feature_dataset(
        features=features,
        y=labels,
        feature_list=feature_list,
        train=train,
        val=val,
        test=test,
        include_question=True,
        expand_question=False)

    # convert the labels to their categorical form (one output column per class)
    cat_features_dataset = md.labels_to_categorical(simple_model_dataset)

    # the model input width is the number of columns of the training inputs
    input_data = cat_features_dataset['train']['X']
    features_dataset_shape = input_data.shape[1]
    print('features_dataset_shape:', features_dataset_shape)

    # the model output width is the number of columns of the training labels
    output_data = cat_features_dataset['train']['y']
    output_shape = output_data.shape[1]
    print('output_shape', output_shape)

    # select the experiment and close any run that is still active so the
    # tuning runs below do not attach to it
    mlflow.set_experiment(experiment_name)
    mlflow.end_run()

    # find the best model: one tuning run per (batch size, optimizer) pair
    for batch_size in batch_sizes:
        for optimizer in optimizer_classes:
            sd_model.tune_model(
                define_tune_parameters=define_tune_parameters,
                dataset=cat_features_dataset,
                max_trials=max_trials,
                input_shape=features_dataset_shape,
                output_shape=output_shape,
                dense_layer_count='dense_layer_count',
                dense_units='dense_units',
                dense_activation='dense_activation',
                dense_l1_regularization='dense_l1_regularization',
                dense_l2_regularization='dense_l2_regularization',
                dense_dropout='dense_dropout',
                train_epochs=train_epochs,
                train_batch_size=batch_size,
                train_optimizer=optimizer,
                train_learning_rate='learning_rate',
                train_loss='categorical_crossentropy',
                # a fresh metric object for every tuning run
                train_metrics=[tfa.metrics.F1Score(name='f1_score', num_classes=2, threshold=0.5, average='macro')],
                train_class_weight=None,
                tune_objective='val_f1_score',
                tune_direction='max',
                tuner_type=kt.tuners.BayesianOptimization,
                tune_patience=tune_patience)

### Baseline Model Features

In [None]:
# Baseline run: tune the dense model on five elapsed-time aggregates only,
# logged under the MLflow experiment '04_000_baseline'.
perform_experiment(
    experiment_name='04_000_baseline',
    train=train,
    val=val,
    test=test,
    labels=df_source_labels,
    features=df_features,
    feature_list=['elapsed_time_sum', 'elapsed_time_max', 'elapsed_time_min', 'elapsed_time_mean', 'elapsed_time_mode'],
    define_tune_parameters=define_tune_parameters,
)

### Create Datasets

In [None]:
# Feature columns used for the dense-model dataset: the unique-value counts of
# the categorical event columns, followed by the mean and then the standard
# deviation of every numeric event column.
feature_list = (
    [
        f'count_unique_{column}'
        for column in ('event_name', 'name', 'fqid', 'room_fqid', 'text_fqid')
    ]
    + [
        f'{column}_{statistic}'
        for statistic in ('mean', 'std')
        for column in (
            'elapsed_time',
            'level',
            'page',
            'room_coor_x',
            'room_coor_y',
            'screen_coor_x',
            'screen_coor_y',
            'hover_duration',
        )
    ]
)

In [None]:
# set all nan values to 0
# NOTE(review): this rebinds `df_features`, which the PyCaret section further
# down also reads. The saved PyCaret outputs still contain missing values
# (empty cells in the preview, "Rows with missing values 36.2%"), which
# suggests this cell had not been run for them — a Restart & Run All would
# therefore feed PyCaret zero-filled data and may change those results.
df_features = df_features.fillna(0)

In [None]:
# create the simple model dataset
# Selects the `feature_list` columns for the train/val/test sessions; the
# question is included as an input (`include_question=True`) without being
# expanded (`expand_question=False`).
simple_model_dataset = md.get_feature_dataset(
    features=df_features,
    y=df_source_labels,
    feature_list=feature_list,
    train=train,
    val=val,
    test=test,
    include_question=True,
    expand_question=False)

In [None]:
# convert the labels to their categorical form
# NOTE(review): presumably a one-hot encoding of the binary `correct` label
# (the model is trained with categorical cross-entropy and a 2-class F1
# metric) — confirm in `md.labels_to_categorical`.
cat_features_dataset = md.labels_to_categorical(simple_model_dataset)

In [None]:
# model input width: number of columns of the training inputs
input_data = cat_features_dataset['train']['X']
features_dataset_shape = input_data.shape[1]
print('features_dataset_shape:', features_dataset_shape)

# model output width: number of columns of the categorical training labels
output_data = cat_features_dataset['train']['y']
output_shape = output_data.shape[1]
print('output_shape', output_shape)

### Training

In [None]:
# select the MLflow experiment that the tuning runs below are logged under
mlflow.set_experiment("04_001")

In [None]:
# close any MLflow run that is still active (e.g. after an interrupted cell)
# before starting new tuning runs
mlflow.end_run()

In [None]:
# declare the hyperparameter search space on the tuner's `hp` object
# NOTE(review): this repeats, with the same body, the `define_tune_parameters`
# defined in the earlier "Functions" section and rebinds that name; keeping a
# single definition would avoid the two drifting apart.
def define_tune_parameters(hp):
    """
    Declare every tunable hyperparameter on the supplied `hp` container.

    The declared names are the same string keys that are handed to
    `sd_model.tune_model` (dense layer count/units/activation, L1/L2
    regularization, dropout and learning rate). Nothing is returned; the
    function exists only for the declarations it makes on `hp`.
    """
    # (hp method, positional arguments, keyword arguments) — one row per parameter
    search_space = (
        ('Int', ('dense_layer_count',), {'min_value': 1, 'max_value': 6, 'step': 1}),
        ('Int', ('dense_units',), {'min_value': 512, 'max_value': 1700, 'step': 32}),
        ('Choice', ('dense_activation',), {'values': ['relu', 'tanh', 'LeakyReLU']}),
        ('Float', ('dense_l1_regularization',), {'min_value': 0.0, 'max_value': 0.0005, 'step': 0.00001}),
        ('Float', ('dense_l2_regularization',), {'min_value': 0.0, 'max_value': 0.001, 'step': 0.0001}),
        ('Float', ('dense_dropout',), {'min_value': 0.005, 'max_value': 0.1, 'step': 0.005}),
        ('Choice', ('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5, 1e-6]), {}),
    )

    for method_name, positional, keyword in search_space:
        getattr(hp, method_name)(*positional, **keyword)

In [None]:
# find the best model: one Bayesian-optimisation tuning run for every
# (batch size, optimizer) combination, scored on the validation macro F1
for batch_size in (500, 1000, 2000, 3000, 4000):
    for optimizer in (optimizers.Adam, optimizers.RMSprop):
        # build a fresh metric object for each tuning run
        macro_f1 = tfa.metrics.F1Score(
            name='f1_score',
            num_classes=2,
            threshold=0.5,
            average='macro')

        sd_model.tune_model(
            define_tune_parameters=define_tune_parameters,
            dataset=cat_features_dataset,
            max_trials=50,
            # network dimensions
            input_shape=features_dataset_shape,
            output_shape=output_shape,
            # names of the hyperparameters declared by define_tune_parameters
            dense_layer_count='dense_layer_count',
            dense_units='dense_units',
            dense_activation='dense_activation',
            dense_l1_regularization='dense_l1_regularization',
            dense_l2_regularization='dense_l2_regularization',
            dense_dropout='dense_dropout',
            # training configuration
            train_epochs=2000,
            train_batch_size=batch_size,
            train_optimizer=optimizer,
            train_learning_rate='learning_rate',
            train_loss='categorical_crossentropy',
            train_metrics=[macro_f1],
            train_class_weight=None,
            # tuner configuration
            tune_objective='val_f1_score',
            tune_direction='max',
            tuner_type=kt.tuners.BayesianOptimization,
            tune_patience=10)

## PyCaret

reference: https://pycaret.gitbook.io/docs/learn-pycaret/official-blog/predicting-crashes-in-gold-prices-using-pycaret#ensemble-models

In [36]:
!pip install xgboost==1.6.2

[0m

In [38]:
!pip install catboost==1.1.1

[0m

In [32]:
from pycaret.classification import *
from sklearn.metrics import classification_report

In [17]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(pd.DataFrame(df_features.dtypes))

### Create Datasets

In [18]:
def get_features_with_labels(features:pd.DataFrame,
                            y:pd.DataFrame) -> pd.DataFrame:
    """
    Attach the engineered features to every row of the label dataset.

    The label frame `y` (which holds the target variable) is left-joined to
    `features` on the (`session_id`, `level_group`) pair, so every label row
    is kept, in its original order and with its original index. Label rows
    without a matching feature row get NaN in the feature columns.
    """
    join_keys = ['session_id', 'level_group']

    # index the features by the join keys so they can be looked up per label row
    features_by_key = features.set_index(join_keys)

    return y.join(features_by_key, on=join_keys, how='left')

In [19]:
# combine the features and labels: one row per (session, question) label with
# the features of the matching session / level group attached
df_combined = get_features_with_labels(features=df_features, y=df_source_labels)

# lift the column limit only for this preview so every feature column is shown
with pd.option_context('display.max_columns', None):
    display(df_combined.head(2))

Unnamed: 0,session_id,question_num,correct,level_group,elapsed_time_sum,elapsed_time_max,elapsed_time_min,elapsed_time_mean,elapsed_time_mode,elapsed_time_std,count_total_event_name,count_total_name,count_total_fqid,count_total_room_fqid,count_total_text_fqid,count_total_level,count_unique_event_name,count_unique_name,count_unique_fqid,count_unique_room_fqid,count_unique_text_fqid,count_unique_level,level_sum,level_max,level_min,level_mean,level_mode,level_std,page_sum,page_max,page_min,page_mean,page_mode,page_std,room_coor_x_sum,room_coor_x_max,room_coor_x_min,room_coor_x_mean,room_coor_x_mode,room_coor_x_std,room_coor_y_sum,room_coor_y_max,room_coor_y_min,room_coor_y_mean,room_coor_y_mode,room_coor_y_std,screen_coor_x_sum,screen_coor_x_max,screen_coor_x_min,screen_coor_x_mean,screen_coor_x_mode,screen_coor_x_std,screen_coor_y_sum,screen_coor_y_max,screen_coor_y_min,screen_coor_y_mean,screen_coor_y_mode,screen_coor_y_std,hover_duration_sum,hover_duration_max,hover_duration_min,hover_duration_mean,hover_duration_mode,hover_duration_std
0,20090312431273200,1,1,0-4,0.008799,0.052789,0.0,0.023242,0.0,0.013341,0.088782,0.088782,0.06462,0.088782,0.054054,0.088782,0.75,0.0,0.20339,0.090909,0.225,0.0,0.008384,0.111111,0.0,0.05404,0.083333,0.034194,0.0,,,,0.0,,0.602928,0.878566,0.120715,0.513742,0.351427,0.664471,0.611251,1.0,0.274126,0.52459,0.431725,0.736468,0.108532,0.672249,0.008772,0.357584,0.30303,0.171348,0.112915,0.875934,0.010463,0.493341,0.400598,0.076357,0.13121,1.0,0.005638,0.41306,1.0,0.487796
1,20090312433251036,1,0,0-4,0.008434,0.063325,0.0,0.02645,0.0,0.018252,0.057588,0.057588,0.053312,0.057588,0.050874,0.057588,1.0,0.333333,0.067797,0.0,0.075,0.0,0.005167,0.111111,0.0,0.051958,0.083333,0.034239,0.0,0.0,0.0,0.0,0.0,0.0,0.53136,0.870427,0.082633,0.478427,0.358741,0.68244,0.629244,1.0,0.050337,0.542916,0.491507,0.763788,0.043705,0.697767,0.0,0.285664,0.318182,0.201399,0.075188,0.875934,0.007474,0.474922,0.403587,0.101738,0.100932,1.0,0.005638,0.317745,1.0,0.428841


In [20]:
# Build the training, validation and test frames: keep the rows whose session
# belongs to the respective split, then drop the `session_id` column so it is
# not passed to the models as a feature.
df_train, df_val, df_test = (
    df_combined.loc[df_combined['session_id'].isin(session_ids)].drop(columns=['session_id'])
    for session_ids in (train, val, test)
)

# preview the training frame with every column visible
with pd.option_context('display.max_columns', None):
    display(df_train.head(2))

Unnamed: 0,question_num,correct,level_group,elapsed_time_sum,elapsed_time_max,elapsed_time_min,elapsed_time_mean,elapsed_time_mode,elapsed_time_std,count_total_event_name,count_total_name,count_total_fqid,count_total_room_fqid,count_total_text_fqid,count_total_level,count_unique_event_name,count_unique_name,count_unique_fqid,count_unique_room_fqid,count_unique_text_fqid,count_unique_level,level_sum,level_max,level_min,level_mean,level_mode,level_std,page_sum,page_max,page_min,page_mean,page_mode,page_std,room_coor_x_sum,room_coor_x_max,room_coor_x_min,room_coor_x_mean,room_coor_x_mode,room_coor_x_std,room_coor_y_sum,room_coor_y_max,room_coor_y_min,room_coor_y_mean,room_coor_y_mode,room_coor_y_std,screen_coor_x_sum,screen_coor_x_max,screen_coor_x_min,screen_coor_x_mean,screen_coor_x_mode,screen_coor_x_std,screen_coor_y_sum,screen_coor_y_max,screen_coor_y_min,screen_coor_y_mean,screen_coor_y_mode,screen_coor_y_std,hover_duration_sum,hover_duration_max,hover_duration_min,hover_duration_mean,hover_duration_mode,hover_duration_std
1,1,0,0-4,0.008434,0.063325,0.0,0.02645,0.0,0.018252,0.057588,0.057588,0.053312,0.057588,0.050874,0.057588,1.0,0.333333,0.067797,0.0,0.075,0.0,0.005167,0.111111,0.0,0.051958,0.083333,0.034239,0.0,0.0,0.0,0.0,0.0,0.0,0.53136,0.870427,0.082633,0.478427,0.358741,0.68244,0.629244,1.0,0.050337,0.542916,0.491507,0.763788,0.043705,0.697767,0.0,0.285664,0.318182,0.201399,0.075188,0.875934,0.007474,0.474922,0.403587,0.101738,0.100932,1.0,0.005638,0.317745,1.0,0.428841
4,1,1,0-4,0.00514,0.047938,0.0,0.021001,0.0,0.012045,0.019196,0.019196,0.025848,0.019196,0.022258,0.019196,0.5,0.0,0.067797,0.090909,0.05,0.0,0.002373,0.111111,0.0,0.053738,0.055556,0.035234,0.0,,,,0.0,,0.609612,0.879013,0.154895,0.520039,0.470365,0.664119,0.644255,1.0,0.313764,0.560751,0.450186,0.728845,0.03045,0.688198,0.023126,0.343544,0.356459,0.175123,0.031477,0.792227,0.0,0.457029,0.382661,0.07282,0.135595,1.0,0.005307,0.379436,0.165837,0.42497


### Setup Classification Experiment

In [21]:
# resampling method used to balance the classes of the training data
fix_imbalance_method = 'SMOTE'  # alternative tried: 'RandomOverSampler'

# Initialise the PyCaret classification experiment: train on `df_train`, use
# `df_val` as the hold-out set, seed with the shared `random_state`, and
# resample the training data with the method chosen above.
# NOTE(review): `session_id` was dropped from the frames, so rows belonging to
# one game session (one row per question) can land in different
# cross-validation folds — consider whether grouped folds are needed.
classifier = setup(
    data=df_train,
    target='correct',
    test_data=df_val,
    session_id=random_state,
    experiment_name='04_001_pycaret',
    fix_imbalance=True,
    fix_imbalance_method=fix_imbalance_method,
    use_gpu=False,
    html=True,
    verbose=True)

Unnamed: 0,Description,Value
0,Session id,51
1,Target,correct
2,Target type,Binary
3,Original data shape,"(83880, 63)"
4,Transformed data shape,"(109686, 65)"
5,Transformed train set shape,"(88716, 65)"
6,Transformed test set shape,"(20970, 65)"
7,Numeric features,61
8,Categorical features,1
9,Rows with missing values,36.2%


### Training

In [22]:
# Cross-validate the candidate estimators (turbo mode) and rank them by F1,
# requesting the 15 best. With `n_select` > 1 the result holds several models,
# despite the singular variable name.
top_model = compare_models(n_select=15, sort='F1', turbo=True)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
qda,Quadratic Discriminant Analysis,0.71,0.6336,0.9769,0.716,0.8261,0.0621,0.1009,0.095
et,Extra Trees Classifier,0.5948,0.5129,0.7043,0.7132,0.6984,0.0504,0.076,0.457
rf,Random Forest Classifier,0.5733,0.5174,0.6738,0.7139,0.6654,0.0115,0.0339,0.252
lr,Logistic Regression,0.5486,0.614,0.5217,0.7887,0.5802,0.115,0.1591,0.217
lda,Linear Discriminant Analysis,0.5458,0.6168,0.5175,0.7877,0.5762,0.1119,0.1555,0.088
ridge,Ridge Classifier,0.5467,0.0,0.5185,0.7897,0.5761,0.1133,0.1583,0.089
dt,Decision Tree Classifier,0.4146,0.3687,0.4806,0.5822,0.4979,-0.2084,-0.2181,0.086
xgboost,Extreme Gradient Boosting,0.3825,0.3046,0.4551,0.556,0.4506,-0.2767,-0.3078,0.095
ada,Ada Boost Classifier,0.4121,0.4241,0.4519,0.5861,0.449,-0.1765,-0.2001,0.095
nb,Naive Bayes,0.4856,0.6406,0.3685,0.7782,0.4365,0.1321,0.1735,0.085


#### Create Top Models

In [23]:
# Train and cross-validate Quadratic Discriminant Analysis, the top-ranked
# model by F1 in the comparison above.
model_qda = create_model('qda')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7962,0.6608,1.0,0.7758,0.8737,0.387,0.4898
1,0.7089,0.8901,0.9973,0.7086,0.8285,0.024,0.0894
2,0.7058,0.5598,0.9869,0.7094,0.8255,0.028,0.0678
3,0.7051,0.587,0.9878,0.7087,0.8253,0.0233,0.0595
4,0.7064,0.6141,0.9926,0.7082,0.8266,0.0203,0.062
5,0.7042,0.662,0.9862,0.7085,0.8246,0.0219,0.0539
6,0.7029,0.6302,0.9856,0.7078,0.8239,0.0172,0.0428
7,0.6953,0.6056,0.9714,0.7065,0.818,0.0087,0.017
8,0.674,0.5663,0.8918,0.7158,0.7941,0.0542,0.0627
9,0.7007,0.5604,0.9696,0.711,0.8204,0.0365,0.0646


In [24]:
# Train and cross-validate the Extra Trees classifier (second by F1 above).
model_et = create_model('et')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7709,0.6591,1.0,0.7548,0.8602,0.2888,0.4108
1,0.7088,0.452,0.9202,0.7341,0.8167,0.1503,0.1761
2,0.4878,0.3248,0.5715,0.6574,0.6114,-0.1289,-0.1317
3,0.5066,0.4267,0.6039,0.6654,0.6332,-0.1148,-0.1161
4,0.5667,0.4969,0.6542,0.7088,0.6804,0.011,0.0111
5,0.5796,0.553,0.6952,0.7046,0.6999,-0.0018,-0.0018
6,0.5131,0.4849,0.4887,0.7317,0.586,0.0489,0.055
7,0.4956,0.538,0.4732,0.7152,0.5695,0.0182,0.0206
8,0.5789,0.5048,0.647,0.726,0.6842,0.0587,0.0596
9,0.7395,0.6885,0.989,0.734,0.8426,0.1738,0.276


In [25]:
# Train and cross-validate the Random Forest classifier (third by F1 above).
model_rf = create_model('rf')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7104,0.6678,1.0,0.7088,0.8296,0.0256,0.1139
1,0.7454,0.4838,0.9303,0.7614,0.8374,0.2763,0.3084
2,0.3631,0.2192,0.4283,0.5636,0.4867,-0.3125,-0.3334
3,0.5378,0.4855,0.628,0.6889,0.6571,-0.0472,-0.0477
4,0.6287,0.5264,0.7239,0.7429,0.7333,0.1227,0.1228
5,0.5648,0.4776,0.7317,0.6771,0.7034,-0.1089,-0.11
6,0.5316,0.549,0.5153,0.7415,0.6081,0.0707,0.0782
7,0.4179,0.6236,0.2137,0.8449,0.3411,0.0789,0.1428
8,0.5257,0.4285,0.5701,0.7013,0.6289,-0.0093,-0.0097
9,0.7078,0.7131,0.9968,0.708,0.8279,0.0189,0.0733


In [26]:
# Train and cross-validate Logistic Regression (fourth by F1 above).
model_lr = create_model('lr')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.746,0.7181,1.0,0.7351,0.8473,0.1854,0.3197
1,0.6999,0.476,0.892,0.7374,0.8073,0.1555,0.171
2,0.5029,0.6891,0.3519,0.861,0.4996,0.1531,0.2175
3,0.478,0.6274,0.339,0.8103,0.4781,0.1063,0.1493
4,0.4909,0.6349,0.3611,0.8128,0.5001,0.117,0.1595
5,0.4589,0.6902,0.2761,0.8639,0.4185,0.1168,0.1878
6,0.3699,0.6116,0.1303,0.845,0.2258,0.0461,0.1072
7,0.632,0.6607,0.6486,0.7919,0.7131,0.2141,0.2225
8,0.5861,0.528,0.6743,0.7207,0.6967,0.0473,0.0476
9,0.5211,0.5038,0.544,0.7091,0.6156,0.0088,0.0094


#### Hyper-Parameter Tuning

In [27]:
# Search 50 hyperparameter candidates for QDA, optimising F1.
# Per the saved output, the original model beat the tuned one and was the one
# returned, so `tuned_qda` is effectively `model_qda`; the table below shows
# the tuned candidate's metrics.
tuned_qda = tune_model(model_qda, optimize='F1', n_iter=50)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7059,0.6511,0.9977,0.7063,0.8271,0.0089,0.0453
1,0.7066,0.9964,0.9964,0.7072,0.8272,0.0145,0.0582
2,0.701,0.9826,0.9885,0.7056,0.8234,0.0027,0.0083
3,0.7035,0.9707,0.9867,0.7079,0.8244,0.0181,0.046
4,0.701,0.9462,0.9847,0.7067,0.8228,0.0101,0.0254
5,0.7032,0.881,0.9822,0.709,0.8236,0.025,0.0555
6,0.6973,0.5668,0.9777,0.7061,0.82,0.0063,0.0139
7,0.6896,0.6588,0.9596,0.7059,0.8134,0.0044,0.0076
8,0.6889,0.8027,0.9581,0.7059,0.8129,0.0045,0.0075
9,0.781,0.832,0.9675,0.7767,0.8617,0.3646,0.4218


Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [28]:
# Search 50 hyperparameter candidates for Extra Trees, optimising F1.
# NOTE(review): in the saved output every fold has Recall 1.0, AUC 0.5 and
# Kappa 0.0, i.e. the tuned model predicts the positive class for every row.
# F1 of the positive class rewards that on this ~70%-positive data, so
# consider a different optimisation metric.
tuned_et = tune_model(model_et, optimize='F1', n_iter=50)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.705,0.5,1.0,0.705,0.827,0.0,0.0
1,0.705,0.5,1.0,0.705,0.827,0.0,0.0
2,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
3,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
4,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
5,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
6,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
7,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
8,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
9,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0


Fitting 10 folds for each of 50 candidates, totalling 500 fits


In [29]:
# Search 50 hyperparameter candidates for Random Forest, optimising F1.
# NOTE(review): in the saved output every fold has Recall 1.0, AUC 0.5 and
# Kappa 0.0, i.e. the tuned model predicts the positive class for every row.
# F1 of the positive class rewards that on this ~70%-positive data, so
# consider a different optimisation metric.
tuned_rf = tune_model(model_rf, optimize='F1', n_iter=50)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.705,0.5,1.0,0.705,0.827,0.0,0.0
1,0.705,0.5,1.0,0.705,0.827,0.0,0.0
2,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
3,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
4,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
5,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
6,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
7,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
8,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0
9,0.7051,0.5,1.0,0.7051,0.8271,0.0,0.0


Fitting 10 folds for each of 50 candidates, totalling 500 fits


In [30]:
# Search 50 hyperparameter candidates for Logistic Regression, optimising F1.
tuned_lr = tune_model(model_lr, optimize='F1', n_iter=50)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7465,0.7159,1.0,0.7355,0.8476,0.1875,0.3216
1,0.6966,0.4838,0.885,0.7372,0.8044,0.1525,0.1659
2,0.5153,0.6939,0.3742,0.8588,0.5213,0.163,0.2244
3,0.4823,0.6276,0.3503,0.8056,0.4883,0.1063,0.1465
4,0.4958,0.6351,0.3724,0.8098,0.5102,0.1187,0.159
5,0.4673,0.6883,0.2928,0.8586,0.4367,0.1217,0.1893
6,0.3775,0.6092,0.1443,0.8421,0.2463,0.0505,0.1113
7,0.6326,0.6583,0.6513,0.7909,0.7143,0.2132,0.2212
8,0.585,0.5267,0.6704,0.7213,0.6949,0.0486,0.0489
9,0.5188,0.5046,0.5406,0.708,0.6131,0.0064,0.0068


Fitting 10 folds for each of 50 candidates, totalling 500 fits


### Evaluate Models : Unseen Dataset

In [31]:
# Score the QDA model on the unseen test sessions; the returned frame carries
# the predictions in `prediction_label`. The report adds per-class metrics,
# including the minority class 0 that the summary row does not show.
df_qda = predict_model(estimator=tuned_qda, data=df_test)
print(classification_report(y_true=df_qda.correct, y_pred=df_qda.prediction_label))

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Quadratic Discriminant Analysis,0.6539,0.5348,0.8692,0.7062,0.7792,0.0167,0.0188


              precision    recall  f1-score   support

           0       0.32      0.14      0.20     37388
           1       0.71      0.87      0.78     88396

    accuracy                           0.65    125784
   macro avg       0.51      0.51      0.49    125784
weighted avg       0.59      0.65      0.61    125784



In [33]:
# Score the tuned Extra Trees model on the unseen test sessions and print the
# per-class report. Per the saved output, class 0 is never predicted.
df_et = predict_model(estimator=tuned_et, data=df_test)
print(classification_report(y_true=df_et.correct, y_pred=df_et.prediction_label))

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Extra Trees Classifier,0.7028,0.5,1.0,0.7028,0.8254,0.0,0.0


              precision    recall  f1-score   support

           0       0.00      0.00      0.00     37388
           1       0.70      1.00      0.83     88396

    accuracy                           0.70    125784
   macro avg       0.35      0.50      0.41    125784
weighted avg       0.49      0.70      0.58    125784



In [34]:
# Score the tuned Random Forest model on the unseen test sessions and print
# the per-class report. Per the saved output, class 0 is never predicted.
df_rf = predict_model(estimator=tuned_rf, data=df_test)
print(classification_report(y_true=df_rf.correct, y_pred=df_rf.prediction_label))

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.7028,0.5,1.0,0.7028,0.8254,0.0,0.0


              precision    recall  f1-score   support

           0       0.00      0.00      0.00     37388
           1       0.70      1.00      0.83     88396

    accuracy                           0.70    125784
   macro avg       0.35      0.50      0.41    125784
weighted avg       0.49      0.70      0.58    125784



In [35]:
# Score the tuned Logistic Regression model on the unseen test sessions and
# print the per-class report. Per the saved output, it has the highest macro
# F1 (0.59) of the four models evaluated here.
df_lr = predict_model(estimator=tuned_lr, data=df_test)
print(classification_report(y_true=df_lr.correct, y_pred=df_lr.prediction_label))

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.6156,0.6654,0.6131,0.793,0.6916,0.2032,0.2153


              precision    recall  f1-score   support

           0       0.40      0.62      0.49     37388
           1       0.79      0.61      0.69     88396

    accuracy                           0.62    125784
   macro avg       0.60      0.62      0.59    125784
weighted avg       0.68      0.62      0.63    125784

