# 04-100 : Features - Random Forest Baseline - [0.664] :: 001

Attempt to replicate the results from `0100_random-forest-baseline-0-664.ipynb`.

In [1]:
# reload edited project modules (e.g. the `competition` package) automatically
# before each cell runs, so source changes are picked up without a restart
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import logging
from typing import Any, Dict, List, Tuple, Callable

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mlflow
from tqdm.auto import tqdm

import tensorflow_addons as tfa
import keras as k
from keras import optimizers
import keras_tuner
import keras_tuner as kt

from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.metrics import f1_score

from competition import data_preparation as dp
from competition import feature_engineering as fe
from competition import model_data as md
from competition import source_data as sd
import competition.models.simple_dense as sd_model
from competition.models.heatmap_covnet import HeatmapCovnetModel

from competition.model_training import mprint, mflush, mclear
from competition.predict import PredictionBase, Baseline, HeatmapPredictor

2023-04-23 17:05:47.718469: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Configure Logging

In [3]:
# route INFO-and-above log records to stdout so they show up in cell output
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=[logging.StreamHandler(sys.stdout)],
)

# marker record that confirms the configuration is active
logging.info("Started")

2023-04-23 17:05:53 INFO     Started


## Data Collection

In [4]:
# load the source training set (gzip-compressed CSV) with the project's
# column dtype mapping; the saved output reports 13,174,211 rows x 20 columns
df_source = sd.read_csv('../data/train.csv.gz',
                        compression='gzip',
                        dtype=sd.source_dtype)

(13174211, 20)


Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
0,20090312431273200,0,0,cutscene_click,basic,0,,-413.991394,-159.314682,380.0,494.0,,undefined,intro,tunic.historicalsociety.closet,tunic.historicalsociety.closet.intro,,,,0-4
1,20090312431273200,1,1323,person_click,basic,0,,-413.991394,-159.314682,380.0,494.0,,"Whatcha doing over there, Jo?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
2,20090312431273200,2,831,person_click,basic,0,,-413.991394,-159.314682,380.0,494.0,,Just talking to Teddy.,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4


In [5]:
# load the source training labels: per the preview below, one row per
# "<session_id>_q<N>" key together with its `correct` flag
df_source_labels = sd.read_csv('../data/train_labels.csv')

(212022, 2)


Unnamed: 0,session_id,correct
0,20090312431273200_q1,1
1,20090312433251036_q1,0
2,20090314121766812_q1,1


## Data Preparation & Cleaning

In [6]:
# prepare the main dataset
# NOTE: df_source is rebound in place, so re-running this cell feeds the
# already-prepared frame back into prepare_main_dataset
df_source = dp.prepare_main_dataset(df_source)

# remove sessions with problems; `problem_sessions` is reused by the label
# cleaning cell so both frames drop the same sessions
problem_sessions = dp.find_problem_sessions(df_source)
df_source = df_source[~df_source['session_id'].isin(problem_sessions)]

In [7]:
# prepare the label dataset
# NOTE(review): later previews show session_id, question_num, correct and
# level_group columns, so this presumably splits the "<session>_q<N>" key —
# confirm in competition.data_preparation
df_source_labels = dp.prepare_label_dataset(df_source_labels)

# remove sessions with problems (the list computed from df_source in the
# previous cell, which must have been run first)
df_source_labels = df_source_labels[~df_source_labels['session_id'].isin(problem_sessions)]

## Feature Engineering

In [8]:
# create the initial features frame from the events and the labels; each of
# the fe.add_* cells that follow rebinds this same `df_features` name
df_features = fe.create_initial_features(df_source, df_source_labels)

In [9]:
# add the elapsed time feature to the features dataset
# (presumably the elapsed_time_* columns seen in the combined preview — confirm)
df_features = fe.add_elapsed_time_features(
    features=df_features,
    X=df_source)

In [10]:
# add the total count features to the features dataset
# (presumably the count_total_* columns seen in the combined preview — confirm)
df_features = fe.add_count_total_features(
    features=df_features,
    X=df_source)

In [11]:
# add the unique count features to the features dataset
# (presumably the count_unique_* columns seen in the combined preview — confirm)
df_features = fe.add_count_unique_features(
    features=df_features,
    X=df_source)

In [12]:
# add the numeric features to the features dataset
# (presumably the level_*, page_*, room_coor_*, screen_coor_* and
# hover_duration_* columns seen in the combined preview — confirm)
df_features = fe.add_numeric_features(
    features=df_features,
    X=df_source)

In [13]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(pd.DataFrame(df_features.dtypes))

## Data Selection

In [14]:
# single seed shared by the session split (md.select_sessions) and by
# PyCaret's setup(session_id=...) further down
random_state = 51

In [15]:
# split the dataset into train, validation and test sets
# Judging by the counts printed below, test_size is taken from all sessions
# first (6988 of 11648 ~ 0.60) and train_size is the share of the remaining
# sessions used for training (3495 of 4660 = 0.75); the rest is validation.
train, val, test = md.select_sessions(
    y=df_source_labels,
    random_state=random_state,
    test_size=0.60,
    train_size=0.75)

Train: 3495
Validation: 1165
Test: 6988


## Model Training

### Functions

In [None]:
# create the hyperparameter object
def define_tune_parameters(hp):
    """
    Register the hyperparameter search space on `hp`.

    `hp` is expected to expose `Int`, `Float` and `Choice` methods (the
    keras-tuner HyperParameters interface). Nothing is returned: the search
    space is declared purely through those calls, in the order listed below.
    """
    # (method name, positional args, keyword args) for each hyperparameter
    search_space = (
        ('Int', ('dense_layer_count',), {'min_value': 1, 'max_value': 6, 'step': 1}),
        ('Int', ('dense_units',), {'min_value': 512, 'max_value': 1700, 'step': 32}),
        ('Choice', ('dense_activation',), {'values': ['relu', 'tanh', 'LeakyReLU']}),
        ('Float', ('dense_l1_regularization',), {'min_value': 0.0, 'max_value': 0.0005, 'step': 0.00001}),
        ('Float', ('dense_l2_regularization',), {'min_value': 0.0, 'max_value': 0.001, 'step': 0.0001}),
        ('Float', ('dense_dropout',), {'min_value': 0.005, 'max_value': 0.1, 'step': 0.005}),
        ('Choice', ('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5, 1e-6]), {}),
    )
    for method_name, args, kwargs in search_space:
        getattr(hp, method_name)(*args, **kwargs)

In [None]:
def perform_experiment(experiment_name:str,
                       train:np.ndarray,
                       val:np.ndarray,
                       test:np.ndarray,
                       labels:pd.DataFrame,
                       features:pd.DataFrame,
                       feature_list:List[str],
                       define_tune_parameters:Callable,
                       max_trials:int=50,
                       tune_patience:int=10,
                       batch_sizes:Tuple[int, ...]=(500, 1000, 2000, 3000, 4000),
                       train_epochs:int=2000) ->None:
    """
    Performs an experiment with the given features and hyperparameter tuning.

    A model dataset is built from `features` and `labels` restricted to
    `feature_list`, its labels are converted to categorical form, and
    `sd_model.tune_model` is then run once for every combination of batch
    size and optimizer (Adam, RMSprop) under the MLflow experiment
    `experiment_name`.

    Parameters
    ----------
    experiment_name : name passed to `mlflow.set_experiment`.
    train, val, test : session selections forwarded to
        `md.get_feature_dataset`.
    labels : label frame, forwarded as `y`.
    features : engineered feature frame.
    feature_list : names of the feature columns to use.
    define_tune_parameters : callable that declares the search space;
        forwarded unchanged to `sd_model.tune_model`.
    max_trials : tuner trials for each (batch size, optimizer) combination.
    tune_patience : patience value forwarded to the tuner.
    batch_sizes : training batch sizes to sweep. The default reproduces the
        sweep that used to be hard-coded in the loop.
    train_epochs : epoch budget forwarded to the tuner (previously the
        hard-coded value 2000).

    Returns
    -------
    None. Results are produced as side effects of `sd_model.tune_model` and
    the MLflow calls; the dataset shapes are printed.
    """
    # create the simple model dataset
    simple_model_dataset = md.get_feature_dataset(
        features=features,
        y=labels,
        feature_list=feature_list,
        train=train,
        val=val,
        test=test,
        include_question=True,
        expand_question=False)

    # convert the labels to categorical form (matches the
    # 'categorical_crossentropy' loss used below)
    cat_features_dataset = md.labels_to_categorical(simple_model_dataset)

    # number of input columns of the training matrix
    input_data = cat_features_dataset['train']['X']
    features_dataset_shape = input_data.shape[1]
    print('features_dataset_shape:', features_dataset_shape)

    # number of output columns of the categorical labels
    output_data = cat_features_dataset['train']['y']
    output_shape = output_data.shape[1]
    print('output_shape', output_shape)

    # select the experiment and close any run that is still active
    mlflow.set_experiment(experiment_name)
    mlflow.end_run()

    # find the best model: one tuner search per (batch size, optimizer) pair
    for batch_size in batch_sizes:
        for optimizer in [optimizers.Adam, optimizers.RMSprop]:
            sd_model.tune_model(
                define_tune_parameters=define_tune_parameters,
                dataset=cat_features_dataset,
                max_trials=max_trials,
                input_shape=features_dataset_shape,
                output_shape=output_shape,
                # the string values below are the hyperparameter names
                # declared by define_tune_parameters
                dense_layer_count='dense_layer_count',
                dense_units='dense_units',
                dense_activation='dense_activation',
                dense_l1_regularization='dense_l1_regularization',
                dense_l2_regularization='dense_l2_regularization',
                dense_dropout='dense_dropout',
                train_epochs=train_epochs,
                train_batch_size=batch_size,
                train_optimizer=optimizer,
                train_learning_rate='learning_rate',
                train_loss='categorical_crossentropy',
                train_metrics=[tfa.metrics.F1Score(name='f1_score', num_classes=2, threshold=0.5, average='macro')],
                train_class_weight=None,
                # 'val_f1_score' is the validation value of the metric named above
                tune_objective='val_f1_score',
                tune_direction='max',
                tuner_type=kt.tuners.BayesianOptimization,
                tune_patience=tune_patience)

### Baseline Model Features

In [None]:
# baseline experiment: tune the dense model on the elapsed-time statistics only
# NOTE(review): this cell sits before the `df_features.fillna(0)` cell, so any
# NaN in these columns reaches the model unchanged — confirm that is intended
perform_experiment(
    experiment_name='04_000_baseline',
    train=train,
    val=val,
    test=test,
    labels=df_source_labels,
    features=df_features,
    feature_list=['elapsed_time_sum', 'elapsed_time_max', 'elapsed_time_min', 'elapsed_time_mean', 'elapsed_time_mode'],
    define_tune_parameters=define_tune_parameters,
)

### Create Datasets

In [None]:
# feature columns for the dataset built below: the unique-count features plus
# the mean and standard deviation of each numeric event column
feature_list = [
    'count_unique_event_name',
    'count_unique_name',
    'count_unique_fqid',
    'count_unique_room_fqid',
    'count_unique_text_fqid',
    'elapsed_time_mean',
    'level_mean',
    'page_mean',
    'room_coor_x_mean',
    'room_coor_y_mean',
    'screen_coor_x_mean',
    'screen_coor_y_mean',
    'hover_duration_mean',
    'elapsed_time_std',
    'level_std',
    'page_std',
    'room_coor_x_std',
    'room_coor_y_std',
    'screen_coor_x_std',
    'screen_coor_y_std',
    'hover_duration_std'
]

In [None]:
# set all nan values to 0
# NOTE(review): this cell has no execution count and the PyCaret previews
# further down still show missing values, so the saved outputs appear to have
# been produced without it — confirm whether PyCaret should see the filled frame
df_features = df_features.fillna(0)

In [None]:
# create the simple model dataset
# (same call as the first step of perform_experiment, using the notebook-level
# feature frame, labels, feature_list and session splits)
simple_model_dataset = md.get_feature_dataset(
    features=df_features,
    y=df_source_labels,
    feature_list=feature_list,
    train=train,
    val=val,
    test=test,
    include_question=True,
    expand_question=False)

In [None]:
# convert the labels for multi-label classification
# the result is indexed by split name and then by 'X' / 'y' (see the next cell)
cat_features_dataset = md.labels_to_categorical(simple_model_dataset)

In [None]:
# number of input columns of the training matrix (second dimension of X)
input_data = cat_features_dataset['train']['X']
features_dataset_shape = input_data.shape[1]
print('features_dataset_shape:', features_dataset_shape)

# define the output shape: number of columns of the categorical labels
output_data = cat_features_dataset['train']['y']
output_shape = output_data.shape[1]
print('output_shape', output_shape)

### Training

In [None]:
# create the experiment
# (selects "04_001" as the MLflow experiment for the runs started below)
mlflow.set_experiment("04_001")

In [None]:
# close any MLflow run left active by an earlier cell before tuning starts
mlflow.end_run()

In [None]:
# create the hyperparameter object
def define_tune_parameters(hp):
    """
    Register the hyperparameter search space on `hp`.

    This is identical to the `define_tune_parameters` defined in the
    "Functions" section; running this cell rebinds the name to the same
    search space.

    `hp` is expected to expose `Int`, `Float` and `Choice` methods (the
    keras-tuner HyperParameters interface). Nothing is returned.
    """
    # (method name, positional args, keyword args) for each hyperparameter
    search_space = (
        ('Int', ('dense_layer_count',), {'min_value': 1, 'max_value': 6, 'step': 1}),
        ('Int', ('dense_units',), {'min_value': 512, 'max_value': 1700, 'step': 32}),
        ('Choice', ('dense_activation',), {'values': ['relu', 'tanh', 'LeakyReLU']}),
        ('Float', ('dense_l1_regularization',), {'min_value': 0.0, 'max_value': 0.0005, 'step': 0.00001}),
        ('Float', ('dense_l2_regularization',), {'min_value': 0.0, 'max_value': 0.001, 'step': 0.0001}),
        ('Float', ('dense_dropout',), {'min_value': 0.005, 'max_value': 0.1, 'step': 0.005}),
        ('Choice', ('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5, 1e-6]), {}),
    )
    for method_name, args, kwargs in search_space:
        getattr(hp, method_name)(*args, **kwargs)

In [None]:
# find the best model
for batch_size in [500, 1000, 2000, 3000, 4000]:
    for optimizer in [optimizers.Adam, optimizers.RMSprop]:
        sd_model.tune_model(
            define_tune_parameters=define_tune_parameters,
            dataset=cat_features_dataset,
            max_trials=50,
            input_shape=features_dataset_shape,
            output_shape=output_shape,
            dense_layer_count='dense_layer_count',
            dense_units='dense_units',
            dense_activation='dense_activation',
            dense_l1_regularization='dense_l1_regularization',
            dense_l2_regularization='dense_l2_regularization',
            dense_dropout='dense_dropout',
            train_epochs=2000,
            train_batch_size=batch_size,
            train_optimizer=optimizer,
            train_learning_rate='learning_rate',
            train_loss='categorical_crossentropy',
            train_metrics=[tfa.metrics.F1Score(name='f1_score', num_classes=2, threshold=0.5, average='macro')],
            train_class_weight=None,
            tune_objective='val_f1_score',
            tune_direction='max',
            tuner_type=kt.tuners.BayesianOptimization,
            tune_patience=10)

## PyCaret

reference: https://pycaret.gitbook.io/docs/learn-pycaret/official-blog/predicting-crashes-in-gold-prices-using-pycaret#ensemble-models

In [16]:
#!pip install xgboost==1.6.2

In [17]:
#!pip install catboost==1.1.1

In [131]:
from pycaret.classification import *
import sklearn
from sklearn.metrics import classification_report
import pickle

In [19]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(pd.DataFrame(df_features.dtypes))

### Create Datasets

In [17]:
def get_features_with_labels(features:pd.DataFrame,
                            y:pd.DataFrame) -> pd.DataFrame:
    """
    Build the complete dataset by left-joining the features onto the label
    frame that holds the target variable.

    Rows are matched on `session_id` and `level_group`. Every row of `y` is
    kept, along with its index; feature columns are NaN where no matching
    feature row exists.
    """
    join_keys = ['session_id', 'level_group']
    keyed_features = features.set_index(join_keys)
    return y.join(keyed_features, on=join_keys, how='left')

In [18]:
# combine the features and labels
df_combined = get_features_with_labels(features=df_features, y=df_source_labels)

with pd.option_context('display.max_columns', None):
    display(df_combined.head(2))

Unnamed: 0,session_id,question_num,correct,level_group,elapsed_time_sum,elapsed_time_max,elapsed_time_min,elapsed_time_mean,elapsed_time_mode,elapsed_time_std,count_total_event_name,count_total_name,count_total_fqid,count_total_room_fqid,count_total_text_fqid,count_total_level,count_unique_event_name,count_unique_name,count_unique_fqid,count_unique_room_fqid,count_unique_text_fqid,count_unique_level,level_sum,level_max,level_min,level_mean,level_mode,level_std,page_sum,page_max,page_min,page_mean,page_mode,page_std,room_coor_x_sum,room_coor_x_max,room_coor_x_min,room_coor_x_mean,room_coor_x_mode,room_coor_x_std,room_coor_y_sum,room_coor_y_max,room_coor_y_min,room_coor_y_mean,room_coor_y_mode,room_coor_y_std,screen_coor_x_sum,screen_coor_x_max,screen_coor_x_min,screen_coor_x_mean,screen_coor_x_mode,screen_coor_x_std,screen_coor_y_sum,screen_coor_y_max,screen_coor_y_min,screen_coor_y_mean,screen_coor_y_mode,screen_coor_y_std,hover_duration_sum,hover_duration_max,hover_duration_min,hover_duration_mean,hover_duration_mode,hover_duration_std
0,20090312431273200,1,1,0-4,0.008799,0.052789,0.0,0.023242,0.0,0.013341,0.088782,0.088782,0.06462,0.088782,0.054054,0.088782,0.75,0.0,0.20339,0.090909,0.225,0.0,0.008384,0.111111,0.0,0.05404,0.083333,0.034194,0.0,,,,0.0,,0.602928,0.878566,0.120715,0.513742,0.351427,0.664471,0.611251,1.0,0.274126,0.52459,0.431725,0.736468,0.108532,0.672249,0.008772,0.357584,0.30303,0.171348,0.112915,0.875934,0.010463,0.493341,0.400598,0.076357,0.13121,1.0,0.005638,0.41306,1.0,0.487796
1,20090312433251036,1,0,0-4,0.008434,0.063325,0.0,0.02645,0.0,0.018252,0.057588,0.057588,0.053312,0.057588,0.050874,0.057588,1.0,0.333333,0.067797,0.0,0.075,0.0,0.005167,0.111111,0.0,0.051958,0.083333,0.034239,0.0,0.0,0.0,0.0,0.0,0.0,0.53136,0.870427,0.082633,0.478427,0.358741,0.68244,0.629244,1.0,0.050337,0.542916,0.491507,0.763788,0.043705,0.697767,0.0,0.285664,0.318182,0.201399,0.075188,0.875934,0.007474,0.474922,0.403587,0.101738,0.100932,1.0,0.005638,0.317745,1.0,0.428841


In [28]:
# get the training, validation and test datasets
df_train = df_combined[df_combined['session_id'].isin(train)]
df_val = df_combined[df_combined['session_id'].isin(val)]
df_test = df_combined[df_combined['session_id'].isin(test)]

with pd.option_context('display.max_columns', None):
    display(df_train.head(2))

Unnamed: 0,session_id,question_num,correct,level_group,elapsed_time_sum,elapsed_time_max,elapsed_time_min,elapsed_time_mean,elapsed_time_mode,elapsed_time_std,count_total_event_name,count_total_name,count_total_fqid,count_total_room_fqid,count_total_text_fqid,count_total_level,count_unique_event_name,count_unique_name,count_unique_fqid,count_unique_room_fqid,count_unique_text_fqid,count_unique_level,level_sum,level_max,level_min,level_mean,level_mode,level_std,page_sum,page_max,page_min,page_mean,page_mode,page_std,room_coor_x_sum,room_coor_x_max,room_coor_x_min,room_coor_x_mean,room_coor_x_mode,room_coor_x_std,room_coor_y_sum,room_coor_y_max,room_coor_y_min,room_coor_y_mean,room_coor_y_mode,room_coor_y_std,screen_coor_x_sum,screen_coor_x_max,screen_coor_x_min,screen_coor_x_mean,screen_coor_x_mode,screen_coor_x_std,screen_coor_y_sum,screen_coor_y_max,screen_coor_y_min,screen_coor_y_mean,screen_coor_y_mode,screen_coor_y_std,hover_duration_sum,hover_duration_max,hover_duration_min,hover_duration_mean,hover_duration_mode,hover_duration_std
1,20090312433251036,1,0,0-4,0.008434,0.063325,0.0,0.02645,0.0,0.018252,0.057588,0.057588,0.053312,0.057588,0.050874,0.057588,1.0,0.333333,0.067797,0.0,0.075,0.0,0.005167,0.111111,0.0,0.051958,0.083333,0.034239,0.0,0.0,0.0,0.0,0.0,0.0,0.53136,0.870427,0.082633,0.478427,0.358741,0.68244,0.629244,1.0,0.050337,0.542916,0.491507,0.763788,0.043705,0.697767,0.0,0.285664,0.318182,0.201399,0.075188,0.875934,0.007474,0.474922,0.403587,0.101738,0.100932,1.0,0.005638,0.317745,1.0,0.428841
4,20090314441803444,1,1,0-4,0.00514,0.047938,0.0,0.021001,0.0,0.012045,0.019196,0.019196,0.025848,0.019196,0.022258,0.019196,0.5,0.0,0.067797,0.090909,0.05,0.0,0.002373,0.111111,0.0,0.053738,0.055556,0.035234,0.0,,,,0.0,,0.609612,0.879013,0.154895,0.520039,0.470365,0.664119,0.644255,1.0,0.313764,0.560751,0.450186,0.728845,0.03045,0.688198,0.023126,0.343544,0.356459,0.175123,0.031477,0.792227,0.0,0.457029,0.382661,0.07282,0.135595,1.0,0.005307,0.379436,0.165837,0.42497


### Setup Classification Experiment

In [62]:
# NOTE(review): setup() below is called with fix_imbalance=False, so this
# method is presumably not applied in this run — confirm
fix_imbalance_method = 'SMOTE' #'RandomOverSampler'

# NOTE(review): df_val is merged into the data given to PyCaret, so only
# df_test stays unseen; with test_data=None PyCaret makes its own hold-out
# split (58715 / 25165 rows in the summary below).
# NOTE(review): the summary reports 62 numeric features while the frame has
# 60 engineered columns, so session_id and question_num appear to be used as
# predictors — confirm whether session_id should be excluded.
classifier = setup(
    data=pd.concat([df_train, df_val]),
    target='correct',
    test_data=None, #df_val,
    # PyCaret's seed (shown as "Session id 51" below), not the session_id column
    session_id=random_state,
    experiment_name='04_001_pycaret',
    fix_imbalance=False,
    fix_imbalance_method=fix_imbalance_method,
    data_split_shuffle=True,
    # NOTE(review): stratifying on the session_id column presumably spreads
    # each session's rows over both splits — confirm this is intended
    data_split_stratify=['session_id'],
    fold_strategy='stratifiedkfold',
    use_gpu=False,
    html=True,
    verbose=True)

Unnamed: 0,Description,Value
0,Session id,51
1,Target,correct
2,Target type,Binary
3,Original data shape,"(83880, 64)"
4,Transformed data shape,"(83880, 66)"
5,Transformed train set shape,"(58715, 66)"
6,Transformed test set shape,"(25165, 66)"
7,Numeric features,62
8,Categorical features,1
9,Rows with missing values,36.2%


### Add Metrics

reference: https://pycaret.readthedocs.io/en/stable/api/classification.html#pycaret.classification.add_metric

In [63]:
def f1_macro(y_true, y_pred, **kwargs):
    """
    Macro-averaged F1 score of `y_pred` against `y_true`.

    Extra keyword arguments are accepted so the function can be called with
    additional options, but they are not used.
    """
    macro_f1 = sklearn.metrics.f1_score(y_true, y_pred, average='macro')
    return macro_f1

# register the scorer with PyCaret under the display name 'F1 Macro', which
# is the name later passed as `sort=` / `optimize=`
add_metric('f1_macro', 'F1 Macro', f1_macro, greater_is_better=True)

Name                                              F1 Macro
Display Name                                      F1 Macro
Score Function       <function f1_macro at 0x7ff91d487550>
Scorer                               make_scorer(f1_macro)
Target                                                pred
Args                                                    {}
Greater is Better                                     True
Multiclass                                            True
Custom                                                True
Name: f1_macro, dtype: object

### Training

In [64]:
# rank the candidate estimators by the custom 'F1 Macro' metric using 10-fold
# cross-validation. With n_select=15 the result is a list of models (despite
# the singular name); the saved table lists 10 estimators.
top_model = compare_models(n_select=15, 
                           sort='F1 Macro',
                           cross_validation=True,
                           fold=10,
                           turbo=True)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro,TT (Sec)
catboost,CatBoost Classifier,0.7602,0.7799,0.9232,0.7787,0.8448,0.3337,0.3584,0.6587,11.306
xgboost,Extreme Gradient Boosting,0.7532,0.7696,0.9053,0.7807,0.8384,0.3283,0.3451,0.6582,2.076
lightgbm,Light Gradient Boosting Machine,0.7571,0.7766,0.9278,0.7738,0.8438,0.3178,0.346,0.6491,0.296
gbc,Gradient Boosting Classifier,0.7512,0.7643,0.9432,0.7617,0.8428,0.2777,0.3184,0.6233,4.104
rf,Random Forest Classifier,0.6931,0.6942,0.7967,0.7754,0.7859,0.2444,0.2447,0.622,1.638
ada,Ada Boost Classifier,0.7433,0.7491,0.9403,0.7561,0.8382,0.2514,0.291,0.6087,0.976
dt,Decision Tree Classifier,0.6657,0.6001,0.7585,0.7663,0.7624,0.1987,0.1988,0.5993,0.36
et,Extra Trees Classifier,0.6711,0.6631,0.7992,0.7514,0.7746,0.1687,0.1698,0.5833,1.282
qda,Quadratic Discriminant Analysis,0.6855,0.6267,0.8683,0.7356,0.7956,0.1274,0.1383,0.5499,0.179
lda,Linear Discriminant Analysis,0.7179,0.6708,0.954,0.7299,0.827,0.1303,0.1754,0.5311,0.254


#### Evaluate Top Models

In [65]:
def evaluate_models(models:List,
                    data:pd.DataFrame) -> pd.DataFrame:
    """
    Score each model on `data` with the macro F1 metric.

    Parameters
    ----------
    models : fitted estimators accepted by PyCaret's `predict_model`.
    data : frame holding the feature columns and the `correct` target column.

    Returns
    -------
    A frame with the columns `model` (estimator class name) and `f1_score`,
    sorted by `f1_score` in descending order. The index is each model's
    position in `models`. An empty `models` list yields an empty frame with
    the same two columns instead of raising.
    """
    records = []
    for model in models:
        model_name = model.__class__.__name__
        df_pred = predict_model(estimator=model, data=data, verbose=False)
        # named `score` so the imported sklearn `f1_score` is not shadowed
        score = f1_macro(y_true=df_pred.correct, y_pred=df_pred.prediction_label)

        records.append({ 'model': model_name, 'f1_score': score })

    # explicit columns keep sort_values valid when there are no records
    scores = pd.DataFrame(records, columns=['model', 'f1_score'])
    return scores.sort_values(by='f1_score', ascending=False)

In [66]:
# score every model returned by compare_models on df_test, the sessions that
# were not passed to setup()
display(evaluate_models(top_model, df_test))

Unnamed: 0,model,f1_score
1,XGBClassifier,0.637696
0,CatBoostClassifier,0.637175
2,LGBMClassifier,0.635702
7,ExtraTreesClassifier,0.623947
3,GradientBoostingClassifier,0.619119
4,RandomForestClassifier,0.617844
5,AdaBoostClassifier,0.607647
6,DecisionTreeClassifier,0.596548
8,QuadraticDiscriminantAnalysis,0.553086
9,LinearDiscriminantAnalysis,0.522566


#### Select Top Models

In [67]:
# hand-picked positions in `top_model`: these are the five best rows of the
# df_test ranking shown above (XGB, CatBoost, LightGBM, ExtraTrees, GBC).
# NOTE(review): choosing models by their test-set score means df_test is no
# longer fully unseen for the later comparisons — consider selecting on the
# cross-validation ranking instead.
selected_models = [
    top_model[1],
    top_model[0],
    top_model[2],
    top_model[7],
    top_model[3],
]

### Hyper-Parameter Tuning

In [72]:
# tune each selected model for 'F1 Macro' (50 candidates, 5-fold CV). As the
# saved log shows, tune_model may hand back the original model when tuning
# does not improve it.
tuned_models = []
for model in selected_models:
    tuned_model = tune_model(estimator=model, optimize='F1 Macro', fold=5, n_iter=50, verbose=True)
    tuned_models.append(tuned_model)

# NOTE(review): df_val was part of the data passed to setup(), so these scores
# are not held-out; the same models score about 0.63-0.65 on df_test in the
# next cell versus 0.66-0.77 here
display(evaluate_models(tuned_models, df_val))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7215,0.7367,0.8184,0.7941,0.806,0.3129,0.3134,0.6563
1,0.7214,0.7409,0.8163,0.7951,0.8056,0.3144,0.3148,0.6571
2,0.7169,0.7425,0.8086,0.7947,0.8016,0.3081,0.3082,0.654
3,0.7192,0.7378,0.816,0.7929,0.8042,0.3078,0.3082,0.6537
4,0.7203,0.7369,0.8108,0.7971,0.8039,0.3164,0.3165,0.6581
Mean,0.7199,0.739,0.814,0.7947,0.8043,0.3119,0.3122,0.6558
Std,0.0017,0.0023,0.0037,0.0014,0.0016,0.0034,0.0034,0.0017


Fitting 5 folds for each of 50 candidates, totalling 250 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7484,0.7595,0.8933,0.7819,0.8339,0.3245,0.3371,0.6576
1,0.7502,0.7665,0.8896,0.7856,0.8343,0.3348,0.3457,0.6635
2,0.749,0.767,0.894,0.7821,0.8343,0.3258,0.3385,0.6582
3,0.751,0.7592,0.9015,0.7804,0.8366,0.3249,0.3403,0.6569
4,0.7492,0.7635,0.8926,0.7831,0.8342,0.3282,0.3404,0.6596
Mean,0.7496,0.7631,0.8942,0.7826,0.8347,0.3277,0.3404,0.6592
Std,0.0009,0.0033,0.004,0.0017,0.001,0.0038,0.0029,0.0024


Fitting 5 folds for each of 50 candidates, totalling 250 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7446,0.7603,0.8776,0.7861,0.8293,0.3282,0.3362,0.6611
1,0.7484,0.7627,0.8762,0.7907,0.8312,0.3425,0.3495,0.6687
2,0.7416,0.7643,0.8725,0.7858,0.8268,0.3236,0.3305,0.6591
3,0.7491,0.7616,0.8832,0.7877,0.8327,0.3377,0.3467,0.6656
4,0.7468,0.761,0.8767,0.7888,0.8304,0.3366,0.344,0.6656
Mean,0.7461,0.762,0.8772,0.7878,0.8301,0.3337,0.3414,0.664
Std,0.0027,0.0014,0.0035,0.0018,0.002,0.0068,0.007,0.0035


Fitting 5 folds for each of 50 candidates, totalling 250 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.6682,0.7549,0.6524,0.8429,0.7355,0.3094,0.3281,0.6453
1,0.6665,0.7587,0.6457,0.8463,0.7325,0.3105,0.331,0.6449
2,0.6759,0.7676,0.6571,0.8505,0.7414,0.3259,0.3459,0.6537
3,0.6815,0.7611,0.6731,0.8449,0.7493,0.3278,0.344,0.6564
4,0.668,0.757,0.6484,0.846,0.7342,0.312,0.3321,0.646
Mean,0.672,0.7598,0.6554,0.8461,0.7386,0.3171,0.3362,0.6493
Std,0.0058,0.0043,0.0097,0.0025,0.0061,0.008,0.0073,0.0048


Fitting 5 folds for each of 50 candidates, totalling 250 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7383,0.7422,0.8697,0.7839,0.8246,0.3153,0.322,0.6549
1,0.7377,0.7444,0.8704,0.7829,0.8243,0.3125,0.3195,0.6534
2,0.7348,0.7478,0.8661,0.7822,0.822,0.3074,0.3136,0.6511
3,0.7375,0.743,0.8747,0.7805,0.825,0.3074,0.3155,0.6503
4,0.7389,0.7438,0.8696,0.7845,0.8249,0.3174,0.324,0.6561
Mean,0.7375,0.7443,0.8701,0.7828,0.8241,0.312,0.3189,0.6532
Std,0.0014,0.0019,0.0028,0.0014,0.0011,0.0041,0.0039,0.0022


Fitting 5 folds for each of 50 candidates, totalling 250 fits


Unnamed: 0,model,f1_score
2,LGBMClassifier,0.772568
4,GradientBoostingClassifier,0.758584
1,CatBoostClassifier,0.753778
0,XGBClassifier,0.715626
3,ExtraTreesClassifier,0.662189


#### Evaluate on Unseen Dataset

The tuned models appear to be performing worse than the baseline model.

In [74]:
# score the tuned models on df_test, the sessions that were not passed to setup()
display(evaluate_models(tuned_models, df_test))

Unnamed: 0,model,f1_score
3,ExtraTreesClassifier,0.647932
2,LGBMClassifier,0.641288
0,XGBClassifier,0.637696
1,CatBoostClassifier,0.633372
4,GradientBoostingClassifier,0.633313


### Ensemble Models

reference: https://github.com/Riazone/Gold-Return-Prediction/blob/master/Classification/Gold%20Prediction%20Experiment%20%20Classification-%20PyCaret.ipynb

In [87]:
# wrap each tuned model in a 10-estimator bagging ensemble, optimising for
# 'F1 Macro' with 5-fold cross-validation
bagging_models = []
for model in tuned_models:
    new_model = ensemble_model(
        estimator=model,
        method='Bagging',
        optimize='F1 Macro',
        fold=5,
        n_estimators=10,
        verbose=True,
    )
    bagging_models.append(new_model)

# NOTE(review): the table saved under this cell (about 0.66-0.76) does not
# match the df_test scores shown by the next cell, so it may come from an
# earlier version of this line — re-run to refresh
display(evaluate_models(bagging_models, df_test))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7559,0.7712,0.9133,0.7793,0.841,0.3291,0.3492,0.6576
1,0.7557,0.7741,0.9106,0.7805,0.8405,0.3314,0.3501,0.6592
2,0.7547,0.7801,0.9111,0.7793,0.84,0.3273,0.3465,0.6569
3,0.7601,0.7735,0.9233,0.7786,0.8448,0.3333,0.3581,0.6585
4,0.7588,0.772,0.9159,0.7809,0.843,0.3368,0.3576,0.6614
Mean,0.757,0.7742,0.9149,0.7797,0.8419,0.3316,0.3523,0.6587
Std,0.0021,0.0031,0.0046,0.0008,0.0018,0.0033,0.0047,0.0015


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7547,0.7701,0.9115,0.7791,0.8401,0.3269,0.3463,0.6567
1,0.7562,0.7748,0.9097,0.7814,0.8407,0.3342,0.3524,0.6609
2,0.753,0.7763,0.9102,0.7781,0.839,0.3224,0.3414,0.6544
3,0.7587,0.7698,0.9195,0.7791,0.8435,0.3326,0.3554,0.6586
4,0.7561,0.7706,0.9127,0.7799,0.8411,0.3307,0.3504,0.6585
Mean,0.7557,0.7723,0.9127,0.7795,0.8409,0.3293,0.3492,0.6578
Std,0.0019,0.0027,0.0036,0.0011,0.0015,0.0043,0.0049,0.0022


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7488,0.7672,0.8914,0.7833,0.8338,0.328,0.3398,0.6597
1,0.7507,0.769,0.8892,0.7862,0.8345,0.3367,0.3474,0.6645
2,0.7497,0.7725,0.8896,0.7851,0.8341,0.3331,0.3441,0.6626
3,0.7525,0.7673,0.8973,0.784,0.8368,0.3347,0.348,0.6626
4,0.7537,0.7665,0.8911,0.7882,0.8365,0.3451,0.356,0.6688
Mean,0.7511,0.7685,0.8917,0.7853,0.8351,0.3355,0.3471,0.6636
Std,0.0018,0.0022,0.0029,0.0017,0.0013,0.0056,0.0053,0.003


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.672,0.7528,0.6629,0.8394,0.7408,0.3106,0.327,0.6471
1,0.6668,0.7567,0.6507,0.8421,0.7342,0.3069,0.3257,0.6439
2,0.6766,0.7668,0.6593,0.8496,0.7424,0.326,0.3453,0.6539
3,0.6792,0.7602,0.6724,0.8421,0.7477,0.3222,0.3378,0.6537
4,0.6688,0.7576,0.654,0.8424,0.7363,0.3097,0.328,0.6456
Mean,0.6727,0.7588,0.6599,0.8431,0.7403,0.3151,0.3328,0.6488
Std,0.0046,0.0047,0.0075,0.0034,0.0048,0.0075,0.0076,0.0042


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7456,0.7544,0.8943,0.7787,0.8325,0.3138,0.3272,0.6517
1,0.7469,0.7607,0.8896,0.7823,0.8325,0.3235,0.3349,0.6575
2,0.743,0.7616,0.8882,0.7792,0.8301,0.3115,0.323,0.6512
3,0.7501,0.7548,0.9005,0.7801,0.836,0.323,0.3381,0.6559
4,0.7496,0.7568,0.8915,0.784,0.8343,0.3308,0.3425,0.6611
Mean,0.747,0.7577,0.8928,0.7809,0.8331,0.3205,0.3332,0.6555
Std,0.0026,0.003,0.0044,0.002,0.002,0.007,0.0071,0.0037


Unnamed: 0,model,f1_score
2,BaggingClassifier,0.76341
1,BaggingClassifier,0.748378
4,BaggingClassifier,0.74287
0,BaggingClassifier,0.712195
3,BaggingClassifier,0.65978


In [89]:
# score the bagging ensembles on df_test (same call as the last line of the
# previous cell)
display(evaluate_models(bagging_models, df_test))

Unnamed: 0,model,f1_score
3,BaggingClassifier,0.646661
2,BaggingClassifier,0.639287
0,BaggingClassifier,0.637844
1,BaggingClassifier,0.634509
4,BaggingClassifier,0.6334


In [90]:
# wrap each tuned model in a 10-estimator boosting ensemble, optimising for
# 'F1 Macro' with 5-fold cross-validation
boosting_models = []
for model in tuned_models:
    boosting_model = ensemble_model(
        estimator=model,
        method='Boosting',
        optimize='F1 Macro',
        fold=5,
        n_estimators=10,
        verbose=True,
    )
    boosting_models.append(boosting_model)

# score the boosted ensembles on df_test
display(evaluate_models(boosting_models, df_test))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.2929,0.5,0.0,0.0,0.0,0.0,0.0,0.2266
1,0.2929,0.5,0.0,0.0,0.0,0.0,0.0,0.2266
2,0.2929,0.5,0.0,0.0,0.0,0.0,0.0,0.2266
3,0.2929,0.5,0.0,0.0,0.0,0.0,0.0,0.2266
4,0.2929,0.5,0.0,0.0,0.0,0.0,0.0,0.2266
Mean,0.2929,0.5,0.0,0.0,0.0,0.0,0.0,0.2266
Std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7268,0.7371,0.8515,0.7816,0.8151,0.2961,0.3001,0.6462
1,0.7312,0.7479,0.8507,0.7865,0.8173,0.3113,0.3148,0.6542
2,0.7295,0.7437,0.8492,0.7856,0.8162,0.3075,0.3109,0.6523
3,0.7271,0.7385,0.8546,0.7803,0.8158,0.2936,0.2982,0.6447
4,0.732,0.7396,0.8541,0.7856,0.8184,0.3105,0.3145,0.6536
Mean,0.7293,0.7414,0.852,0.7839,0.8166,0.3038,0.3077,0.6502
Std,0.0021,0.0039,0.0021,0.0025,0.0012,0.0075,0.0072,0.0039


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
1,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
2,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
3,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
4,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
Mean,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
Std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7059,0.7731,0.722,0.8395,0.7764,0.3539,0.3626,0.6734
1,0.7052,0.7745,0.7149,0.8443,0.7742,0.3579,0.3683,0.6748
2,0.7123,0.78,0.7244,0.8465,0.7807,0.3696,0.3792,0.6812
3,0.7097,0.774,0.7331,0.8361,0.7812,0.3554,0.3622,0.6749
4,0.7041,0.7724,0.7173,0.8408,0.7742,0.3528,0.3623,0.6725
Mean,0.7074,0.7748,0.7224,0.8414,0.7773,0.3579,0.3669,0.6754
Std,0.0031,0.0027,0.0063,0.0036,0.0031,0.0061,0.0066,0.003


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
1,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
2,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
3,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
4,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
Mean,0.7071,0.5,1.0,0.7071,0.8284,0.0,0.0,0.4142
Std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,model,f1_score
3,AdaBoostClassifier,0.665028
1,AdaBoostClassifier,0.637543
2,AdaBoostClassifier,0.412718
4,AdaBoostClassifier,0.412718
0,AdaBoostClassifier,0.229132


### Best Models

In [113]:
# Model groups to compare; each entry pairs a group label with its list of
# fitted models.
trained_models = [
    { 'name': 'selected_models', 'models': selected_models },
    { 'name': 'tuned_models', 'models': tuned_models },
    { 'name': 'bagging_models', 'models': bagging_models },
    { 'name': 'boosting_models', 'models': boosting_models },
]

# Evaluate every group on df_test and tag each row with the group it came from.
# The per-group frames are collected first and concatenated once: calling
# pd.concat inside the loop re-copies all earlier rows on every pass and starts
# from an empty DataFrame, which recent pandas versions warn about.
# `.assign` returns a new frame, so the object returned by evaluate_models is
# left untouched.
group_frames = [
    evaluate_models(group['models'], df_test).assign(model_group=group['name'])
    for group in trained_models
]

# keep the per-group index as a regular `index` column to identify the models
df_models = pd.concat(group_frames).reset_index(drop=False)

In [116]:
# Inspect the combined evaluation table (one row per model, tagged with its model_group).
df_models

Unnamed: 0,index,model,f1_score,model_group
0,0,XGBClassifier,0.637696,selected_models
1,1,CatBoostClassifier,0.637175,selected_models
2,2,LGBMClassifier,0.635702,selected_models
3,3,ExtraTreesClassifier,0.623947,selected_models
4,4,GradientBoostingClassifier,0.619119,selected_models
5,3,ExtraTreesClassifier,0.647932,tuned_models
6,2,LGBMClassifier,0.641288,tuned_models
7,0,XGBClassifier,0.637696,tuned_models
8,1,CatBoostClassifier,0.633372,tuned_models
9,4,GradientBoostingClassifier,0.633313,tuned_models


In [127]:
# Order the rows so that, for each `index` value, the highest f1_score comes first.
df_models_sorted = df_models.sort_values(by=['index', 'f1_score'],
                                         ascending=[True, False])

# Keep only the first (i.e. best-scoring) row per `index` value, regardless of
# which model_group it belongs to, then renumber the surviving rows from 0.
result_df = (
    df_models_sorted
    .drop_duplicates(subset='index', keep='first')
    .reset_index(drop=True)
)

# Show the winners ranked by f1_score, best first.
display(result_df.sort_values('f1_score', ascending=False))

Unnamed: 0,index,model,f1_score,model_group
3,3,AdaBoostClassifier,0.665028,boosting_models
2,2,LGBMClassifier,0.641288,tuned_models
0,0,BaggingClassifier,0.637844,bagging_models
1,1,AdaBoostClassifier,0.637543,boosting_models
4,4,BaggingClassifier,0.6334,bagging_models


In [129]:
# Resolve each row of `result_df` back to the fitted model it describes instead
# of transcribing the (group, position) pairs by hand, so the shortlist cannot
# drift out of sync with the table when the notebook is re-run.
# Assumes the `index` column is the model's position inside its group's list,
# which is how the previous hand-written list interpreted it.
models_by_group = {group['name']: group['models'] for group in trained_models}

# Same ordering as the displayed table: best f1_score first.
ranked_models = result_df.sort_values(by='f1_score', ascending=False)

best_models = [
    models_by_group[group_name][int(position)]
    for group_name, position in zip(ranked_models['model_group'],
                                    ranked_models['index'])
]

In [130]:
# Sanity check: re-score the shortlisted models on df_test; the f1_score values
# are expected to match the corresponding rows of result_df.
display(evaluate_models(best_models, df_test))

Unnamed: 0,model,f1_score
0,AdaBoostClassifier,0.665028
1,LGBMClassifier,0.641288
2,BaggingClassifier,0.637844
3,AdaBoostClassifier,0.637543
4,BaggingClassifier,0.6334


In [135]:
# Disabled: write the shortlisted models to '04-100_best_models.pkl'.
# NOTE(review): confirm `pickle` is imported before re-enabling this cell.
# with open('04-100_best_models.pkl', 'wb') as file:
#     pickle.dump(best_models, file)

In [136]:
# Disabled: read back the file written by the (also disabled) dump cell.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so only load a pickle that was produced locally by this notebook.
# with open('04-100_best_models.pkl', 'rb') as file:
#     my_list = pickle.load(file)

# # print the list to verify that it has been loaded correctly
# print(my_list)

### Blending Models

In [137]:
# Combine every shortlisted model into a single ensemble, scored with 5-fold
# cross-validation and optimised for 'F1 Macro'.
# NOTE(review): presumably PyCaret's blend_models (the evaluation output
# reports the result as a VotingClassifier) - confirm what method='auto' picks.
blend_all = blend_models(estimator_list=best_models,
                         method='auto',
                         optimize='F1 Macro',
                         fold=5,
                         verbose=True)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7507,0.7665,0.8971,0.7823,0.8358,0.3286,0.3423,0.6594
1,0.7524,0.7713,0.8923,0.7864,0.836,0.3395,0.351,0.6657
2,0.749,0.7732,0.8899,0.7842,0.8337,0.3304,0.3416,0.6611
3,0.7542,0.768,0.9004,0.784,0.8382,0.3369,0.3513,0.6635
4,0.7552,0.7678,0.8943,0.788,0.8378,0.3468,0.3585,0.6694
Mean,0.7523,0.7694,0.8948,0.785,0.8363,0.3364,0.349,0.6638
Std,0.0022,0.0025,0.0037,0.002,0.0016,0.0065,0.0063,0.0035


In [138]:
# Score the all-model blend on df_test so it can be compared with the individual models.
display(evaluate_models([blend_all], df_test))

Unnamed: 0,model,f1_score
0,VotingClassifier,0.641263


In [139]:
# Blend of best_models[1] and best_models[2] (listed as LGBMClassifier and
# BaggingClassifier in the recorded evaluation of best_models).
blend_001 = blend_models(estimator_list=best_models[1:3],
                         method='auto',
                         optimize='F1 Macro',
                         fold=5,
                         verbose=True)

# Score the blend on df_test.
display(evaluate_models([blend_001], df_test))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7523,0.7693,0.8993,0.7827,0.837,0.3316,0.3459,0.6607
1,0.7533,0.7719,0.8957,0.7855,0.837,0.3389,0.3516,0.665
2,0.7521,0.7756,0.8959,0.7842,0.8364,0.3346,0.3475,0.6627
3,0.7558,0.771,0.9044,0.7836,0.8397,0.3383,0.3541,0.6638
4,0.7562,0.7699,0.898,0.7872,0.8389,0.3464,0.3595,0.6688
Mean,0.7539,0.7715,0.8987,0.7846,0.8378,0.338,0.3517,0.6642
Std,0.0017,0.0022,0.0032,0.0016,0.0013,0.005,0.0048,0.0027


Unnamed: 0,model,f1_score
0,VotingClassifier,0.640903


In [140]:
# Blend of best_models[3] and best_models[4] (listed as AdaBoostClassifier and
# BaggingClassifier in the recorded evaluation of best_models).
blend_002 = blend_models(estimator_list=best_models[3:5],
                         method='auto',
                         optimize='F1 Macro',
                         fold=5,
                         verbose=True)

# Score the blend on df_test.
display(evaluate_models([blend_002], df_test))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7451,0.755,0.8909,0.7799,0.8317,0.316,0.3281,0.6533
1,0.7467,0.7631,0.8862,0.7839,0.8319,0.3265,0.3368,0.6594
2,0.7432,0.7622,0.8845,0.7812,0.8296,0.3161,0.3264,0.654
3,0.7496,0.7563,0.8964,0.7816,0.8351,0.3256,0.3391,0.6578
4,0.753,0.7579,0.8914,0.7874,0.8362,0.3425,0.3536,0.6674
Mean,0.7475,0.7589,0.8899,0.7828,0.8329,0.3253,0.3368,0.6584
Std,0.0035,0.0032,0.0042,0.0026,0.0024,0.0097,0.0097,0.0051


Unnamed: 0,model,f1_score
0,VotingClassifier,0.635205


In [141]:
# Blend of best_models[1] and best_models[4] (listed as LGBMClassifier and
# BaggingClassifier in the recorded evaluation of best_models).
blend_003 = blend_models(estimator_list=[best_models[i] for i in (1, 4)],
                         method='auto',
                         optimize='F1 Macro',
                         fold=5,
                         verbose=True)

# Score the blend on df_test.
display(evaluate_models([blend_003], df_test))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7478,0.7613,0.8917,0.7822,0.8334,0.3244,0.3365,0.6577
1,0.7493,0.7654,0.8863,0.7863,0.8333,0.335,0.345,0.6639
2,0.7443,0.7667,0.8841,0.7825,0.8302,0.3202,0.3302,0.6563
3,0.7498,0.7622,0.8931,0.7834,0.8346,0.3297,0.342,0.6604
4,0.7522,0.7627,0.8892,0.7877,0.8354,0.3418,0.3523,0.6673
Mean,0.7487,0.7636,0.8889,0.7844,0.8334,0.3303,0.3412,0.6611
Std,0.0026,0.0021,0.0033,0.0022,0.0018,0.0076,0.0075,0.004


Unnamed: 0,model,f1_score
0,VotingClassifier,0.638401


In [142]:
# Blend of best_models[1] and best_models[3] (listed as LGBMClassifier and
# AdaBoostClassifier in the recorded evaluation of best_models).
blend_004 = blend_models(estimator_list=[best_models[i] for i in (1, 3)],
                         method='auto',
                         optimize='F1 Macro',
                         fold=5,
                         verbose=True)

# Score the blend on df_test.
display(evaluate_models([blend_004], df_test))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,F1 Macro
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.7449,0.7586,0.8797,0.7853,0.8298,0.3269,0.3355,0.6602
1,0.7486,0.7639,0.8788,0.7895,0.8318,0.3404,0.3481,0.6674
2,0.745,0.7633,0.8745,0.788,0.829,0.3326,0.3396,0.6636
3,0.7473,0.7602,0.8823,0.7863,0.8316,0.3322,0.3412,0.6628
4,0.7473,0.7597,0.8773,0.7889,0.8307,0.3374,0.3449,0.666
Mean,0.7466,0.7611,0.8785,0.7876,0.8306,0.3339,0.3419,0.664
Std,0.0015,0.0021,0.0026,0.0016,0.001,0.0046,0.0044,0.0025


Unnamed: 0,model,f1_score
0,VotingClassifier,0.6413


### Stacking Models

In [144]:
# Stack the blend_004 ensemble and best_models[2] as base estimators, with
# best_models[0] as the meta-model that combines them.
# NOTE(review): if this is PyCaret's stack_models, restack=False should mean the
# meta-model is trained on the base predictions only - confirm against the docs.
stack_001 = stack_models(meta_model=best_models[0],
                         estimator_list=[blend_004, best_models[2]],
                         restack=False)

# Score the stacked model on df_test.
display(evaluate_models([stack_001], df_test))

Processing:   0%|          | 0/6 [00:00<?, ?it/s]