Reference: https://www.youtube.com/watch?v=HdlDYng8g9s

In [None]:
import lib._util.visualplot as vp
import lib._util.mlpipe as mlpipe

# Feature scaling
from lib._class.DFStandardScaler import DFStandardScaler
from lib._class.DFRobustScaler import DFRobustScaler
from lib._class.DFMinMaxScaler import DFMinMaxScaler

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, Perceptron, RidgeClassifier, SGDClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.naive_bayes import BernoulliNB, CategoricalNB, ComplementNB, GaussianNB, MultinomialNB
from sklearn.dummy import DummyClassifier

from sklearn.gaussian_process.kernels import RBF
from sklearn.calibration import CalibratedClassifierCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, StackingClassifier

from sklearn import datasets
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold, cross_validate
from sklearn.pipeline import make_pipeline, Pipeline

# Plotly
import plotly.figure_factory as ff

In [None]:
# Passed as `out_path` to the `vp` plotting helpers below
# (presumably the directory where figures are saved - confirm in lib._util.visualplot).
OUT_PATH_GRAPH = 'resources/output/graph/'

In [None]:
def load_data():
    """Load the iris dataset as pandas objects.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a DataFrame whose columns are the
        dataset's ``feature_names`` and ``y`` is a Series named ``'target'``
        holding the class labels.
    """
    iris = datasets.load_iris()

    features = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    labels = pd.Series(iris['target'], name='target')
    return features, labels

In [None]:
# Load the feature matrix and target, then show their shapes as the cell output.
X, y = load_data()

X.shape, y.shape

In [None]:
# Class distribution of the full target
# (vp.value_count is a project helper; presumably it tabulates/plots the counts).
vp.value_count(y.to_frame(), 'target')

# Best Hyperparameter (GridSearchCV)

In [None]:
# Exhaustive search over every SVC configuration below, scored on three
# metrics with 10-fold stratified CV; the configuration with the best
# macro-F1 is refit on the whole dataset.
svc_grid = {
    'C': [1, 10, 20, 30, 40, 50],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
}
scoring_metrics = ['f1_macro', 'roc_auc_ovr', 'balanced_accuracy']

search = GridSearchCV(
    SVC(random_state=0, probability=True),
    svc_grid,
    scoring=scoring_metrics,
    refit='f1_macro',
    cv=StratifiedKFold(n_splits=10),
    n_jobs=-1,
    verbose=10,
)
search.fit(X, y)

In [None]:
# Tabulate the mean score and rank of every tried configuration,
# best configurations first.
result_df = pd.DataFrame(search.cv_results_)

ranking_keys = ['mean_test_f1_macro', 'mean_test_roc_auc_ovr', 'mean_test_balanced_accuracy']
summary = result_df[[
    'params',
    'mean_test_f1_macro', 'rank_test_f1_macro',
    'mean_test_roc_auc_ovr', 'rank_test_roc_auc_ovr',
    'mean_test_balanced_accuracy', 'rank_test_balanced_accuracy'
]]
summary.sort_values(by=ranking_keys, ascending=False)

In [None]:
# Parameter combination selected by the search (chosen on the refit metric, f1_macro).
search.best_params_

In [None]:
# Refit an SVC with the selected hyperparameters on the full dataset and
# evaluate it on that same data, so these are training-set scores.
svc = SVC(random_state=0, probability=True, **search.best_params_)
svc.fit(X, y)

y_pred = svc.predict(X)
y_prob = svc.predict_proba(X)
mlpipe.eval_classif(y, y_pred, y_prob=y_prob, multi_class='ovr')

# Best Hyperparameter (RandomizedSearchCV)

In [None]:
# Randomized counterpart of the grid search above: same SVC settings, same
# scoring and CV scheme, refit on the best macro-F1 configuration.
# NOTE: the candidate lists only span 6 * 4 * 2 = 48 combinations, which is
# fewer than n_iter=100, so scikit-learn warns and tries each combination once.
svc_distributions = {
    'C': [1, 10, 20, 30, 40, 50],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
}
scoring_metrics = ['f1_macro', 'roc_auc_ovr', 'balanced_accuracy']

search = RandomizedSearchCV(
    SVC(random_state=0, probability=True),
    svc_distributions,
    n_iter=100,
    scoring=scoring_metrics,
    refit='f1_macro',
    cv=StratifiedKFold(n_splits=10),
    n_jobs=-1,
    verbose=10,
    random_state=0,
)
search.fit(X, y)

In [None]:
# Tabulate the mean score and rank of every sampled configuration,
# best configurations first.
result_df = pd.DataFrame(search.cv_results_)

ranking_keys = ['mean_test_f1_macro', 'mean_test_roc_auc_ovr', 'mean_test_balanced_accuracy']
summary = result_df[[
    'params',
    'mean_test_f1_macro', 'rank_test_f1_macro',
    'mean_test_roc_auc_ovr', 'rank_test_roc_auc_ovr',
    'mean_test_balanced_accuracy', 'rank_test_balanced_accuracy'
]]
summary.sort_values(by=ranking_keys, ascending=False)

In [None]:
# Parameter combination selected by the randomized search (chosen on the refit metric, f1_macro).
search.best_params_

In [None]:
# Refit an SVC with the selected hyperparameters on the full dataset and
# evaluate it on that same data, so these are training-set scores.
svc = SVC(random_state=0, probability=True, **search.best_params_)
svc.fit(X, y)

y_pred = svc.predict(X)
y_prob = svc.predict_proba(X)
mlpipe.eval_classif(y, y_pred, y_prob=y_prob, multi_class='ovr')

# Classifier Comparison

In [None]:
# Hold out 20% of the rows as a test set, stratified on the target, with a fixed seed.
# (mlpipe.dataset_split is a project helper; presumably a thin wrapper around
# sklearn's train_test_split - confirm in lib._util.mlpipe.)
X_train, X_test, y_train, y_test = mlpipe.dataset_split(X, y, test_size=.2, stratify=y, random_state=0)

# Cell output: shapes of the two feature splits.
X_train.shape, X_test.shape

In [None]:
# Class distribution of the train and test targets (the split above was stratified on y).
print('Train Dataset:')
vp.value_count(y_train.to_frame(), 'target')
print('\nTest Dataset:')
vp.value_count(y_test.to_frame(), 'target')

### Baseline Classifier

In [None]:
def get_estimators():
    """Build the baseline classifiers to compare, each paired with a label.

    Several models are placed behind the project's DataFrame scalers in a
    pipeline, and some are wrapped in ``CalibratedClassifierCV`` and/or
    ``OneVsRestClassifier`` (presumably so that probability-based metrics
    such as ``roc_auc_ovr`` can be computed for them).

    The order of the returned list matters: the tuning loop looks up
    ``search_params`` by position, so both lists must stay aligned.

    Returns:
        list[tuple[str, estimator]]: ``(name, estimator)`` pairs where
        ``name`` is the class name of the innermost classifier, i.e. with
        pipelines and meta-estimator wrappers stripped away.
    """
    kwargs1 = {'random_state': 0, 'n_jobs': -1}
    kwargs2 = {'n_jobs': -1}
    kwargs3 = {'random_state': 0}

    estimators = [
        make_pipeline(
            DFStandardScaler(),
            KNeighborsClassifier(**kwargs2)
        ),
        make_pipeline(
            DFStandardScaler(),
            CalibratedClassifierCV(OneVsRestClassifier(LinearSVC(**kwargs3), **kwargs2))
        ),
        make_pipeline(
            DFStandardScaler(),
            OneVsRestClassifier(NuSVC(**kwargs3, probability=True), **kwargs2)
        ),
        make_pipeline(
            DFStandardScaler(),
            OneVsRestClassifier(SVC(**kwargs3, probability=True), **kwargs2)
        ),
        XGBClassifier(**kwargs1),
        LGBMClassifier(**kwargs1),
        GradientBoostingClassifier(**kwargs3),
        ExtraTreesClassifier(**kwargs1),
        RandomForestClassifier(**kwargs1),
        DecisionTreeClassifier(**kwargs3),
        ExtraTreeClassifier(**kwargs3),
        make_pipeline(
            DFRobustScaler(),
            DFMinMaxScaler(),
            LogisticRegression(**kwargs1)
        ),
        make_pipeline(
            DFRobustScaler(),
            DFMinMaxScaler(),
            CalibratedClassifierCV(PassiveAggressiveClassifier(**kwargs1))
        ),
        make_pipeline(
            DFRobustScaler(),
            DFMinMaxScaler(),
            CalibratedClassifierCV(Perceptron(**kwargs1))
        ),
        make_pipeline(
            DFStandardScaler(),
            CalibratedClassifierCV(RidgeClassifier(**kwargs3))
        ),
        make_pipeline(
            DFRobustScaler(),
            DFMinMaxScaler(),
            SGDClassifier(**kwargs1)
        ),
        make_pipeline(
            DFStandardScaler(),
            LinearDiscriminantAnalysis()
        ),
        make_pipeline(
            DFStandardScaler(),
            QuadraticDiscriminantAnalysis()
        ),
        make_pipeline(
            DFRobustScaler(),
            DFMinMaxScaler(),
            MLPClassifier(**kwargs3)
        ),
        make_pipeline(
            DFStandardScaler(),
            GaussianProcessClassifier(**kwargs1)
        ),
        BernoulliNB(),
        CategoricalNB(),
        ComplementNB(),
        GaussianNB(),
        MultinomialNB(),
        DummyClassifier(**kwargs3),
    ]

    # Forest ensembles keep their internal tree template in `estimator` /
    # `base_estimator`, yet they are the model being compared, not a wrapper.
    # Without this guard ExtraTreesClassifier would be reported as
    # 'ExtraTreeClassifier' and collide with the real single-tree model.
    not_wrappers = ('RandomForestClassifier', 'ExtraTreesClassifier')

    def model_name(model):
        """Return the class name of the innermost wrapped classifier."""
        name = model.__class__.__name__
        if name in not_wrappers:
            return name

        # Meta-estimators expose the wrapped model under one of these names,
        # depending on the class and the scikit-learn version.
        if hasattr(model, 'estimator'):
            return model_name(model.estimator)

        if hasattr(model, 'base_estimator'):
            return model_name(model.base_estimator)

        return name

    # For pipelines the classifier is the final step.
    return [
        (model_name(x.steps[-1][1] if isinstance(x, Pipeline) else x), x)
        for x in estimators
    ]

In [None]:
def cross_validation(estimator, X, y):
    """Cross-validate ``estimator`` on ``(X, y)`` with 10 stratified folds.

    Args:
        estimator: The classifier (or pipeline) to evaluate.
        X: Feature data passed to ``cross_validate``.
        y: Target labels passed to ``cross_validate``.

    Returns:
        The result of ``cross_validate`` scored on macro-F1, one-vs-rest
        ROC AUC and balanced accuracy.
    """
    metrics = ['f1_macro', 'roc_auc_ovr', 'balanced_accuracy']
    folds = StratifiedKFold(10)
    return cross_validate(estimator, X, y, scoring=metrics, cv=folds, verbose=10, n_jobs=-1)

In [None]:
# Cross-validate every untuned estimator on the training split and keep the
# per-fold scores of each metric, one list entry per model.
model_names, f1_scores, auc_scores, bacc_scores = [], [], [], []
eval_dict = {
    'model':             model_names,
    'f1_macro':          f1_scores,
    'roc_auc_ovr':       auc_scores,
    'balanced_accuracy': bacc_scores,
}

for name, estimator in get_estimators():
    print(name)
    cv_dict = cross_validation(estimator, X_train, y_train)

    model_names.append(name)
    f1_scores.append(cv_dict['test_f1_macro'])
    auc_scores.append(cv_dict['test_roc_auc_ovr'])
    bacc_scores.append(cv_dict['test_balanced_accuracy'])

In [None]:
# One row per model; each score column holds that model's array of per-fold scores.
eval_df = pd.DataFrame(eval_dict)

# Summarise every score column with its mean and standard deviation across folds.
score_columns = [column for column in eval_dict if column != 'model']
for column in score_columns:
    per_fold = eval_df[column]
    eval_df[f'mean_{column}'] = per_fold.apply(np.mean)
    eval_df[f'std_{column}'] = per_fold.apply(np.std)

# Long-format frames (one row per fold) for the box plot; the scalar model
# name is broadcast against the per-fold score arrays.
eval_dfs = [
    pd.DataFrame({
        'model': eval_df.at[row, 'model'],
        'f1_macro': eval_df.at[row, 'f1_macro'],
        'balanced_accuracy': eval_df.at[row, 'balanced_accuracy'],
    })
    for row in eval_df.index
]

###### Box

In [None]:
# Box plot of the baseline per-fold scores, coloured by model.
# 'boxmean': 'sd' and 'boxpoints': False are forwarded via box_kwargs
# (presumably to plotly's Box trace: show mean/std, hide individual points).
vp.box(
    pd.concat(eval_dfs, axis=0),
    color='model',
    max_col=2,
    title='Classifier - Baseline',
    out_path=OUT_PATH_GRAPH,
    layout_kwargs={'showlegend': False},
    box_kwargs={
        'boxmean': 'sd',
        'boxpoints': False,
    }
)

### Tuned Classifier

In [None]:
# Hyperparameter fragments that are shared by several sub-grids of
# `search_params` below. Keys for estimators that live inside a pipeline use
# the `<step name>__<parameter>` convention (with further `__` levels for
# wrapped estimators); estimators used bare take plain parameter names.

# KNeighborsClassifier (pipeline step `kneighborsclassifier`).
kneighbors_params = {
    'kneighborsclassifier__n_neighbors': [3, 5, 10, 25, 50, 100],
    'kneighborsclassifier__weights': ['uniform', 'distance'],
    'kneighborsclassifier__metric': ['euclidean', 'manhattan'],
}
# LinearSVC, reached through CalibratedClassifierCV -> OneVsRestClassifier.
linearsvc_params = {
    'calibratedclassifiercv__base_estimator__estimator__C': [.001, .01, .1, 1, 10, 100, 1000],
    'calibratedclassifiercv__base_estimator__estimator__fit_intercept': [True, False],
    'calibratedclassifiercv__base_estimator__estimator__max_iter': [1000],
}
# NuSVC, reached through OneVsRestClassifier.
nusvc_params = {
    'onevsrestclassifier__estimator__nu': np.linspace(.1, 1, 10),
    'onevsrestclassifier__estimator__probability': [True],
    'onevsrestclassifier__estimator__max_iter': [1000],
    'onevsrestclassifier__estimator__decision_function_shape': ['ovr'],
    'onevsrestclassifier__estimator__break_ties': [True, False],
}
# SVC, reached through OneVsRestClassifier.
svc_params = {
    'onevsrestclassifier__estimator__probability': [True],
    'onevsrestclassifier__estimator__max_iter': [1000],
    'onevsrestclassifier__estimator__decision_function_shape': ['ovr'],
    'onevsrestclassifier__estimator__break_ties': [True, False],
}
# Common to the tree-based models that are used without a pipeline.
tree_params = {
    'max_depth': [None, 3, 5, 7, 10, 15],
    'min_samples_split': np.linspace(.1, 1, 10),
    'min_samples_leaf': np.linspace(.1, .5, 5),
    'max_features': ['sqrt', 'log2'] + list(np.linspace(.5, 1, 6)),
}
# LogisticRegression (pipeline step `logisticregression`).
logistic_params = {
    'logisticregression__C': [.001, .01, .1, 1, 10, 100, 1000],
    'logisticregression__fit_intercept': [True, False],
    'logisticregression__max_iter': [1000],
    'logisticregression__multi_class': ['auto'],
}
# SGDClassifier (pipeline step `sgdclassifier`).
sgd_params = {
    # Other losses previously considered here: 'hinge', 'squared_hinge',
    # 'squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'.
    'sgdclassifier__loss': ['modified_huber'], # Available for probabilities
    'sgdclassifier__penalty': ['l1', 'l2', 'elasticnet'],
    'sgdclassifier__l1_ratio': np.linspace(.1, .9, 9),
    'sgdclassifier__fit_intercept': [True, False],
    'sgdclassifier__max_iter': [1000],
    'sgdclassifier__average': [True, False],
}
# MLPClassifier (pipeline step `mlpclassifier`).
mlp_params = {
    'mlpclassifier__hidden_layer_sizes': [(100,), (32, 64, 128) , (128, 64, 32)],
    'mlpclassifier__activation': ['relu'],
    'mlpclassifier__max_iter': [1000],
}
# Common to the naive Bayes variants that accept `alpha` / `fit_prior`.
naivebayes_params = {
    'alpha': np.linspace(0, 1, 11),
    'fit_prior': [True, False],
}
# LGBMClassifier; num_leaves / max_depth are paired up in `search_params`.
lgb_params = {
    'boosting_type': ['gbdt', 'dart', 'goss', 'rf'],
    'learning_rate': [.1, .01],
    'objective': ['binary'],
    'colsample_bytree': np.linspace(.5, 1, 6),
    'reg_alpha': np.linspace(0, 1, 11),
    'reg_lambda': np.linspace(0, 1, 11),
}


# One entry per estimator, in exactly the order returned by get_estimators():
# the tuning loop looks entries up by position. Each entry is a list of
# sub-grids so that mutually incompatible options are never combined.
#
# NOTE(review): the `calibratedclassifiercv__base_estimator__...` keys use the
# parameter name of older scikit-learn releases (newer ones call it
# `estimator`) - confirm against the installed version.
search_params = [
    # KNeighborsClassifier
    [
        {
            **kneighbors_params,
            'kneighborsclassifier__algorithm': ['ball_tree', 'kd_tree'],
            'kneighborsclassifier__leaf_size': [3, 30, 300, 3000],
        },
        {
            **kneighbors_params,
            'kneighborsclassifier__algorithm': ['brute'],
        }
    ],
    # LinearSVC (only valid penalty / loss / dual combinations are listed)
    [
        {
            **linearsvc_params,
            'calibratedclassifiercv__base_estimator__estimator__multi_class': ['ovr'],
            'calibratedclassifiercv__base_estimator__estimator__penalty': ['l2'],
            'calibratedclassifiercv__base_estimator__estimator__loss': ['squared_hinge'],
            'calibratedclassifiercv__base_estimator__estimator__dual': [True, False],
        },
        {
            **linearsvc_params,
            'calibratedclassifiercv__base_estimator__estimator__multi_class': ['ovr'],
            'calibratedclassifiercv__base_estimator__estimator__penalty': ['l2'],
            'calibratedclassifiercv__base_estimator__estimator__loss': ['hinge'],
            'calibratedclassifiercv__base_estimator__estimator__dual': [True],
        },
        {
            **linearsvc_params,
            'calibratedclassifiercv__base_estimator__estimator__multi_class': ['ovr'],
            'calibratedclassifiercv__base_estimator__estimator__penalty': ['l1'],
            'calibratedclassifiercv__base_estimator__estimator__loss': ['squared_hinge'],
            'calibratedclassifiercv__base_estimator__estimator__dual': [False],
        },
        {
            **linearsvc_params,
            'calibratedclassifiercv__base_estimator__estimator__multi_class': ['crammer_singer'],
        }
    ],
    # NuSVC
    # kernel='precomputed' is deliberately not searched: it expects a square
    # (n_samples, n_samples) kernel matrix, not the scaled feature table the
    # pipeline provides, so every such candidate would fail to fit.
    [
        {
            **nusvc_params,
            'onevsrestclassifier__estimator__kernel': ['linear'],
        },
        {
            **nusvc_params,
            'onevsrestclassifier__estimator__kernel': ['rbf', 'sigmoid'],
            'onevsrestclassifier__estimator__gamma': [.001, .01, .1, 1, 10, 100, 1000, 'scale', 'auto'],
        },
        {
            **nusvc_params,
            'onevsrestclassifier__estimator__kernel': ['poly'],
            'onevsrestclassifier__estimator__gamma': [.001, .01, .1, 1, 10, 100, 1000, 'scale', 'auto'],
            # The polynomial degree must be a non-negative integer.
            'onevsrestclassifier__estimator__degree': [2, 3, 4, 5],
        }
    ],
    # SVC (same kernel / degree restrictions as NuSVC above)
    [
        {
            **svc_params,
            'onevsrestclassifier__estimator__kernel': ['linear'],
        },
        {
            **svc_params,
            'onevsrestclassifier__estimator__kernel': ['rbf', 'sigmoid'],
            'onevsrestclassifier__estimator__gamma': [.001, .01, .1, 1, 10, 100, 1000, 'scale', 'auto'],
        },
        {
            **svc_params,
            'onevsrestclassifier__estimator__kernel': ['poly'],
            'onevsrestclassifier__estimator__gamma': [.001, .01, .1, 1, 10, 100, 1000, 'scale', 'auto'],
            'onevsrestclassifier__estimator__degree': [2, 3, 4, 5],
        }
    ],
    # XGBClassifier
    [
        {
            'n_estimators': [25, 50, 100, 500, 1000],
            'max_depth': [None, 3, 5, 7, 10, 15],
            'learning_rate': [.1, .01],
            'objective': ['reg:logistic', 'binary:logistic'],
            'booster': ['gbtree', 'gblinear', 'dart'],
            'subsample': [.8],
            'colsample_bytree': np.linspace(.5, 1, 6),
            'reg_alpha': np.linspace(0, 1, 11),
            'reg_lambda': np.linspace(0, 1, 11),
        }
    ],
    # LGBMClassifier (num_leaves is paired with max_depth in each sub-grid)
    [
        {
            **lgb_params,
            'num_leaves': [31],
            'max_depth': [-1],
        },
        {
            **lgb_params,
            'num_leaves': [9],
            'max_depth': [3],
        },
        {
            **lgb_params,
            'num_leaves': [25],
            'max_depth': [5],
        },
        {
            **lgb_params,
            'num_leaves': [49],
            'max_depth': [7],
        },
        {
            **lgb_params,
            'num_leaves': [100],
            'max_depth': [10],
        },
        {
            **lgb_params,
            'num_leaves': [225],
            'max_depth': [15],
        }
    ],
    # GradientBoostingClassifier
    [
        {
            **tree_params,
            'criterion': ['friedman_mse', 'mse', 'mae'],
            'n_estimators': [25, 50, 100, 500, 1000],
            # 'exponential' loss only supports binary targets, so it is not
            # searched for this three-class problem.
            'loss': ['deviance'],
            'learning_rate': [.1, .01],
            'subsample': np.linspace(.5, 1, 6),
        }
    ],
    # ExtraTreesClassifier
    [
        {
            **tree_params,
            'criterion': ['gini', 'entropy'],
            'n_estimators': [25, 50, 100, 500, 1000],
            # Out-of-bag scoring is only available when bootstrap=True.
            'oob_score': [False],
            'bootstrap': [False],
        },
        {
            **tree_params,
            'criterion': ['gini', 'entropy'],
            'n_estimators': [25, 50, 100, 500, 1000],
            'oob_score': [True, False],
            'bootstrap': [True],
            'max_samples': np.linspace(.5, 1, 6),
        }
    ],
    # RandomForestClassifier
    [
        {
            **tree_params,
            'criterion': ['gini', 'entropy'],
            'n_estimators': [25, 50, 100, 500, 1000],
            'bootstrap': [False],
        },
        {
            **tree_params,
            'criterion': ['gini', 'entropy'],
            'n_estimators': [25, 50, 100, 500, 1000],
            'bootstrap': [True],
            'max_samples': np.linspace(.5, 1, 6),
        }
    ],
    # DecisionTreeClassifier
    [
        {
            **tree_params,
            'criterion': ['gini', 'entropy'],
            'splitter': ['random', 'best'],
        }
    ],
    # ExtraTreeClassifier
    [
        {
            **tree_params,
            'criterion': ['gini', 'entropy'],
            'splitter': ['random', 'best'],
        }
    ],
    # LogisticRegression
    [
        {
            **logistic_params,
            # Only liblinear and saga support the l1 penalty.
            'logisticregression__solver': ['liblinear', 'saga'],
            'logisticregression__penalty': ['l1'],
        },
        {
            **logistic_params,
            'logisticregression__solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
            'logisticregression__penalty': ['l2'],
        },
        {
            **logistic_params,
            'logisticregression__solver': ['liblinear'],
            'logisticregression__penalty': ['l2'],
            'logisticregression__dual': [True, False],
        },
        {
            **logistic_params,
            'logisticregression__solver': ['saga'],
            'logisticregression__penalty': ['elasticnet'],
            'logisticregression__l1_ratio': np.linspace(.1, .9, 9),
        }
    ],
    # PassiveAggressiveClassifier
    [
        {
            'calibratedclassifiercv__base_estimator__C': [.001, .01, .1, 1, 10, 100, 1000],
            'calibratedclassifiercv__base_estimator__fit_intercept': [True, False],
            'calibratedclassifiercv__base_estimator__max_iter': [1000],
            'calibratedclassifiercv__base_estimator__loss': ['hinge', 'squared_hinge'],
            'calibratedclassifiercv__base_estimator__average': [True, False],
        }
    ],
    # Perceptron
    [
        {
            'calibratedclassifiercv__base_estimator__penalty': [None, 'l1', 'l2', 'elasticnet'],
            'calibratedclassifiercv__base_estimator__fit_intercept': [True, False],
            'calibratedclassifiercv__base_estimator__max_iter': [1000],
        }
    ],
    # RidgeClassifier
    [
        {
            'calibratedclassifiercv__base_estimator__alpha': [.001, .01, .1, 1, 10, 100, 1000],
            'calibratedclassifiercv__base_estimator__fit_intercept': [True, False],
            'calibratedclassifiercv__base_estimator__max_iter': [1000],
            'calibratedclassifiercv__base_estimator__solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag','saga'],
        }
    ],
    # SGDClassifier
    [
        {
            **sgd_params,
            'sgdclassifier__learning_rate': ['optimal'],
        },
        {
            **sgd_params,
            'sgdclassifier__learning_rate': ['invscaling', 'adaptive'],
            'sgdclassifier__eta0': [.1],
        }
    ],
    # LinearDiscriminantAnalysis (shrinkage is only searched for lsqr / eigen)
    [
        {
            'lineardiscriminantanalysis__solver': ['svd'],
        },
        {
            'lineardiscriminantanalysis__solver': ['lsqr', 'eigen'],
            'lineardiscriminantanalysis__shrinkage': [None, 'auto'] + list(np.linspace(0, 1, 11)),
        }
    ],
    # QuadraticDiscriminantAnalysis
    [
        {
            'quadraticdiscriminantanalysis__reg_param': np.linspace(0, 1, 11),
        }
    ],
    # MLPClassifier
    [
        {
            **mlp_params,
            'mlpclassifier__solver': ['lbfgs'],
        },
        {
            **mlp_params,
            'mlpclassifier__solver': ['adam'],
            'mlpclassifier__learning_rate_init': [.01, .001],
        },
        {
            **mlp_params,
            'mlpclassifier__solver': ['sgd'],
            'mlpclassifier__learning_rate': ['invscaling', 'adaptive'],
            'mlpclassifier__learning_rate_init': [.01, .001],
        }
    ],
    # GaussianProcessClassifier
    [
        {
            'gaussianprocessclassifier__kernel': [x * RBF(x) for x in [.001, .01, .1, 1, 10, 100, 1000]],
            'gaussianprocessclassifier__max_iter_predict': [1000],
            # 'one_vs_one' is left out: only 'one_vs_rest' is available for
            # the roc_auc_ovr metric.
            'gaussianprocessclassifier__multi_class': ['one_vs_rest'],
        }
    ],
    # BernoulliNB
    [
        {
            **naivebayes_params,
        }
    ],
    # CategoricalNB
    [
        {
            **naivebayes_params,
        }
    ],
    # ComplementNB
    [
        {
            **naivebayes_params,
            'norm': [True, False],
        }
    ],
    # GaussianNB
    [
        {
            'var_smoothing': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9],
        }
    ],
    # MultinomialNB
    [
        {
            **naivebayes_params,
        }
    ],
    # DummyClassifier
    [
        {
            'strategy': ['stratified', 'prior', 'uniform'],
        }
    ]
]

In [None]:
# Tune every estimator with a randomized search (refit on macro-F1), then
# cross-validate the refit winner to collect its per-fold scores.
model_names, f1_scores, auc_scores, bacc_scores = [], [], [], []
best_params, best_estimators = [], []
eval_dict = {
    'model':             model_names,
    'f1_macro':          f1_scores,
    'roc_auc_ovr':       auc_scores,
    'balanced_accuracy': bacc_scores,
    'best_param':        best_params,
    'best_estimator':    best_estimators,
}
scoring_metrics = ['f1_macro', 'roc_auc_ovr', 'balanced_accuracy']

for index, (name, estimator) in enumerate(get_estimators()):
    print(name)
    model_names.append(name)

    # search_params is aligned with get_estimators() by position.
    search = RandomizedSearchCV(
        estimator,
        search_params[index],
        n_iter=100,
        scoring=scoring_metrics,
        refit='f1_macro',
        cv=StratifiedKFold(n_splits=10),
        n_jobs=-1,
        verbose=10,
        random_state=0,
    )
    search.fit(X_train, y_train)
    best_params.append(search.best_params_)
    best_estimators.append(search.best_estimator_)

    cv_dict = cross_validation(search.best_estimator_, X_train, y_train)
    f1_scores.append(cv_dict['test_f1_macro'])
    auc_scores.append(cv_dict['test_roc_auc_ovr'])
    bacc_scores.append(cv_dict['test_balanced_accuracy'])

In [None]:
# One row per tuned model; each score column holds that model's array of per-fold scores.
eval_df = pd.DataFrame(eval_dict)

# Summarise every score column (i.e. everything except the bookkeeping
# columns) with its mean and standard deviation across folds.
bookkeeping = ['model', 'best_param', 'best_estimator']
score_columns = [column for column in eval_dict if column not in bookkeeping]
for column in score_columns:
    per_fold = eval_df[column]
    eval_df[f'mean_{column}'] = per_fold.apply(np.mean)
    eval_df[f'std_{column}'] = per_fold.apply(np.std)

# Long-format frames (one row per fold) for the box plot; the scalar model
# name is broadcast against the per-fold score arrays.
eval_dfs = [
    pd.DataFrame({
        'model': eval_df.at[row, 'model'],
        'f1_macro': eval_df.at[row, 'f1_macro'],
        'balanced_accuracy': eval_df.at[row, 'balanced_accuracy'],
    })
    for row in eval_df.index
]

###### Box

In [None]:
# Box plot of the tuned models' per-fold scores, coloured by model.
# 'boxmean': 'sd' and 'boxpoints': False are forwarded via box_kwargs
# (presumably to plotly's Box trace: show mean/std, hide individual points).
vp.box(
    pd.concat(eval_dfs, axis=0),
    color='model',
    max_col=2,
    title='Classifier - Tuned',
    out_path=OUT_PATH_GRAPH,
    layout_kwargs={'showlegend': False},
    box_kwargs={
        'boxmean': 'sd',
        'boxpoints': False,
    }
)

###### Confusion Matrix

In [None]:
# Build one annotated confusion-matrix heatmap per tuned model, evaluated on
# the held-out test split, and collect the traces / annotations so they can
# be laid out as subplots afterwards.
data_groups = []
annotations = ()

# Shared colour-scale ceiling: the size of the largest true class, i.e. the
# highest count any single confusion-matrix cell can reach.
zmax = y_test.value_counts().max()

for position, index in enumerate(tqdm(eval_df.index)):
    best_estimator = eval_df.at[index, 'best_estimator']

    y_pred = best_estimator.predict(X_test)
    try:
        y_prob = best_estimator.predict_proba(X_test)
    except AttributeError:
        # No probability output: fall back to one-hot encoded predictions,
        # keeping one column per known class even if a class was never predicted.
        y_prob = pd.get_dummies(y_pred).reindex(columns=best_estimator.classes_, fill_value=0)

    # Kept under its own name so the tuning results in `eval_dict` are not overwritten.
    evaluation = mlpipe.eval_classif(
        y_test,
        y_pred,
        y_prob=y_prob,
        multi_class='ovr',
        return_evaluation=True,
        show_evaluation=False
    )

    # Reverse sorting to ensure plotly display is same as dataframe layout
    tmp_df = evaluation['matrix'].sort_index(ascending=False)

    # Heatmap data
    fig = ff.create_annotated_heatmap(
        z=tmp_df.values,
        x=[f'Pred {x}' for x in tmp_df.columns],
        y=[f'True {x}' for x in tmp_df.index],
        colorscale='Portland',
        zmin=0,
        zmax=zmax
    )
    data_groups.append(fig['data'])

    # Heatmap annotation: point each annotation at the axes of its own
    # subplot (plotly names them x, x2, x3, ... in subplot order).
    suffix = '' if position == 0 else position + 1
    annotation = fig['layout']['annotations']
    for x in annotation:
        x['xref'] = f'x{suffix}'
        x['yref'] = f'y{suffix}'
    annotations += annotation

In [None]:
# Lay the collected heatmaps out in a grid (4 per row), one subplot per model,
# re-attaching the per-subplot annotations through the layout.
vp.datagroups_subplots(data_groups,
                       xaxis_titles=eval_df['model'],
                       max_col=4,
                       title='Confusion Matrix',
                       out_path=OUT_PATH_GRAPH,
                       layout_kwargs={
                           'height': 2000,
                           'annotations': annotations,
                       })