In [1]:
import pandas as pd
import numpy as np
import pickle

# Classifier names recognised by parse_model_selection_result. Each name is
# compared against item 3 of a candidate's pipeline tuple, and the order here
# fixes the order of the returned per-classifier results.
# NOTE(review): these look like lower-cased scikit-learn estimator class
# names -- confirm against the code that produced results.pkl.
CLASSIFIER_MODES = [
    'decisiontreeclassifier',
    'gaussiannb',
    'multinomialnb',
    'svc',
    'adaboostclassifier',
    'randomforestclassifier',
    'mlpclassifier',
]

def _mean_split_f1(cv_best: dict) -> float:
    """Return the mean F1 over the ``split_<j>`` entries of one CV record.

    Args:
        cv_best: Dict holding ``'split_0'``, ``'split_1'``, ... entries, each
            a dict with an ``'f1'`` value. Any other key (e.g. ``'param'``)
            is ignored.

    Returns:
        Mean of the per-split F1 scores with NaN scores counted as 0, or 0.0
        when the record holds no split entries at all.

    """
    # Count the split entries explicitly rather than assuming that every key
    # except one is a split; extra bookkeeping keys must not break the lookup.
    n_splits = sum(1 for key in cv_best
                   if isinstance(key, str) and key.startswith('split_'))
    if n_splits == 0:
        # np.mean([]) would give NaN (plus a RuntimeWarning), and NaN makes
        # the ranking sort unreliable. Score it like any other NaN F1: 0.
        return np.float64(0.0)

    f1s = [cv_best[f'split_{j}']['f1'] for j in range(n_splits)]
    return np.mean(np.nan_to_num(f1s))


def parse_model_selection_result(ms_result: tuple) -> list:
    """Parse the model selection result tuple and get the best models.

    Candidates are ranked by the mean of their per-split F1 scores (NaN
    scores count as 0). For every name in ``CLASSIFIER_MODES`` the
    top-ranked candidate whose pipeline tuple has that name at index 3 is
    kept; ties keep the candidate that appears first in the input.

    Args:
        ms_result: Two-element tuple of which only the first element is
            used. That element is an iterable of ``(i, c, cv)`` triples,
            where ``c[3]`` is the classifier name and ``cv['best']`` is a
            dict with a ``'param'`` entry (dict or None) plus
            ``'split_<j>'`` entries that each hold an ``'f1'`` value.

    Returns:
        List of ``((i, c, cv_best), f1_mean)`` tuples, one per classifier in
        ``CLASSIFIER_MODES`` that has at least one candidate, in
        ``CLASSIFIER_MODES`` order. Every key of ``cv_best['param']`` is
        reduced to the part after its last ``'__'`` (presumably dropping a
        pipeline-step prefix -- confirm with the producer of the results).
        ``ms_result`` itself is never modified: a record whose ``'param'``
        is rewritten is returned as a shallow copy.

    """
    candidates, _ = ms_result

    scored = [((i, c, cv['best']), _mean_split_f1(cv['best']))
              for i, c, cv in candidates]
    # list.sort is stable, also with reverse=True, so equally scored
    # candidates keep their input order.
    scored.sort(key=lambda entry: entry[1], reverse=True)

    best_candidate_per_clf = []
    for clf in CLASSIFIER_MODES:
        for (i, c, cv_best), f1_mean in scored:
            if c[3] != clf:
                continue

            if cv_best['param'] is not None:
                # Copy before rewriting so the caller's (typically
                # unpickled) result object keeps its original keys.
                cv_best = dict(cv_best)
                cv_best['param'] = {k.split('__')[-1]: v
                                    for k, v in cv_best['param'].items()}

            best_candidate_per_clf.append(((i, c, cv_best), f1_mean))
            break  # `scored` is sorted, so the first match is the best one.

    return best_candidate_per_clf

In [2]:
# Load the pickled model-selection result written to the
# output_12to18_yesmental run directory.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# open results.pkl files that this project produced itself.
with open('./output/10MVIout/output_12to18_yesmental/results.pkl', 'rb') as f:
    model_selection_result = pickle.load(f)

In [3]:
# Display the raw loaded result to inspect its nested structure.
# NOTE(review): the repr is very long; consider showing a single candidate instead.
model_selection_result

([(1,
   ('standard', 'knn', 'isolation_forest', 'gaussiannb'),
   {0: {'param': None,
     'split_0': {'tn': 159,
      'fp': 10,
      'fn': 323,
      'tp': 15,
      'f1': 0.08264462809917354},
     'split_1': {'tn': 167,
      'fp': 12,
      'fn': 312,
      'tp': 16,
      'f1': 0.0898876404494382},
     'split_2': {'tn': 171,
      'fp': 12,
      'fn': 301,
      'tp': 23,
      'f1': 0.12813370473537603},
     'split_3': {'tn': 190,
      'fp': 6,
      'fn': 291,
      'tp': 19,
      'f1': 0.11343283582089551},
     'split_4': {'tn': 179,
      'fp': 5,
      'fn': 313,
      'tp': 9,
      'f1': 0.053571428571428575}},
    'best': {'param': None,
     'split_0': {'tn': 159,
      'fp': 10,
      'fn': 323,
      'tp': 15,
      'f1': 0.08264462809917354},
     'split_1': {'tn': 167,
      'fp': 12,
      'fn': 312,
      'tp': 16,
      'f1': 0.0898876404494382},
     'split_2': {'tn': 171,
      'fp': 12,
      'fn': 301,
      'tp': 23,
      'f1': 0.12813370473537603},


In [4]:
# Load the pickled model-selection result written to the
# output_12to18_yesmental_smotefirst run directory (presumably the run with
# SMOTE applied first, judging by the directory name -- confirm).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# open results.pkl files that this project produced itself.
with open('./output/10MVIout/output_12to18_yesmental_smotefirst/results.pkl', 'rb') as f:
    model_selection_result_smote = pickle.load(f)

In [5]:
# Display the raw loaded result to inspect its nested structure.
# NOTE(review): the repr is very long; consider showing a single candidate instead.
model_selection_result_smote

([(1,
   ('standard', 'knn', 'isolation_forest', 'gaussiannb'),
   {0: {'param': None,
     'split_0': {'tn': 307,
      'fp': 12,
      'fn': 308,
      'tp': 22,
      'f1': 0.12087912087912088},
     'split_1': {'tn': 310,
      'fp': 8,
      'fn': 307,
      'tp': 24,
      'f1': 0.1322314049586777},
     'split_2': {'tn': 315,
      'fp': 15,
      'fn': 303,
      'tp': 16,
      'f1': 0.09142857142857141},
     'split_3': {'tn': 311,
      'fp': 11,
      'fn': 308,
      'tp': 19,
      'f1': 0.10644257703081232},
     'split_4': {'tn': 325,
      'fp': 8,
      'fn': 301,
      'tp': 14,
      'f1': 0.08308605341246292}},
    'best': {'param': None,
     'split_0': {'tn': 307,
      'fp': 12,
      'fn': 308,
      'tp': 22,
      'f1': 0.12087912087912088},
     'split_1': {'tn': 310,
      'fp': 8,
      'fn': 307,
      'tp': 24,
      'f1': 0.1322314049586777},
     'split_2': {'tn': 315,
      'fp': 15,
      'fn': 303,
      'tp': 16,
      'f1': 0.09142857142857141},
 