# Skripsi v1

## Integrasi SMOTE dan Neural Network

In [1]:
def repro():
  seed_value= 42

  import os
  os.environ['PYTHONHASHSEED']=str(seed_value)
  import random
  random.seed(seed_value)

  import numpy as np
  np.random.seed(seed_value)

  import tensorflow as tf
  tf.random.set_seed(seed_value)

  from tensorflow.keras import backend as K
  
  session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
  sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
  tf.compat.v1.keras.backend.set_session(sess)

In [None]:
repro()

In [None]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

# Import 

In [None]:
import model
import metric
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from bayes_opt import BayesianOptimization as bayesopt
import tool

%load_ext autoreload
%autoreload 2

# Dataset

In [None]:
dataset_kc1 = pd.read_csv("../Dataset/clean/KC1.csv")
dataset_kc2 = pd.read_csv("../Dataset/clean/KC2.csv")
dataset_pc1 = pd.read_csv("../Dataset/clean/PC1.csv")
dataset_pc3 = pd.read_csv("../Dataset/clean/PC3.csv")
dataset_pc4 = pd.read_csv("../Dataset/clean/PC4.csv")
dataset_cm1 = pd.read_csv("../Dataset/clean/CM1.csv")
dataset_jm1 = pd.read_csv("../Dataset/clean/JM1.csv")

idx_missing_JM1 = dataset_jm1.loc[dataset_jm1['uniq_Op'] == '?'].index
dataset_jm1 = dataset_jm1.drop(idx_missing_JM1)

dataset = {
    'KC1' : dataset_kc1,
    'KC2' : dataset_kc2,
    'PC1' : dataset_pc1,
    'PC3' : dataset_pc3,
    'PC4' : dataset_pc4,
    'CM1' : dataset_cm1,
    'JM1' : dataset_jm1
}

Setiap dataset memiliki column class berbeda-beda. dibawah ini proses mengubah kolom kelas menjadi seragam ("Defect")

In [None]:
for i in dataset:
    print(i)
    dataset[i] = dataset[i].rename(columns = {dataset[i].columns[-1]:'defect'})
    display(dataset[i])

## Melihat Distribution

Melihat distribusi Dataset

In [None]:
from sklearn.decomposition import PCA
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline
scaller = MinMaxScaler()


pca = PCA(n_components=3)
for i in dataset:
    data = dataset[i]
    x = data.loc[:, data.columns != 'defect'].values 
    x = scaller.fit_transform(x)

    
    principalComponents = pca.fit_transform(x)
    principalDf = pd.DataFrame(data = principalComponents, columns = ['V1', 'V2','V3'])
    finalDf = pd.concat([principalDf, data[['defect']]], axis = 1)
    
    fig = plt.figure(figsize = (8,8))
    ax = plt.axes(projection ='3d') 
    ax.set_xlabel('V1', fontsize = 15)
    ax.set_ylabel('V2', fontsize = 15)
    ax.set_zlabel('V3', fontsize = 15) 
    ax.set_title(i, fontsize = 20)
    targets = [False, True]
    colors = ['g', 'r','b']
    for target, color in zip(targets,colors):
        indicesToKeep = finalDf['defect'] == target
        ax.scatter(finalDf.loc[indicesToKeep, 'V1'], finalDf.loc[indicesToKeep, 'V2'],finalDf.loc[indicesToKeep, 'V3'], c = color, s = 50)
        ax.legend(targets)
        ax.grid()

# Check DMR

In [None]:
dmr_data = {}
for i in dataset:
    unique, counts = np.unique(dataset[i]['defect'], return_counts=True)
    dmr = (round((counts[1]/(counts[0]+counts[1])*100),2))
    print(dmr, unique, counts)
    dmr_data[i] = dmr

# Penerapan Min-Max Scaller untuk mengubah variant distribusi
$$ x_{scaled} = \frac{x - x_{min}}{x_{max} - x_{min}} $$

In [None]:
for i in dataset:
    dataset[i].loc[:, dataset[i].columns != dataset[i].columns[-1]] = scaller.fit_transform(dataset[i].loc[:, dataset[i].columns != dataset[i].columns[-1]])
    print(i)
    display(dataset[i])

# Load Data to 10 Cross Validation

In [35]:
data_load = {}
for i in dataset:
    data_load[i] = tool.load_data(dataset[i])

# SMOTE

In [36]:
def SMOTUNE(X_train, y_train, sampling, neighbors):
    neighbors = int(neighbors)
    sm = SMOTE(random_state=42, sampling_strategy=sampling, k_neighbors=neighbors)
    X_train, y_train = sm.fit_sample(X_train, y_train)
    return X_train, y_train

# Bayesian Optimization

In [37]:
from bayes_opt import UtilityFunction

def optimize(model, pbounds, init = 15, iter = 35):
    repro()
    utility = UtilityFunction(kind="ei", kappa=2.5, xi=0.0)

    optimizer = bayesopt(
        f=model,
        pbounds=pbounds,
        verbose=2,
        random_state=42,
    )
    optimizer.maximize(init_points=init, n_iter=iter, acq='ei')

    return optimizer

# Tie all it Together

In [23]:
from tensorflow.keras.utils import plot_model
from imblearn.over_sampling import SMOTE
test = []
def run_ml(data, fold, pbounds, model, do_optimize = True):
    repro()
    fold_generator = tool.split(data, fold)
    sm = SMOTE(random_state=42)

    
    def go(do_optimize = True, **pbounds):
        
        y_train_pred_collect, y_dev_pred_collect, y_test_pred_collect = [], [], []
        y_train_true_collect, y_dev_true_collect, y_test_true_collect = [], [], []
        models = model(**pbounds)
        
        
        
        for remainder, test in fold_generator():
            repro()
            train, dev = next(tool.split(remainder, fold)())

            X_train, y_train = train[0], train[1]
            X_dev, y_dev = dev[0], dev[1]
            X_test, y_test = test[0],test[1]
            
#             if 'sampling' and 'neighbors' in pbounds:
            X_train, y_train = SMOTUNE(X_train, y_train, pbounds['sampling'], pbounds['neighbors'])

            models.fit(X_train, y_train)


            #Collect Train Dev Test
    
            y_train_pred_collect = np.append(y_train_pred_collect, models.predict(X_train).round())
            y_train_true_collect = np.append(y_train_true_collect, y_train)
            
            y_dev_pred_collect = np.append(y_dev_pred_collect, models.predict(X_dev).round())
            y_dev_true_collect = np.append(y_dev_true_collect, y_dev)
            
            y_test_pred_collect = np.append(y_test_pred_collect, models.predict(X_test).round())
            y_test_true_collect = np.append(y_test_true_collect, y_test)
            
        
        if(do_optimize):
            result = metric.gmeans(y_dev_true_collect, y_dev_pred_collect)
        else:

            result = {
                "Train" : {
                    "Gmeans" : metric.gmeans(y_train_true_collect, y_train_pred_collect),
                    "Recall" : metric.recall(y_train_true_collect, y_train_pred_collect),
                    "pf" : metric.pf(y_train_true_collect, y_train_pred_collect),
                    "cf_matrix" : metric.conf_matrix(y_train_true_collect, y_train_pred_collect)
                },
                "Dev" : {
                    "Gmeans" : metric.gmeans(y_dev_true_collect, y_dev_pred_collect),
                    "Recall" : metric.recall(y_dev_true_collect, y_dev_pred_collect),
                    "pf" : metric.pf(y_dev_true_collect, y_dev_pred_collect),
                    "cf_matrix" : metric.conf_matrix(y_dev_true_collect, y_dev_pred_collect)
                },
                "Test" : {
                    "Gmeans" : metric.gmeans(y_test_true_collect, y_test_pred_collect),
                    "Recall" : metric.recall(y_test_true_collect, y_test_pred_collect),
                    "pf" : metric.pf(y_test_true_collect, y_test_pred_collect),
                    "cf_matrix" : metric.conf_matrix(y_test_true_collect, y_test_pred_collect)
                }
                
            }
        return result
        
    if(do_optimize):
        return optimize(go, pbounds, init = 15, iter = 35)
    else:
        return go(do_optimize = False, **pbounds)
        

# Define Hyperparam Search Space

Learning Rate = 0.001 to 0.01

Dropout = 0.1 to 0.6

layer = 1 to 5

sampling =  DMR to 1

neighbors =  1 - 20

In [13]:
for i in dataset:    
    bound_naive_bayes[i] = {
        'var_smoothing': (0.0,1),
        'sampling': ((dmr_data[i]/100)+0.15,1),
        'neighbors': (1,10)
    }


    bound_logistic_regression[i] = {
        'sampling': ((dmr_data[i]/100)+0.15,1),
        'neighbors': (1,10),
        'max_iter': (50,200),
    #     'verbose': (0,10),
        'c': (1,10)
    }

    bound_decission_tree[i] = {
        'depth': (1, 10),
        'sampling': ((dmr_data[i]/100)+0.15,1),
        'neighbors': (1,10)
    }

    bound_random_forest[i] = {
        'min_leaf' : (1,20),
        'min_split' : (2,20),
        'max_leaf_nodes' : (2,50),
        'max_features' : (0.01,1),
        'depth': (1, 10),
        'estimator': (10,150),
        'sampling': ((dmr_data[i]/100)+0.15,1),
        'neighbors': (1,10)
    }

    bound_knn[i] = {
        'sampling': ((dmr_data[i]/100)+0.15,1),
        'neighbors': (1,10),
        'n': (1,10),
        'leaf': (10,100)
    }

# Let's Optimize it

## Random Forest

### KC1

In [20]:
hyperparam_rf_KC1 = run_ml(data_load['KC1'], 10, bound_random_forest['KC1'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.73    [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.9069  [0m |
| [0m 2       [0m | [0m 0.6442  [0m | [0m 6.41    [0m | [0m 109.1   [0m | [0m 0.03038 [0m | [0m 48.56   [0m | [0m 16.82   [0m | [0m 5.822   [0m | [0m 2.636   [0m | [0m 0.4321  [0m |
| [0m 3       [0m | [0m 0.6654  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5594  [0m |
| [0m 4       [0m | [0m 0.6459  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m | [0m 2.8

| [0m 41      [0m | [0m 0.6708  [0m | [0m 9.358   [0m | [0m 32.83   [0m | [0m 0.2873  [0m | [0m 37.01   [0m | [0m 19.56   [0m | [0m 9.682   [0m | [0m 5.972   [0m | [0m 0.4493  [0m |
| [0m 42      [0m | [0m 0.6898  [0m | [0m 4.397   [0m | [0m 69.44   [0m | [0m 0.7803  [0m | [0m 31.73   [0m | [0m 19.24   [0m | [0m 5.69    [0m | [0m 6.458   [0m | [0m 0.7379  [0m |
| [0m 43      [0m | [0m 0.6654  [0m | [0m 6.546   [0m | [0m 87.82   [0m | [0m 0.4534  [0m | [0m 15.72   [0m | [0m 3.672   [0m | [0m 19.26   [0m | [0m 3.718   [0m | [0m 0.5314  [0m |
| [0m 44      [0m | [0m 0.6878  [0m | [0m 8.726   [0m | [0m 13.54   [0m | [0m 0.31    [0m | [0m 28.57   [0m | [0m 9.598   [0m | [0m 18.14   [0m | [0m 6.195   [0m | [0m 0.6043  [0m |
| [0m 45      [0m | [0m 0.6207  [0m | [0m 7.616   [0m | [0m 45.73   [0m | [0m 0.6723  [0m | [0m 15.07   [0m | [0m 19.91   [0m | [0m 12.25   [0m | [0m 9.981   [0m | [0m 0.372

In [21]:
best_hyperparam_rf_KC1 = hyperparam_rf_KC1.max['params']
result_rf_KC1 = run_ml(data_load['KC1'], 10, best_hyperparam_rf_KC1, model.random_forest,do_optimize = False)
result_rf_KC1

{'Train': {'Gmeans': 0.8736298036939085,
  'Recall': 0.9027198158008346,
  'pf': 0.1545227870896246,
  'cf_matrix': {'tn': 12207, 'fp': 2231, 'fn': 1352, 'tp': 12546}},
 'Dev': {'Gmeans': 0.7422179680122989,
  'Recall': 0.6735395189003437,
  'pf': 0.18210068365444376,
  'cf_matrix': {'tn': 1316, 'fp': 293, 'fn': 95, 'tp': 196}},
 'Test': {'Gmeans': 0.7022312273911734,
  'Recall': 0.6042944785276073,
  'pf': 0.18395961862030286,
  'cf_matrix': {'tn': 1455, 'fp': 328, 'fn': 129, 'tp': 197}}}

### KC2

In [25]:
hyperparam_rf_KC2 = run_ml(data_load['KC2'], 10, bound_random_forest['KC2'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7747  [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.9137  [0m |
| [95m 2       [0m | [95m 0.8206  [0m | [95m 6.41    [0m | [95m 109.1   [0m | [95m 0.03038 [0m | [95m 48.56   [0m | [95m 16.82   [0m | [95m 5.822   [0m | [95m 2.636   [0m | [95m 0.4733  [0m |
| [0m 3       [0m | [0m 0.8076  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5913  [0m |
| [0m 4       [0m | [0m 0.8028  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m 

| [0m 41      [0m | [0m 0.8137  [0m | [0m 5.526   [0m | [0m 72.36   [0m | [0m 0.5658  [0m | [0m 6.701   [0m | [0m 1.1     [0m | [0m 14.48   [0m | [0m 3.627   [0m | [0m 0.9879  [0m |
| [0m 42      [0m | [0m 0.826   [0m | [0m 9.337   [0m | [0m 123.5   [0m | [0m 0.2142  [0m | [0m 42.7    [0m | [0m 17.1    [0m | [0m 4.234   [0m | [0m 9.372   [0m | [0m 0.9632  [0m |
| [0m 43      [0m | [0m 0.7504  [0m | [0m 8.956   [0m | [0m 107.1   [0m | [0m 0.6296  [0m | [0m 26.3    [0m | [0m 6.374   [0m | [0m 14.12   [0m | [0m 7.66    [0m | [0m 0.8086  [0m |
| [0m 44      [0m | [0m 0.8264  [0m | [0m 4.372   [0m | [0m 35.91   [0m | [0m 0.1248  [0m | [0m 18.09   [0m | [0m 8.374   [0m | [0m 6.182   [0m | [0m 7.822   [0m | [0m 0.612   [0m |
| [0m 45      [0m | [0m 0.815   [0m | [0m 6.789   [0m | [0m 76.17   [0m | [0m 0.09907 [0m | [0m 35.47   [0m | [0m 17.27   [0m | [0m 13.36   [0m | [0m 7.357   [0m | [0m 0.955

In [26]:
best_hyperparam_rf_KC2 = hyperparam_rf_KC2.max['params']
result_rf_KC2 = run_ml(data_load['KC2'], 10, best_hyperparam_rf_KC2, model.random_forest,do_optimize = False)
result_rf_KC2

{'Train': {'Gmeans': 0.7978736683107516,
  'Recall': 0.7377551020408163,
  'pf': 0.13710879284649777,
  'cf_matrix': {'tn': 2895, 'fp': 460, 'fn': 514, 'tp': 1446}},
 'Dev': {'Gmeans': 0.8385799748795614,
  'Recall': 0.8222222222222222,
  'pf': 0.14473684210526316,
  'cf_matrix': {'tn': 325, 'fp': 55, 'fn': 16, 'tp': 74}},
 'Test': {'Gmeans': 0.7618892122316059,
  'Recall': 0.6728971962616822,
  'pf': 0.13734939759036144,
  'cf_matrix': {'tn': 358, 'fp': 57, 'fn': 35, 'tp': 72}}}

### PC1

In [27]:
hyperparam_rf_PC1 = run_ml(data_load['PC1'], 5, bound_random_forest['PC1'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7634  [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.8955  [0m |
| [0m 2       [0m | [0m 0.5899  [0m | [0m 6.41    [0m | [0m 109.1   [0m | [0m 0.03038 [0m | [0m 48.56   [0m | [0m 16.82   [0m | [0m 5.822   [0m | [0m 2.636   [0m | [0m 0.3626  [0m |
| [0m 3       [0m | [0m 0.7069  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5054  [0m |
| [0m 4       [0m | [0m 0.6238  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m | [0m 2.8

| [0m 41      [0m | [0m 0.488   [0m | [0m 8.514   [0m | [0m 32.52   [0m | [0m 1.0     [0m | [0m 35.72   [0m | [0m 20.0    [0m | [0m 20.0    [0m | [0m 1.079   [0m | [0m 0.2194  [0m |
| [0m 42      [0m | [0m 0.7673  [0m | [0m 5.379   [0m | [0m 37.89   [0m | [0m 0.01    [0m | [0m 36.86   [0m | [0m 19.57   [0m | [0m 20.0    [0m | [0m 7.598   [0m | [0m 1.0     [0m |
| [0m 43      [0m | [0m 0.7536  [0m | [0m 8.621   [0m | [0m 40.1    [0m | [0m 1.0     [0m | [0m 42.21   [0m | [0m 18.6    [0m | [0m 18.78   [0m | [0m 6.085   [0m | [0m 1.0     [0m |
| [0m 44      [0m | [0m 0.6566  [0m | [0m 2.024   [0m | [0m 145.4   [0m | [0m 0.4838  [0m | [0m 24.83   [0m | [0m 12.52   [0m | [0m 2.528   [0m | [0m 7.554   [0m | [0m 0.5092  [0m |
| [0m 45      [0m | [0m 0.7547  [0m | [0m 7.179   [0m | [0m 150.0   [0m | [0m 1.0     [0m | [0m 28.98   [0m | [0m 11.4    [0m | [0m 5.265   [0m | [0m 1.0     [0m | [0m 1.0  

In [28]:
best_hyperparam_rf_PC1 = hyperparam_rf_PC1.max['params']
result_rf_PC1 = run_ml(data_load['PC1'], 5, best_hyperparam_rf_PC1, model.random_forest,do_optimize = False)
result_rf_PC1

{'Train': {'Gmeans': 0.9050732433110459,
  'Recall': 0.9366666666666666,
  'pf': 0.12545454545454546,
  'cf_matrix': {'tn': 2886, 'fp': 414, 'fn': 209, 'tp': 3091}},
 'Dev': {'Gmeans': 0.7908527224925227,
  'Recall': 0.7419354838709677,
  'pf': 0.1570048309178744,
  'cf_matrix': {'tn': 698, 'fp': 130, 'fn': 16, 'tp': 46}},
 'Test': {'Gmeans': 0.7286423414361913,
  'Recall': 0.6363636363636364,
  'pf': 0.16569767441860464,
  'cf_matrix': {'tn': 861, 'fp': 171, 'fn': 28, 'tp': 49}}}

### PC3

In [29]:
hyperparam_rf_PC3 = run_ml(data_load['PC3'], 7, bound_random_forest['PC3'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7163  [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.9029  [0m |
| [0m 2       [0m | [0m 0.5039  [0m | [0m 6.41    [0m | [0m 109.1   [0m | [0m 0.03038 [0m | [0m 48.56   [0m | [0m 16.82   [0m | [0m 5.822   [0m | [0m 2.636   [0m | [0m 0.4075  [0m |
| [0m 3       [0m | [0m 0.7102  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5402  [0m |
| [0m 4       [0m | [0m 0.6401  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m | [0m 2.8

| [0m 41      [0m | [0m 0.6558  [0m | [0m 5.527   [0m | [0m 143.5   [0m | [0m 0.0161  [0m | [0m 33.89   [0m | [0m 5.636   [0m | [0m 2.526   [0m | [0m 1.384   [0m | [0m 0.6485  [0m |
| [0m 42      [0m | [0m 0.708   [0m | [0m 2.486   [0m | [0m 148.9   [0m | [0m 0.2694  [0m | [0m 35.67   [0m | [0m 4.256   [0m | [0m 4.849   [0m | [0m 1.763   [0m | [0m 0.7005  [0m |
| [0m 43      [0m | [0m 0.7308  [0m | [0m 6.158   [0m | [0m 80.69   [0m | [0m 0.3338  [0m | [0m 14.98   [0m | [0m 9.99    [0m | [0m 5.998   [0m | [0m 3.392   [0m | [0m 0.8898  [0m |
| [0m 44      [0m | [0m 0.6816  [0m | [0m 3.996   [0m | [0m 81.81   [0m | [0m 0.436   [0m | [0m 15.86   [0m | [0m 11.87   [0m | [0m 7.341   [0m | [0m 2.732   [0m | [0m 0.5177  [0m |
| [0m 45      [0m | [0m 0.6861  [0m | [0m 4.225   [0m | [0m 143.5   [0m | [0m 0.2899  [0m | [0m 35.01   [0m | [0m 8.286   [0m | [0m 4.748   [0m | [0m 2.904   [0m | [0m 0.511

In [30]:
best_hyperparam_rf_PC3 = hyperparam_rf_PC3.max['params']
result_rf_PC3 = run_ml(data_load['PC3'], 7, best_hyperparam_rf_PC3, model.random_forest,do_optimize = False)
result_rf_PC3

{'Train': {'Gmeans': 0.857781710183322,
  'Recall': 0.9297615995898487,
  'pf': 0.20862567065621132,
  'cf_matrix': {'tn': 3835, 'fp': 1011, 'fn': 274, 'tp': 3627}},
 'Dev': {'Gmeans': 0.7442971666904423,
  'Recall': 0.7410714285714286,
  'pf': 0.2524630541871921,
  'cf_matrix': {'tn': 607, 'fp': 205, 'fn': 29, 'tp': 83}},
 'Test': {'Gmeans': 0.7462202228628494,
  'Recall': 0.7313432835820896,
  'pf': 0.23860021208907742,
  'cf_matrix': {'tn': 718, 'fp': 225, 'fn': 36, 'tp': 98}}}

### PC4

In [31]:
hyperparam_rf_PC4 = run_ml(data_load['PC4'], 7, bound_random_forest['PC4'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.8487  [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.9026  [0m |
| [0m 2       [0m | [0m 0.6328  [0m | [0m 6.41    [0m | [0m 109.1   [0m | [0m 0.03038 [0m | [0m 48.56   [0m | [0m 16.82   [0m | [0m 5.822   [0m | [0m 2.636   [0m | [0m 0.4056  [0m |
| [0m 3       [0m | [0m 0.8112  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5388  [0m |
| [0m 4       [0m | [0m 0.7601  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m | [0m 2.8

| [0m 41      [0m | [0m 0.8425  [0m | [0m 5.452   [0m | [0m 135.7   [0m | [0m 1.0     [0m | [0m 23.35   [0m | [0m 13.47   [0m | [0m 19.61   [0m | [0m 1.0     [0m | [0m 1.0     [0m |
| [0m 42      [0m | [0m 0.8485  [0m | [0m 4.001   [0m | [0m 142.0   [0m | [0m 1.0     [0m | [0m 20.67   [0m | [0m 19.37   [0m | [0m 15.03   [0m | [0m 1.0     [0m | [0m 1.0     [0m |
| [0m 43      [0m | [0m 0.8467  [0m | [0m 7.93    [0m | [0m 141.7   [0m | [0m 1.0     [0m | [0m 33.54   [0m | [0m 17.28   [0m | [0m 18.21   [0m | [0m 1.0     [0m | [0m 1.0     [0m |
| [0m 44      [0m | [0m 0.8181  [0m | [0m 6.105   [0m | [0m 14.03   [0m | [0m 1.0     [0m | [0m 48.47   [0m | [0m 20.0    [0m | [0m 20.0    [0m | [0m 10.0    [0m | [0m 1.0     [0m |
| [0m 45      [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 138.0   [0m | [0m 1.0     [0m | [0m 11.08   [0m | [0m 17.95   [0m | [0m 20.0    [0m | [0m 1.0     [0m | [0m 0.272

In [32]:
best_hyperparam_rf_PC4 = hyperparam_rf_PC4.max['params']
result_rf_PC4 = run_ml(data_load['PC4'], 7, best_hyperparam_rf_PC4, model.random_forest,do_optimize = False)
result_rf_PC4

{'Train': {'Gmeans': 0.9286928647055591,
  'Recall': 0.9919465126880413,
  'pf': 0.1305272754900471,
  'cf_matrix': {'tn': 5722, 'fp': 859, 'fn': 53, 'tp': 6528}},
 'Dev': {'Gmeans': 0.8493524707027063,
  'Recall': 0.8636363636363636,
  'pf': 0.16469517743403095,
  'cf_matrix': {'tn': 918, 'fp': 181, 'fn': 21, 'tp': 133}},
 'Test': {'Gmeans': 0.8575584094169899,
  'Recall': 0.8707865168539326,
  'pf': 0.15546875,
  'cf_matrix': {'tn': 1081, 'fp': 199, 'fn': 23, 'tp': 155}}}

## CM1

In [33]:
hyperparam_rf_CM1 = run_ml(data_load['CM1'], 5, bound_random_forest['CM1'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.5937  [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.8994  [0m |
| [0m 2       [0m | [0m 0.3802  [0m | [0m 6.41    [0m | [0m 109.1   [0m | [0m 0.03038 [0m | [0m 48.56   [0m | [0m 16.82   [0m | [0m 5.822   [0m | [0m 2.636   [0m | [0m 0.3862  [0m |
| [0m 3       [0m | [0m 0.5646  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5238  [0m |
| [0m 4       [0m | [0m 0.4346  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m | [0m 2.8

| [0m 41      [0m | [0m 0.674   [0m | [0m 5.898   [0m | [0m 97.63   [0m | [0m 0.01    [0m | [0m 5.862   [0m | [0m 4.76    [0m | [0m 8.015   [0m | [0m 8.3     [0m | [0m 1.0     [0m |
| [0m 42      [0m | [0m 0.6723  [0m | [0m 5.097   [0m | [0m 80.12   [0m | [0m 0.4158  [0m | [0m 45.74   [0m | [0m 5.011   [0m | [0m 11.42   [0m | [0m 3.178   [0m | [0m 0.9585  [0m |
| [0m 43      [0m | [0m 0.6367  [0m | [0m 4.382   [0m | [0m 104.5   [0m | [0m 0.7695  [0m | [0m 5.408   [0m | [0m 3.801   [0m | [0m 5.041   [0m | [0m 6.675   [0m | [0m 0.7132  [0m |
| [0m 44      [0m | [0m 0.6671  [0m | [0m 8.481   [0m | [0m 104.1   [0m | [0m 0.8527  [0m | [0m 5.419   [0m | [0m 2.24    [0m | [0m 8.984   [0m | [0m 8.451   [0m | [0m 0.9668  [0m |
| [0m 45      [0m | [0m 0.6884  [0m | [0m 5.601   [0m | [0m 103.3   [0m | [0m 0.6048  [0m | [0m 5.262   [0m | [0m 3.652   [0m | [0m 10.85   [0m | [0m 7.584   [0m | [0m 0.730

In [34]:
best_hyperparam_rf_CM1 = hyperparam_rf_CM1.max['params']
result_rf_CM1 = run_ml(data_load['CM1'], 5, best_hyperparam_rf_CM1, model.random_forest,do_optimize = False)
result_rf_CM1

{'Train': {'Gmeans': 0.7671809178725112,
  'Recall': 0.8158123370981755,
  'pf': 0.2785515320334262,
  'cf_matrix': {'tn': 1036, 'fp': 400, 'fn': 212, 'tp': 939}},
 'Dev': {'Gmeans': 0.7166182154164563,
  'Recall': 0.725,
  'pf': 0.2916666666666667,
  'cf_matrix': {'tn': 255, 'fp': 105, 'fn': 11, 'tp': 29}},
 'Test': {'Gmeans': 0.6917212234625966,
  'Recall': 0.673469387755102,
  'pf': 0.289532293986637,
  'cf_matrix': {'tn': 319, 'fp': 130, 'fn': 16, 'tp': 33}}}

## JM1

In [35]:
hyperparam_rf_JM1 = run_ml(data_load['JM1'], 10, bound_random_forest['JM1'], model.random_forest)

|   iter    |  target   |   depth   | estimator | max_fe... | max_le... | min_leaf  | min_split | neighbors | sampling  |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6395  [0m | [0m 4.371   [0m | [0m 143.1   [0m | [0m 0.7347  [0m | [0m 30.74   [0m | [0m 3.964   [0m | [0m 4.808   [0m | [0m 1.523   [0m | [0m 0.9121  [0m |
| [0m 2       [0m | [0m 0.5015  [0m | [0m 6.41    [0m | [0m 109.1   [0m | [0m 0.03038 [0m | [0m 48.56   [0m | [0m 16.82   [0m | [0m 5.822   [0m | [0m 2.636   [0m | [0m 0.4637  [0m |
| [0m 3       [0m | [0m 0.5748  [0m | [0m 3.738   [0m | [0m 83.47   [0m | [0m 0.4376  [0m | [0m 15.98   [0m | [0m 12.63   [0m | [0m 4.511   [0m | [0m 3.629   [0m | [0m 0.5839  [0m |
| [0m 4       [0m | [0m 0.5091  [0m | [0m 5.105   [0m | [0m 119.9   [0m | [0m 0.2077  [0m | [0m 26.68   [0m | [0m 12.26   [0m | [0m 2.8

| [0m 41      [0m | [0m 0.6471  [0m | [0m 3.59    [0m | [0m 102.0   [0m | [0m 0.6838  [0m | [0m 4.057   [0m | [0m 2.999   [0m | [0m 8.447   [0m | [0m 8.982   [0m | [0m 1.0     [0m |
| [0m 42      [0m | [0m 0.64    [0m | [0m 6.031   [0m | [0m 103.1   [0m | [0m 0.1991  [0m | [0m 7.033   [0m | [0m 2.149   [0m | [0m 9.975   [0m | [0m 8.425   [0m | [0m 0.875   [0m |
| [0m 43      [0m | [0m 0.5014  [0m | [0m 5.949   [0m | [0m 99.57   [0m | [0m 0.834   [0m | [0m 8.472   [0m | [0m 3.993   [0m | [0m 6.634   [0m | [0m 9.656   [0m | [0m 0.4103  [0m |
| [95m 44      [0m | [95m 0.6479  [0m | [95m 8.481   [0m | [95m 104.1   [0m | [95m 0.8527  [0m | [95m 5.419   [0m | [95m 2.24    [0m | [95m 8.984   [0m | [95m 8.451   [0m | [95m 0.971   [0m |
| [0m 45      [0m | [0m 0.6366  [0m | [0m 5.601   [0m | [0m 103.3   [0m | [0m 0.6048  [0m | [0m 5.262   [0m | [0m 3.652   [0m | [0m 10.85   [0m | [0m 7.584   [0m | 

In [36]:
best_hyperparam_rf_JM1 = hyperparam_rf_JM1.max['params']
result_rf_JM1 = run_ml(data_load['JM1'], 10, best_hyperparam_rf_JM1, model.random_forest,do_optimize = False)
result_rf_JM1

{'Train': {'Gmeans': 0.7207977128292039,
  'Recall': 0.7155151181490228,
  'pf': 0.2738806914886135,
  'cf_matrix': {'tn': 51622, 'fp': 19471, 'fn': 19636, 'tp': 49387}},
 'Dev': {'Gmeans': 0.6478853008760674,
  'Recall': 0.5810526315789474,
  'pf': 0.27759493670886076,
  'cf_matrix': {'tn': 5707, 'fp': 2193, 'fn': 796, 'tp': 1104}},
 'Test': {'Gmeans': 0.6601031619590525,
  'Recall': 0.6010461245839277,
  'pf': 0.27503702859747065,
  'cf_matrix': {'tn': 6363, 'fp': 2414, 'fn': 839, 'tp': 1264}}}

## Decission Tree

### KC1

In [37]:
hyperparam_dt_KC1 = run_ml(data_load['KC1'], 10, bound_decission_tree['KC1'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.6913  [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.8136  [0m |
| [0m 2       [0m | [0m 0.5932  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.4131  [0m |
| [95m 3       [0m | [95m 0.6956  [0m | [95m 1.523   [0m | [95m 8.796   [0m | [95m 0.7226  [0m |
| [0m 4       [0m | [0m 0.6787  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9791  [0m |
| [0m 5       [0m | [0m 0.6303  [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.431   [0m |
| [0m 6       [0m | [0m 0.6675  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.6695  [0m |
| [0m 7       [0m | [0m 0.6917  [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.7301  [0m |
| [0m 8       [0m | [0m 0.6472  [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5594  [0m |
| [0m 9       [0m | [0m 0.6408  [0m | [0m 5.105   

In [38]:
best_hyperparam_dt_KC1 = hyperparam_dt_KC1.max['params']
result_dt_KC1 = run_ml(data_load['KC1'], 10, best_hyperparam_dt_KC1, model.decission_tree,do_optimize = False)
result_dt_KC1

{'Train': {'Gmeans': 0.7833417553844744,
  'Recall': 0.8819818542670825,
  'pf': 0.3042665189084361,
  'cf_matrix': {'tn': 10045, 'fp': 4393, 'fn': 1665, 'tp': 12443}},
 'Dev': {'Gmeans': 0.7458147949334402,
  'Recall': 0.8041237113402062,
  'pf': 0.30826600372902424,
  'cf_matrix': {'tn': 1113, 'fp': 496, 'fn': 57, 'tp': 234}},
 'Test': {'Gmeans': 0.6768152307307151,
  'Recall': 0.6840490797546013,
  'pf': 0.3303421200224341,
  'cf_matrix': {'tn': 1194, 'fp': 589, 'fn': 103, 'tp': 223}}}

### KC2

In [39]:
hyperparam_dt_KC2 = run_ml(data_load['KC2'], 10, bound_decission_tree['KC2'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.7678  [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.8271  [0m |
| [0m 2       [0m | [0m 0.6754  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.4556  [0m |
| [95m 3       [0m | [95m 0.8164  [0m | [95m 1.523   [0m | [95m 8.796   [0m | [95m 0.7427  [0m |
| [0m 4       [0m | [0m 0.7151  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9806  [0m |
| [0m 5       [0m | [0m 0.6422  [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.4723  [0m |
| [0m 6       [0m | [0m 0.8055  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.6935  [0m |
| [0m 7       [0m | [0m 0.7337  [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.7496  [0m |
| [95m 8       [0m | [95m 0.823   [0m | [95m 2.255   [0m | [95m 3.629   [0m | [95m 0.5913  [0m |
| [0m 9       [0m | [0m 0.6977  [0m | [0m 5.1

In [40]:
best_hyperparam_dt_KC2 = hyperparam_dt_KC2.max['params']
result_dt_KC2 = run_ml(data_load['KC2'], 10, best_hyperparam_dt_KC2, model.decission_tree,do_optimize = False)
result_dt_KC2

{'Train': {'Gmeans': 0.7935919901931693,
  'Recall': 0.7630695443645084,
  'pf': 0.17466467958271237,
  'cf_matrix': {'tn': 2769, 'fp': 586, 'fn': 494, 'tp': 1591}},
 'Dev': {'Gmeans': 0.8255779474818965,
  'Recall': 0.8222222222222222,
  'pf': 0.17105263157894737,
  'cf_matrix': {'tn': 315, 'fp': 65, 'fn': 16, 'tp': 74}},
 'Test': {'Gmeans': 0.7750762282215242,
  'Recall': 0.7289719626168224,
  'pf': 0.17590361445783131,
  'cf_matrix': {'tn': 342, 'fp': 73, 'fn': 29, 'tp': 78}}}

### PC1

In [41]:
hyperparam_dt_PC1 = run_ml(data_load['PC1'], 5, bound_decission_tree['PC1'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.724   [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.7908  [0m |
| [0m 2       [0m | [0m 0.6335  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.3412  [0m |
| [0m 3       [0m | [0m 0.7237  [0m | [0m 1.523   [0m | [0m 8.796   [0m | [0m 0.6886  [0m |
| [0m 4       [0m | [0m 0.7006  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9765  [0m |
| [0m 5       [0m | [0m 0.6216  [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.3613  [0m |
| [0m 6       [0m | [0m 0.7148  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.629   [0m |
| [0m 7       [0m | [0m 0.724   [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.697   [0m |
| [0m 8       [0m | [0m 0.6229  [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5054  [0m |
| [0m 9       [0m | [0m 0.692   [0m | [0m 5.105   [0m 

In [42]:
best_hyperparam_dt_PC1 = hyperparam_dt_PC1.max['params']
result_dt_PC1 = run_ml(data_load['PC1'], 5, best_hyperparam_dt_PC1, model.decission_tree,do_optimize = False)
result_dt_PC1

{'Train': {'Gmeans': 0.8628374827158535,
  'Recall': 0.9515151515151515,
  'pf': 0.2175757575757576,
  'cf_matrix': {'tn': 2582, 'fp': 718, 'fn': 160, 'tp': 3140}},
 'Dev': {'Gmeans': 0.8023457894065383,
  'Recall': 0.8225806451612904,
  'pf': 0.21739130434782608,
  'cf_matrix': {'tn': 648, 'fp': 180, 'fn': 11, 'tp': 51}},
 'Test': {'Gmeans': 0.7366380786483473,
  'Recall': 0.7142857142857143,
  'pf': 0.24031007751937986,
  'cf_matrix': {'tn': 784, 'fp': 248, 'fn': 22, 'tp': 55}}}

### PC3

In [43]:
hyperparam_dt_PC3 = run_ml(data_load['PC3'], 7, bound_decission_tree['PC3'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.7083  [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.8055  [0m |
| [0m 2       [0m | [0m 0.6419  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.3876  [0m |
| [0m 3       [0m | [0m 0.6906  [0m | [0m 1.523   [0m | [0m 8.796   [0m | [0m 0.7106  [0m |
| [0m 4       [0m | [0m 0.6648  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9782  [0m |
| [0m 5       [0m | [0m 0.5898  [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.4063  [0m |
| [0m 6       [0m | [0m 0.6963  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.6552  [0m |
| [0m 7       [0m | [0m 0.6911  [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.7184  [0m |
| [0m 8       [0m | [0m 0.6863  [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5402  [0m |
| [0m 9       [0m | [0m 0.6528  [0m | [0m 5.105   [0m 

In [44]:
best_hyperparam_dt_PC3 = hyperparam_dt_PC3.max['params']
result_dt_PC3 = run_ml(data_load['PC3'], 7, best_hyperparam_dt_PC3, model.decission_tree,do_optimize = False)
result_dt_PC3

{'Train': {'Gmeans': 0.8411559167636316,
  'Recall': 0.9106918238993711,
  'pf': 0.22307057366900537,
  'cf_matrix': {'tn': 3765, 'fp': 1081, 'fn': 284, 'tp': 2896}},
 'Dev': {'Gmeans': 0.7381705494180523,
  'Recall': 0.7589285714285714,
  'pf': 0.28201970443349755,
  'cf_matrix': {'tn': 583, 'fp': 229, 'fn': 27, 'tp': 85}},
 'Test': {'Gmeans': 0.7575245225658789,
  'Recall': 0.7686567164179104,
  'pf': 0.25344644750795337,
  'cf_matrix': {'tn': 704, 'fp': 239, 'fn': 31, 'tp': 103}}}

### PC4

In [45]:
hyperparam_dt_PC4 = run_ml(data_load['PC4'], 7, bound_decission_tree['PC4'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.8033  [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.8049  [0m |
| [0m 2       [0m | [0m 0.7552  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.3856  [0m |
| [95m 3       [0m | [95m 0.8136  [0m | [95m 1.523   [0m | [95m 8.796   [0m | [95m 0.7097  [0m |
| [0m 4       [0m | [0m 0.7958  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9781  [0m |
| [0m 5       [0m | [0m 0.769   [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.4045  [0m |
| [0m 6       [0m | [0m 0.7708  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.6541  [0m |
| [0m 7       [0m | [0m 0.8107  [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.7175  [0m |
| [0m 8       [0m | [0m 0.7817  [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5388  [0m |
| [0m 9       [0m | [0m 0.8019  [0m | [0m 5.105   

In [46]:
best_hyperparam_dt_PC4 = hyperparam_dt_PC4.max['params']
result_dt_PC4 = run_ml(data_load['PC4'], 7, best_hyperparam_dt_PC4, model.decission_tree,do_optimize = False)
result_dt_PC4

{'Train': {'Gmeans': 0.9106222045631851,
  'Recall': 0.9434960327001684,
  'pf': 0.12110621486096337,
  'cf_matrix': {'tn': 5784, 'fp': 797, 'fn': 235, 'tp': 3924}},
 'Dev': {'Gmeans': 0.8274383905794984,
  'Recall': 0.8116883116883117,
  'pf': 0.1565059144676979,
  'cf_matrix': {'tn': 927, 'fp': 172, 'fn': 29, 'tp': 125}},
 'Test': {'Gmeans': 0.8279908334182928,
  'Recall': 0.797752808988764,
  'pf': 0.140625,
  'cf_matrix': {'tn': 1100, 'fp': 180, 'fn': 36, 'tp': 142}}}

## CM1

In [47]:
hyperparam_dt_CM1 = run_ml(data_load['CM1'], 5, bound_decission_tree['CM1'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.6759  [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.7986  [0m |
| [0m 2       [0m | [0m 0.5664  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.3656  [0m |
| [0m 3       [0m | [0m 0.6567  [0m | [0m 1.523   [0m | [0m 8.796   [0m | [0m 0.7002  [0m |
| [0m 4       [0m | [0m 0.6458  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9774  [0m |
| [0m 5       [0m | [0m 0.5916  [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.3851  [0m |
| [0m 6       [0m | [0m 0.6666  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.6428  [0m |
| [0m 7       [0m | [0m 0.6518  [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.7083  [0m |
| [0m 8       [0m | [0m 0.5454  [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5238  [0m |
| [0m 9       [0m | [0m 0.5696  [0m | [0m 5.105   [0m 

In [48]:
best_hyperparam_dt_CM1 = hyperparam_dt_CM1.max['params']
result_dt_CM1 = run_ml(data_load['CM1'], 5, best_hyperparam_dt_CM1, model.decission_tree,do_optimize = False)
result_dt_CM1

{'Train': {'Gmeans': 0.8400886780735183,
  'Recall': 0.9349220898258478,
  'pf': 0.24512534818941503,
  'cf_matrix': {'tn': 1084, 'fp': 352, 'fn': 71, 'tp': 1020}},
 'Dev': {'Gmeans': 0.6795627679291705,
  'Recall': 0.625,
  'pf': 0.2611111111111111,
  'cf_matrix': {'tn': 266, 'fp': 94, 'fn': 15, 'tp': 25}},
 'Test': {'Gmeans': 0.6494605524094196,
  'Recall': 0.5918367346938775,
  'pf': 0.2873051224944321,
  'cf_matrix': {'tn': 320, 'fp': 129, 'fn': 20, 'tp': 29}}}

## JM1

In [49]:
hyperparam_dt_JM1 = run_ml(data_load['JM1'], 10, bound_decission_tree['JM1'], model.decission_tree)

|   iter    |  target   |   depth   | neighbors | sampling  |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.6411  [0m | [0m 4.371   [0m | [0m 9.556   [0m | [0m 0.824   [0m |
| [0m 2       [0m | [0m 0.5058  [0m | [0m 6.388   [0m | [0m 2.404   [0m | [0m 0.4457  [0m |
| [95m 3       [0m | [95m 0.6477  [0m | [95m 1.523   [0m | [95m 8.796   [0m | [95m 0.7381  [0m |
| [0m 4       [0m | [0m 0.6137  [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9802  [0m |
| [0m 5       [0m | [0m 0.5068  [0m | [0m 8.492   [0m | [0m 2.911   [0m | [0m 0.4627  [0m |
| [0m 6       [0m | [0m 0.6443  [0m | [0m 2.651   [0m | [0m 3.738   [0m | [0m 0.6879  [0m |
| [0m 7       [0m | [0m 0.632   [0m | [0m 4.888   [0m | [0m 3.621   [0m | [0m 0.7451  [0m |
| [0m 8       [0m | [0m 0.563   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5839  [0m |
| [0m 9       [0m | [0m 0.5232  [0m | [0m 5.105   

In [50]:
best_hyperparam_dt_JM1 = hyperparam_dt_JM1.max['params']
result_dt_JM1 = run_ml(data_load['JM1'], 10, best_hyperparam_dt_JM1, model.decission_tree,do_optimize = False)
result_dt_JM1

{'Train': {'Gmeans': 0.6619762659630904,
  'Recall': 0.6138689007925503,
  'pf': 0.2861463153897008,
  'cf_matrix': {'tn': 50750, 'fp': 20343, 'fn': 26455, 'tp': 42058}},
 'Dev': {'Gmeans': 0.6496488571335672,
  'Recall': 0.5921052631578947,
  'pf': 0.2872151898734177,
  'cf_matrix': {'tn': 5631, 'fp': 2269, 'fn': 775, 'tp': 1125}},
 'Test': {'Gmeans': 0.660102464334156,
  'Recall': 0.6110318592486923,
  'pf': 0.2868861797880825,
  'cf_matrix': {'tn': 6259, 'fp': 2518, 'fn': 818, 'tp': 1285}}}

## Naive Bayes

### KC1

In [51]:
hyperparam_nb_KC1 = run_ml(data_load['KC1'], 10, bound_naive_bayes['KC1'],  model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.6007  [0m | [0m 4.371   [0m | [0m 0.9657  [0m | [0m 0.732   [0m |
| [0m 2       [0m | [0m 0.6002  [0m | [0m 6.388   [0m | [0m 0.4131  [0m | [0m 0.156   [0m |
| [0m 3       [0m | [0m 0.5976  [0m | [0m 1.523   [0m | [0m 0.9069  [0m | [0m 0.6011  [0m |
| [0m 4       [0m | [0m 0.5749  [0m | [0m 7.373   [0m | [0m 0.3189  [0m | [0m 0.9699  [0m |
| [95m 5       [0m | [95m 0.6047  [0m | [95m 8.492   [0m | [95m 0.4523  [0m | [95m 0.1818  [0m |
| [0m 6       [0m | [0m 0.5914  [0m | [0m 2.651   [0m | [0m 0.5162  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.5855  [0m | [0m 4.888   [0m | [0m 0.5071  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.5908  [0m | [0m 2.255   [0m | [0m 0.5078  [0m | [0m 0.3664  [0m |
| [0m 9       [0m | [0m 0.6026  [0m | [0m 5.105   

In [52]:
best_hyperparam_nb_KC1 = hyperparam_nb_KC1.max['params']
result_nb_KC1 = run_ml(data_load['KC1'], 10, best_hyperparam_nb_KC1, model.naive_bayes,do_optimize = False)
result_nb_KC1

{'Train': {'Gmeans': 0.5958363861627356,
  'Recall': 0.39569192408920906,
  'pf': 0.10278431915777808,
  'cf_matrix': {'tn': 12954, 'fp': 1484, 'fn': 8725, 'tp': 5713}},
 'Dev': {'Gmeans': 0.6301082681048098,
  'Recall': 0.44673539518900346,
  'pf': 0.11124922311995028,
  'cf_matrix': {'tn': 1430, 'fp': 179, 'fn': 161, 'tp': 130}},
 'Test': {'Gmeans': 0.6071446317611808,
  'Recall': 0.4110429447852761,
  'pf': 0.10319685922602355,
  'cf_matrix': {'tn': 1599, 'fp': 184, 'fn': 192, 'tp': 134}}}

## KC2

In [53]:
hyperparam_nb_KC2 = run_ml(data_load['KC2'], 10, bound_naive_bayes['KC2'], model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.6081  [0m | [0m 4.371   [0m | [0m 0.9682  [0m | [0m 0.732   [0m |
| [95m 2       [0m | [95m 0.683   [0m | [95m 6.388   [0m | [95m 0.4556  [0m | [95m 0.156   [0m |
| [0m 3       [0m | [0m 0.5639  [0m | [0m 1.523   [0m | [0m 0.9137  [0m | [0m 0.6011  [0m |
| [0m 4       [0m | [0m 0.5137  [0m | [0m 7.373   [0m | [0m 0.3683  [0m | [0m 0.9699  [0m |
| [0m 5       [0m | [0m 0.617   [0m | [0m 8.492   [0m | [0m 0.492   [0m | [0m 0.1818  [0m |
| [0m 6       [0m | [0m 0.5712  [0m | [0m 2.651   [0m | [0m 0.5512  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.5892  [0m | [0m 4.888   [0m | [0m 0.5428  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.5892  [0m | [0m 2.255   [0m | [0m 0.5434  [0m | [0m 0.3664  [0m |
| [0m 9       [0m | [0m 0.6241  [0m | [0m 5.105   

In [54]:
best_hyperparam_nb_KC2 = hyperparam_nb_KC2.max['params']
result_nb_KC2 = run_ml(data_load['KC2'], 10, best_hyperparam_nb_KC2, model.naive_bayes,do_optimize = False)
result_nb_KC2

{'Train': {'Gmeans': 0.623855047224011,
  'Recall': 0.4187779433681073,
  'pf': 0.07064083457526081,
  'cf_matrix': {'tn': 3118, 'fp': 237, 'fn': 1950, 'tp': 1405}},
 'Dev': {'Gmeans': 0.7078507127770286,
  'Recall': 0.5333333333333333,
  'pf': 0.060526315789473685,
  'cf_matrix': {'tn': 357, 'fp': 23, 'fn': 42, 'tp': 48}},
 'Test': {'Gmeans': 0.6467853345594956,
  'Recall': 0.4485981308411215,
  'pf': 0.06746987951807229,
  'cf_matrix': {'tn': 387, 'fp': 28, 'fn': 59, 'tp': 48}}}

### PC1

In [55]:
hyperparam_nb_PC1 = run_ml(data_load['PC1'], 5, bound_naive_bayes['PC1'], model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.5387  [0m | [0m 4.371   [0m | [0m 0.9615  [0m | [0m 0.732   [0m |
| [95m 2       [0m | [95m 0.5498  [0m | [95m 6.388   [0m | [95m 0.3412  [0m | [95m 0.156   [0m |
| [0m 3       [0m | [0m 0.5256  [0m | [0m 1.523   [0m | [0m 0.8955  [0m | [0m 0.6011  [0m |
| [0m 4       [0m | [0m 0.5144  [0m | [0m 7.373   [0m | [0m 0.2355  [0m | [0m 0.9699  [0m |
| [95m 5       [0m | [95m 0.5631  [0m | [95m 8.492   [0m | [95m 0.3852  [0m | [95m 0.1818  [0m |
| [0m 6       [0m | [0m 0.5544  [0m | [0m 2.651   [0m | [0m 0.4569  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.5541  [0m | [0m 4.888   [0m | [0m 0.4467  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.553   [0m | [0m 2.255   [0m | [0m 0.4474  [0m | [0m 0.3664  [0m |
| [0m 9       [0m | [0m 0.5477  [0m | [0m 5.1

In [56]:
best_hyperparam_nb_PC1 = hyperparam_nb_PC1.max['params']
result_nb_PC1 = run_ml(data_load['PC1'], 5, best_hyperparam_nb_PC1, model.naive_bayes,do_optimize = False)
result_nb_PC1

{'Train': {'Gmeans': 0.5344111613605751,
  'Recall': 0.3090047393364929,
  'pf': 0.07575757575757576,
  'cf_matrix': {'tn': 3050, 'fp': 250, 'fn': 729, 'tp': 326}},
 'Dev': {'Gmeans': 0.5634164136462172,
  'Recall': 0.3387096774193548,
  'pf': 0.06280193236714976,
  'cf_matrix': {'tn': 776, 'fp': 52, 'fn': 41, 'tp': 21}},
 'Test': {'Gmeans': 0.547272048304618,
  'Recall': 0.3246753246753247,
  'pf': 0.07751937984496124,
  'cf_matrix': {'tn': 952, 'fp': 80, 'fn': 52, 'tp': 25}}}

### PC3

In [57]:
hyperparam_nb_PC3 = run_ml(data_load['PC3'], 7, bound_naive_bayes['PC3'], model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.7098  [0m | [0m 4.371   [0m | [0m 0.9642  [0m | [0m 0.732   [0m |
| [0m 2       [0m | [0m 0.7044  [0m | [0m 6.388   [0m | [0m 0.3876  [0m | [0m 0.156   [0m |
| [0m 3       [0m | [0m 0.7073  [0m | [0m 1.523   [0m | [0m 0.9029  [0m | [0m 0.6011  [0m |
| [0m 4       [0m | [0m 0.3869  [0m | [0m 7.373   [0m | [0m 0.2893  [0m | [0m 0.9699  [0m |
| [0m 5       [0m | [0m 0.6938  [0m | [0m 8.492   [0m | [0m 0.4285  [0m | [0m 0.1818  [0m |
| [0m 6       [0m | [0m 0.668   [0m | [0m 2.651   [0m | [0m 0.4952  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.67    [0m | [0m 4.888   [0m | [0m 0.4857  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.6912  [0m | [0m 2.255   [0m | [0m 0.4864  [0m | [0m 0.3664  [0m |
| [95m 9       [0m | [95m 0.7132  [0m | [95m 5.105   [

In [58]:
best_hyperparam_nb_PC3 = hyperparam_nb_PC3.max['params']
result_nb_PC3 = run_ml(data_load['PC3'], 7, best_hyperparam_nb_PC3, model.naive_bayes,do_optimize = False)
result_nb_PC3

{'Train': {'Gmeans': 0.7578341681134798,
  'Recall': 0.8810126582278481,
  'pf': 0.3481221626083368,
  'cf_matrix': {'tn': 3159, 'fp': 1687, 'fn': 470, 'tp': 3480}},
 'Dev': {'Gmeans': 0.7161613305479939,
  'Recall': 0.8214285714285714,
  'pf': 0.37561576354679804,
  'cf_matrix': {'tn': 507, 'fp': 305, 'fn': 20, 'tp': 92}},
 'Test': {'Gmeans': 0.7123974797796772,
  'Recall': 0.7910447761194029,
  'pf': 0.3584305408271474,
  'cf_matrix': {'tn': 605, 'fp': 338, 'fn': 28, 'tp': 106}}}

   ### PC4

In [59]:
hyperparam_nb_PC4 = run_ml(data_load['PC4'], 7, bound_naive_bayes['PC4'], model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.7329  [0m | [0m 4.371   [0m | [0m 0.9641  [0m | [0m 0.732   [0m |
| [95m 2       [0m | [95m 0.7356  [0m | [95m 6.388   [0m | [95m 0.3857  [0m | [95m 0.156   [0m |
| [95m 3       [0m | [95m 0.7466  [0m | [95m 1.523   [0m | [95m 0.9026  [0m | [95m 0.6011  [0m |
| [0m 4       [0m | [0m 0.5906  [0m | [0m 7.373   [0m | [0m 0.2871  [0m | [0m 0.9699  [0m |
| [0m 5       [0m | [0m 0.7451  [0m | [0m 8.492   [0m | [0m 0.4267  [0m | [0m 0.1818  [0m |
| [0m 6       [0m | [0m 0.7283  [0m | [0m 2.651   [0m | [0m 0.4936  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.7313  [0m | [0m 4.888   [0m | [0m 0.4841  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.7292  [0m | [0m 2.255   [0m | [0m 0.4848  [0m | [0m 0.3664  [0m |
| [95m 9       [0m | [95m 0.7518  [0m | [95m 

In [60]:
best_hyperparam_nb_PC4 = hyperparam_nb_PC4.max['params']
result_nb_PC4 = run_ml(data_load['PC4'], 7, best_hyperparam_nb_PC4, model.naive_bayes,do_optimize = False)
result_nb_PC4

{'Train': {'Gmeans': 0.7550203380074996,
  'Recall': 0.7384914631510698,
  'pf': 0.22808083877830118,
  'cf_matrix': {'tn': 5080, 'fp': 1501, 'fn': 1210, 'tp': 3417}},
 'Dev': {'Gmeans': 0.7583169585480056,
  'Recall': 0.7532467532467533,
  'pf': 0.23657870791628755,
  'cf_matrix': {'tn': 839, 'fp': 260, 'fn': 38, 'tp': 116}},
 'Test': {'Gmeans': 0.7494379916782886,
  'Recall': 0.7247191011235955,
  'pf': 0.225,
  'cf_matrix': {'tn': 992, 'fp': 288, 'fn': 49, 'tp': 129}}}

## CM1

In [61]:
hyperparam_nb_CM1 = run_ml(data_load['CM1'], 5, bound_naive_bayes['CM1'], model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.6164  [0m | [0m 4.371   [0m | [0m 0.963   [0m | [0m 0.732   [0m |
| [95m 2       [0m | [95m 0.6447  [0m | [95m 6.388   [0m | [95m 0.3657  [0m | [95m 0.156   [0m |
| [0m 3       [0m | [0m 0.6215  [0m | [0m 1.523   [0m | [0m 0.8994  [0m | [0m 0.6011  [0m |
| [0m 4       [0m | [0m 0.6204  [0m | [0m 7.373   [0m | [0m 0.2639  [0m | [0m 0.9699  [0m |
| [0m 5       [0m | [0m 0.6255  [0m | [0m 8.492   [0m | [0m 0.408   [0m | [0m 0.1818  [0m |
| [0m 6       [0m | [0m 0.6416  [0m | [0m 2.651   [0m | [0m 0.4771  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.6228  [0m | [0m 4.888   [0m | [0m 0.4673  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.6365  [0m | [0m 2.255   [0m | [0m 0.468   [0m | [0m 0.3664  [0m |
| [0m 9       [0m | [0m 0.6078  [0m | [0m 5.105   

In [62]:
best_hyperparam_nb_CM1 = hyperparam_nb_CM1.max['params']
result_nb_CM1 = run_ml(data_load['CM1'], 5, best_hyperparam_nb_CM1, model.naive_bayes,do_optimize = False)
result_nb_CM1

{'Train': {'Gmeans': 0.5680576770535674,
  'Recall': 0.38615179760319573,
  'pf': 0.16434540389972144,
  'cf_matrix': {'tn': 1200, 'fp': 236, 'fn': 461, 'tp': 290}},
 'Dev': {'Gmeans': 0.6667708251965838,
  'Recall': 0.55,
  'pf': 0.19166666666666668,
  'cf_matrix': {'tn': 291, 'fp': 69, 'fn': 18, 'tp': 22}},
 'Test': {'Gmeans': 0.5830819377728939,
  'Recall': 0.40816326530612246,
  'pf': 0.16703786191536749,
  'cf_matrix': {'tn': 374, 'fp': 75, 'fn': 29, 'tp': 20}}}

## JM1

In [63]:
hyperparam_nb_JM1 = run_ml(data_load['JM1'], 10, bound_naive_bayes['JM1'], model.naive_bayes)

|   iter    |  target   | neighbors | sampling  | var_sm... |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.3415  [0m | [0m 4.371   [0m | [0m 0.9676  [0m | [0m 0.732   [0m |
| [0m 2       [0m | [0m 0.3384  [0m | [0m 6.388   [0m | [0m 0.4458  [0m | [0m 0.156   [0m |
| [0m 3       [0m | [0m 0.3149  [0m | [0m 1.523   [0m | [0m 0.9121  [0m | [0m 0.6011  [0m |
| [0m 4       [0m | [0m 0.2129  [0m | [0m 7.373   [0m | [0m 0.3568  [0m | [0m 0.9699  [0m |
| [0m 5       [0m | [0m 0.3391  [0m | [0m 8.492   [0m | [0m 0.4827  [0m | [0m 0.1818  [0m |
| [0m 6       [0m | [0m 0.2841  [0m | [0m 2.651   [0m | [0m 0.5431  [0m | [0m 0.5248  [0m |
| [0m 7       [0m | [0m 0.2667  [0m | [0m 4.888   [0m | [0m 0.5346  [0m | [0m 0.6119  [0m |
| [0m 8       [0m | [0m 0.3078  [0m | [0m 2.255   [0m | [0m 0.5352  [0m | [0m 0.3664  [0m |
| [95m 9       [0m | [95m 0.3616  [0m | [95m 5.105   [

In [64]:
best_hyperparam_nb_JM1 = hyperparam_nb_JM1.max['params']
result_nb_JM1 = run_ml(data_load['JM1'], 10, best_hyperparam_nb_JM1, model.naive_bayes,do_optimize = False)
result_nb_JM1

{'Train': {'Gmeans': 0.4496929376838975,
  'Recall': 0.21478907909358164,
  'pf': 0.058500836931906094,
  'cf_matrix': {'tn': 66934, 'fp': 4159, 'fn': 55823, 'tp': 15270}},
 'Dev': {'Gmeans': 0.4701226681600467,
  'Recall': 0.23578947368421052,
  'pf': 0.06265822784810127,
  'cf_matrix': {'tn': 7405, 'fp': 495, 'fn': 1452, 'tp': 448}},
 'Test': {'Gmeans': 0.46349413632688236,
  'Recall': 0.2282453637660485,
  'pf': 0.05879001936880483,
  'cf_matrix': {'tn': 8261, 'fp': 516, 'fn': 1623, 'tp': 480}}}

## Logistic Regression

In [65]:
import warnings
warnings.filterwarnings('ignore')

### KC1

In [66]:
hyperparam_lrQ_KC1 = run_ml(data_load['KC1'], 10, bound_logistic_regression['KC1'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7097  [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.7209  [0m |
| [95m 2       [0m | [95m 0.7217  [0m | [95m 2.404   [0m | [95m 73.4    [0m | [95m 1.523   [0m | [95m 0.9069  [0m |
| [95m 3       [0m | [95m 0.7266  [0m | [95m 6.41    [0m | [95m 156.2   [0m | [95m 1.185   [0m | [95m 0.9791  [0m |
| [0m 4       [0m | [0m 0.6336  [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.4321  [0m |
| [0m 5       [0m | [0m 0.673   [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.5071  [0m |
| [0m 6       [0m | [0m 0.6931  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5594  [0m |
| [0m 7       [0m | [0m 0.6897  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0

In [67]:
best_hyperparam_lr_KC1 = hyperparam_lr_KC1.max['params']
result_lr_KC1 = run_ml(data_load['KC1'], 10, best_hyperparam_lr_KC1, model.Logistic_regression,do_optimize = False)
result_lr_KC1

{'Train': {'Gmeans': 0.7164220646322574,
  'Recall': 0.6779303062302007,
  'pf': 0.2429006787643718,
  'cf_matrix': {'tn': 10931, 'fp': 3507, 'fn': 4270, 'tp': 8988}},
 'Dev': {'Gmeans': 0.7331385073900083,
  'Recall': 0.7353951890034365,
  'pf': 0.26911124922311996,
  'cf_matrix': {'tn': 1176, 'fp': 433, 'fn': 77, 'tp': 214}},
 'Test': {'Gmeans': 0.7102772005295286,
  'Recall': 0.6717791411042945,
  'pf': 0.24901850813236118,
  'cf_matrix': {'tn': 1339, 'fp': 444, 'fn': 107, 'tp': 219}}}

### KC2

In [68]:
hyperparam_lr_KC2 = run_ml(data_load['KC2'], 10, bound_logistic_regression['KC2'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.8243  [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.7411  [0m |
| [0m 2       [0m | [0m 0.8138  [0m | [0m 2.404   [0m | [0m 73.4    [0m | [0m 1.523   [0m | [0m 0.9137  [0m |
| [0m 3       [0m | [0m 0.8125  [0m | [0m 6.41    [0m | [0m 156.2   [0m | [0m 1.185   [0m | [0m 0.9806  [0m |
| [0m 4       [0m | [0m 0.789   [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.4733  [0m |
| [0m 5       [0m | [0m 0.8076  [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.5428  [0m |
| [0m 6       [0m | [0m 0.8004  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5913  [0m |
| [0m 7       [0m | [0m 0.8221  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0.6867  [0m 

In [69]:
best_hyperparam_lr_KC2 = hyperparam_lr_KC2.max['params']
result_lr_KC2 = run_ml(data_load['KC2'], 10, best_hyperparam_lr_KC2, model.Logistic_regression,do_optimize = False)
result_lr_KC2

{'Train': {'Gmeans': 0.7529472117490186,
  'Recall': 0.6692640692640692,
  'pf': 0.15290611028315945,
  'cf_matrix': {'tn': 2842, 'fp': 513, 'fn': 764, 'tp': 1546}},
 'Dev': {'Gmeans': 0.8308031765277832,
  'Recall': 0.8222222222222222,
  'pf': 0.16052631578947368,
  'cf_matrix': {'tn': 319, 'fp': 61, 'fn': 16, 'tp': 74}},
 'Test': {'Gmeans': 0.764809864632722,
  'Recall': 0.6915887850467289,
  'pf': 0.15421686746987953,
  'cf_matrix': {'tn': 351, 'fp': 64, 'fn': 33, 'tp': 74}}}

### PC1

In [70]:
hyperparam_lr_PC1 = run_ml(data_load['PC1'], 5, bound_logistic_regression['PC1'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6101  [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.6867  [0m |
| [95m 2       [0m | [95m 0.6257  [0m | [95m 2.404   [0m | [95m 73.4    [0m | [95m 1.523   [0m | [95m 0.8955  [0m |
| [95m 3       [0m | [95m 0.6784  [0m | [95m 6.41    [0m | [95m 156.2   [0m | [95m 1.185   [0m | [95m 0.9765  [0m |
| [0m 4       [0m | [0m 0.4981  [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.3626  [0m |
| [0m 5       [0m | [0m 0.5397  [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.4467  [0m |
| [0m 6       [0m | [0m 0.5338  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5054  [0m |
| [0m 7       [0m | [0m 0.5539  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0

In [71]:
best_hyperparam_lr_PC1 = hyperparam_lr_PC1.max['params']
result_lr_PC1 = run_ml(data_load['PC1'], 5, best_hyperparam_lr_PC1, model.Logistic_regression,do_optimize = False)
result_lr_PC1

{'Train': {'Gmeans': 0.7420743764376245,
  'Recall': 0.6745454545454546,
  'pf': 0.18363636363636363,
  'cf_matrix': {'tn': 2694, 'fp': 606, 'fn': 1074, 'tp': 2226}},
 'Dev': {'Gmeans': 0.7265769982119473,
  'Recall': 0.6612903225806451,
  'pf': 0.20169082125603865,
  'cf_matrix': {'tn': 661, 'fp': 167, 'fn': 21, 'tp': 41}},
 'Test': {'Gmeans': 0.6889705061911494,
  'Recall': 0.5974025974025974,
  'pf': 0.2054263565891473,
  'cf_matrix': {'tn': 820, 'fp': 212, 'fn': 31, 'tp': 46}}}

### PC3

In [72]:
hyperparam_lr_PC3 = run_ml(data_load['PC3'], 7, bound_logistic_regression['PC3'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.688   [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.7088  [0m |
| [95m 2       [0m | [95m 0.7116  [0m | [95m 2.404   [0m | [95m 73.4    [0m | [95m 1.523   [0m | [95m 0.9029  [0m |
| [0m 3       [0m | [0m 0.7113  [0m | [0m 6.41    [0m | [0m 156.2   [0m | [0m 1.185   [0m | [0m 0.9782  [0m |
| [0m 4       [0m | [0m 0.6234  [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.4075  [0m |
| [0m 5       [0m | [0m 0.6723  [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.4857  [0m |
| [0m 6       [0m | [0m 0.6554  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5402  [0m |
| [0m 7       [0m | [0m 0.7029  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0.6475 

In [73]:
best_hyperparam_lr_PC3 = hyperparam_lr_PC3.max['params']
result_lr_PC3 = run_ml(data_load['PC3'], 7, best_hyperparam_lr_PC3, model.Logistic_regression,do_optimize = False)
result_lr_PC3

{'Train': {'Gmeans': 0.7681325276498187,
  'Recall': 0.7586292525453191,
  'pf': 0.22224515063970285,
  'cf_matrix': {'tn': 3769, 'fp': 1077, 'fn': 972, 'tp': 3055}},
 'Dev': {'Gmeans': 0.7179554744799145,
  'Recall': 0.6964285714285714,
  'pf': 0.25985221674876846,
  'cf_matrix': {'tn': 601, 'fp': 211, 'fn': 34, 'tp': 78}},
 'Test': {'Gmeans': 0.7284518676090967,
  'Recall': 0.6940298507462687,
  'pf': 0.2354188759278897,
  'cf_matrix': {'tn': 721, 'fp': 222, 'fn': 41, 'tp': 93}}}

### PC4

In [74]:
hyperparam_lr_PC4 = run_ml(data_load['PC4'], 7, bound_logistic_regression['PC4'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.8072  [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.7079  [0m |
| [95m 2       [0m | [95m 0.8213  [0m | [95m 2.404   [0m | [95m 73.4    [0m | [95m 1.523   [0m | [95m 0.9026  [0m |
| [95m 3       [0m | [95m 0.8305  [0m | [95m 6.41    [0m | [95m 156.2   [0m | [95m 1.185   [0m | [95m 0.9781  [0m |
| [0m 4       [0m | [0m 0.7486  [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.4056  [0m |
| [0m 5       [0m | [0m 0.7648  [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.4841  [0m |
| [0m 6       [0m | [0m 0.7827  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5388  [0m |
| [0m 7       [0m | [0m 0.8175  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0

In [75]:
best_hyperparam_lr_PC4 = hyperparam_lr_PC4.max['params']
result_lr_PC4 = run_ml(data_load['PC4'], 7, best_hyperparam_lr_PC4, model.Logistic_regression,do_optimize = False)
result_lr_PC4

{'Train': {'Gmeans': 0.8618634000094563,
  'Recall': 0.90711131414649,
  'pf': 0.18112748822367422,
  'cf_matrix': {'tn': 5389, 'fp': 1192, 'fn': 610, 'tp': 5957}},
 'Dev': {'Gmeans': 0.8461080019303671,
  'Recall': 0.8636363636363636,
  'pf': 0.1710646041856233,
  'cf_matrix': {'tn': 911, 'fp': 188, 'fn': 21, 'tp': 133}},
 'Test': {'Gmeans': 0.817529807380169,
  'Recall': 0.8202247191011236,
  'pf': 0.18515625,
  'cf_matrix': {'tn': 1043, 'fp': 237, 'fn': 32, 'tp': 146}}}

### CM1

In [76]:
hyperparam_lr_CM1 = run_ml(data_load['CM1'], 5, bound_logistic_regression['CM1'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6724  [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.6984  [0m |
| [95m 2       [0m | [95m 0.7374  [0m | [95m 2.404   [0m | [95m 73.4    [0m | [95m 1.523   [0m | [95m 0.8994  [0m |
| [95m 3       [0m | [95m 0.7444  [0m | [95m 6.41    [0m | [95m 156.2   [0m | [95m 1.185   [0m | [95m 0.9774  [0m |
| [0m 4       [0m | [0m 0.611   [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.3862  [0m |
| [0m 5       [0m | [0m 0.5698  [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.4673  [0m |
| [0m 6       [0m | [0m 0.6305  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5238  [0m |
| [0m 7       [0m | [0m 0.6713  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0

In [77]:
best_hyperparam_lr_CM1 = hyperparam_lr_CM1.max['params']
result_lr_CM1 = run_ml(data_load['CM1'], 5, best_hyperparam_lr_CM1, model.Logistic_regression,do_optimize = False)
result_lr_CM1

{'Train': {'Gmeans': 0.7872398784007238,
  'Recall': 0.7554806070826307,
  'pf': 0.1796657381615599,
  'cf_matrix': {'tn': 1178, 'fp': 258, 'fn': 290, 'tp': 896}},
 'Dev': {'Gmeans': 0.7720823214600313,
  'Recall': 0.725,
  'pf': 0.17777777777777778,
  'cf_matrix': {'tn': 296, 'fp': 64, 'fn': 11, 'tp': 29}},
 'Test': {'Gmeans': 0.7200846376425808,
  'Recall': 0.6326530612244898,
  'pf': 0.18040089086859687,
  'cf_matrix': {'tn': 368, 'fp': 81, 'fn': 18, 'tp': 31}}}

### JM1

In [78]:
hyperparam_lr_JM1 = run_ml(data_load['JM1'], 10, bound_logistic_regression['JM1'], model.Logistic_regression)

|   iter    |  target   |     c     | max_iter  | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.574   [0m | [0m 4.371   [0m | [0m 192.6   [0m | [0m 7.588   [0m | [0m 0.7364  [0m |
| [95m 2       [0m | [95m 0.6296  [0m | [95m 2.404   [0m | [95m 73.4    [0m | [95m 1.523   [0m | [95m 0.9121  [0m |
| [95m 3       [0m | [95m 0.6486  [0m | [95m 6.41    [0m | [95m 156.2   [0m | [95m 1.185   [0m | [95m 0.9802  [0m |
| [0m 4       [0m | [0m 0.4522  [0m | [0m 8.492   [0m | [0m 81.85   [0m | [0m 2.636   [0m | [0m 0.4637  [0m |
| [0m 5       [0m | [0m 0.4891  [0m | [0m 3.738   [0m | [0m 128.7   [0m | [0m 4.888   [0m | [0m 0.5346  [0m |
| [0m 6       [0m | [0m 0.5237  [0m | [0m 6.507   [0m | [0m 70.92   [0m | [0m 3.629   [0m | [0m 0.5839  [0m |
| [0m 7       [0m | [0m 0.5606  [0m | [0m 5.105   [0m | [0m 167.8   [0m | [0m 2.797   [0m | [0m 0

In [79]:
best_hyperparam_lr_JM1 = hyperparam_lr_JM1.max['params']
result_lr_JM1 = run_ml(data_load['JM1'], 10, best_hyperparam_lr_JM1, model.Logistic_regression,do_optimize = False)
result_lr_JM1

{'Train': {'Gmeans': 0.6568167341103037,
  'Recall': 0.5905820061115571,
  'pf': 0.26952020592744713,
  'cf_matrix': {'tn': 51932, 'fp': 19161, 'fn': 29074, 'tp': 41939}},
 'Dev': {'Gmeans': 0.6496756737116284,
  'Recall': 0.58,
  'pf': 0.2722784810126582,
  'cf_matrix': {'tn': 5749, 'fp': 2151, 'fn': 798, 'tp': 1102}},
 'Test': {'Gmeans': 0.6553199898117448,
  'Recall': 0.5882073228720875,
  'pf': 0.2699099920246098,
  'cf_matrix': {'tn': 6408, 'fp': 2369, 'fn': 866, 'tp': 1237}}}

## KNN

### KC1

In [80]:
hyperparam_knn_KC1 = run_ml(data_load['KC1'], 10, bound_knn['KC1'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7082  [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.7209  [0m |
| [0m 2       [0m | [0m 0.6524  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.9069  [0m |
| [95m 3       [0m | [95m 0.7129  [0m | [95m 64.1    [0m | [95m 7.373   [0m | [95m 1.185   [0m | [95m 0.9791  [0m |
| [0m 4       [0m | [0m 0.6057  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.4321  [0m |
| [0m 5       [0m | [0m 0.7058  [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.5071  [0m |
| [0m 6       [0m | [0m 0.622   [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5594  [0m |
| [0m 7       [0m | [0m 0.7088  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.6622 

In [81]:
best_hyperparam_knn_KC1 = hyperparam_knn_KC1.max['params']
result_knn_KC1 = run_ml(data_load['KC1'], 10, best_hyperparam_knn_KC1, model.knn,do_optimize = False)
result_knn_KC1

{'Train': {'Gmeans': 0.8825586394305268,
  'Recall': 0.9287223552613044,
  'pf': 0.16131043080759108,
  'cf_matrix': {'tn': 12109, 'fp': 2329, 'fn': 1012, 'tp': 13186}},
 'Dev': {'Gmeans': 0.7222084081510818,
  'Recall': 0.6735395189003437,
  'pf': 0.22560596643878186,
  'cf_matrix': {'tn': 1246, 'fp': 363, 'fn': 95, 'tp': 196}},
 'Test': {'Gmeans': 0.6820995770288557,
  'Recall': 0.5950920245398773,
  'pf': 0.21817162086371283,
  'cf_matrix': {'tn': 1394, 'fp': 389, 'fn': 132, 'tp': 194}}}

### KC2

In [82]:
hyperparam_knn_KC2 = run_ml(data_load['KC2'], 10, bound_knn['KC2'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7626  [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.7411  [0m |
| [0m 2       [0m | [0m 0.6745  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.9137  [0m |
| [95m 3       [0m | [95m 0.7922  [0m | [95m 64.1    [0m | [95m 7.373   [0m | [95m 1.185   [0m | [95m 0.9806  [0m |
| [0m 4       [0m | [0m 0.6414  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.4733  [0m |
| [0m 5       [0m | [0m 0.769   [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.5428  [0m |
| [0m 6       [0m | [0m 0.6278  [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5913  [0m |
| [0m 7       [0m | [0m 0.7678  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.6867 

In [83]:
best_hyperparam_knn_KC2 = hyperparam_knn_KC1.max['params']
result_knn_KC2 = run_ml(data_load['KC2'], 10, best_hyperparam_knn_KC2, model.knn,do_optimize = False)
result_knn_KC2

{'Train': {'Gmeans': 0.8536171146303146,
  'Recall': 0.8825493171471928,
  'pf': 0.17436661698956782,
  'cf_matrix': {'tn': 2770, 'fp': 585, 'fn': 387, 'tp': 2908}},
 'Dev': {'Gmeans': 0.7882915125586029,
  'Recall': 0.7666666666666667,
  'pf': 0.18947368421052632,
  'cf_matrix': {'tn': 308, 'fp': 72, 'fn': 21, 'tp': 69}},
 'Test': {'Gmeans': 0.7435190840238218,
  'Recall': 0.7102803738317757,
  'pf': 0.2216867469879518,
  'cf_matrix': {'tn': 323, 'fp': 92, 'fn': 31, 'tp': 76}}}

### PC1

In [84]:
hyperparam_knn_PC1 = run_ml(data_load['PC1'], 5, bound_knn['PC1'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7927  [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.6867  [0m |
| [0m 2       [0m | [0m 0.6552  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.8955  [0m |
| [0m 3       [0m | [0m 0.7461  [0m | [0m 64.1    [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9765  [0m |
| [0m 4       [0m | [0m 0.5964  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.3626  [0m |
| [0m 5       [0m | [0m 0.7728  [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.4467  [0m |
| [0m 6       [0m | [0m 0.6175  [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5054  [0m |
| [0m 7       [0m | [0m 0.7712  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.6208  [0m 

In [85]:
best_hyperparam_knn_PC1 = hyperparam_knn_PC1.max['params']
result_knn_PC1 = run_ml(data_load['PC1'], 5, best_hyperparam_knn_PC1, model.knn,do_optimize = False)
result_knn_PC1

{'Train': {'Gmeans': 0.8985768259937709,
  'Recall': 0.9742424242424242,
  'pf': 0.1712121212121212,
  'cf_matrix': {'tn': 2735, 'fp': 565, 'fn': 85, 'tp': 3215}},
 'Dev': {'Gmeans': 0.8279630607598736,
  'Recall': 0.8548387096774194,
  'pf': 0.19806763285024154,
  'cf_matrix': {'tn': 664, 'fp': 164, 'fn': 9, 'tp': 53}},
 'Test': {'Gmeans': 0.7678707513561651,
  'Recall': 0.7402597402597403,
  'pf': 0.20348837209302326,
  'cf_matrix': {'tn': 822, 'fp': 210, 'fn': 20, 'tp': 57}}}

### PC3

In [86]:
hyperparam_knn_PC3 = run_ml(data_load['PC3'], 7, bound_knn['PC3'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6981  [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.7088  [0m |
| [0m 2       [0m | [0m 0.5814  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.9029  [0m |
| [0m 3       [0m | [0m 0.6626  [0m | [0m 64.1    [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9782  [0m |
| [0m 4       [0m | [0m 0.5517  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.4075  [0m |
| [0m 5       [0m | [0m 0.6623  [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.4857  [0m |
| [0m 6       [0m | [0m 0.5887  [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5402  [0m |
| [0m 7       [0m | [0m 0.6701  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.6475  [0m 

In [87]:
best_hyperparam_knn_PC3 = hyperparam_knn_PC3.max['params']
result_knn_PC3 = run_ml(data_load['PC3'], 7, best_hyperparam_knn_PC3, model.knn,do_optimize = False)
result_knn_PC3

{'Train': {'Gmeans': 0.8837999732657926,
  'Recall': 0.9793589120932491,
  'pf': 0.20243499793644243,
  'cf_matrix': {'tn': 3865, 'fp': 981, 'fn': 85, 'tp': 4033}},
 'Dev': {'Gmeans': 0.7161306223460825,
  'Recall': 0.7142857142857143,
  'pf': 0.28201970443349755,
  'cf_matrix': {'tn': 583, 'fp': 229, 'fn': 32, 'tp': 80}},
 'Test': {'Gmeans': 0.7267496906979183,
  'Recall': 0.7014925373134329,
  'pf': 0.24708377518557795,
  'cf_matrix': {'tn': 710, 'fp': 233, 'fn': 40, 'tp': 94}}}

### PC4

In [88]:
hyperparam_knn_PC4 = run_ml(data_load['PC4'], 7, bound_knn['PC4'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.783   [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.7079  [0m |
| [0m 2       [0m | [0m 0.6935  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.9026  [0m |
| [95m 3       [0m | [95m 0.7872  [0m | [95m 64.1    [0m | [95m 7.373   [0m | [95m 1.185   [0m | [95m 0.9781  [0m |
| [0m 4       [0m | [0m 0.6477  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.4056  [0m |
| [0m 5       [0m | [0m 0.7812  [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.4841  [0m |
| [0m 6       [0m | [0m 0.6954  [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5388  [0m |
| [0m 7       [0m | [0m 0.7756  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.6464 

In [89]:
best_hyperparam_knn_PC4 = hyperparam_knn_PC4.max['params']
result_knn_PC4 = run_ml(data_load['PC4'], 7, best_hyperparam_knn_PC4, model.knn,do_optimize = False)
result_knn_PC4

{'Train': {'Gmeans': 0.906258105193544,
  'Recall': 0.9848760932944607,
  'pf': 0.1660841817352986,
  'cf_matrix': {'tn': 5488, 'fp': 1093, 'fn': 83, 'tp': 5405}},
 'Dev': {'Gmeans': 0.8086848227815125,
  'Recall': 0.8376623376623377,
  'pf': 0.21929026387625114,
  'cf_matrix': {'tn': 858, 'fp': 241, 'fn': 25, 'tp': 129}},
 'Test': {'Gmeans': 0.7977760520431051,
  'Recall': 0.8089887640449438,
  'pf': 0.21328125,
  'cf_matrix': {'tn': 1007, 'fp': 273, 'fn': 34, 'tp': 144}}}

# CM1

In [90]:
hyperparam_knn_CM1 = run_ml(data_load['CM1'], 5, bound_knn['CM1'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6562  [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.6984  [0m |
| [0m 2       [0m | [0m 0.3975  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.8994  [0m |
| [0m 3       [0m | [0m 0.6134  [0m | [0m 64.1    [0m | [0m 7.373   [0m | [0m 1.185   [0m | [0m 0.9774  [0m |
| [0m 4       [0m | [0m 0.4275  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.3862  [0m |
| [0m 5       [0m | [0m 0.6196  [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.4673  [0m |
| [0m 6       [0m | [0m 0.4493  [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5238  [0m |
| [0m 7       [0m | [0m 0.5948  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.6349  [0m 

In [91]:
best_hyperparam_knn_CM1 = hyperparam_knn_CM1.max['params']
result_knn_CM1 = run_ml(data_load['CM1'], 5, best_hyperparam_knn_CM1, model.knn,do_optimize = False)
result_knn_CM1

{'Train': {'Gmeans': 0.8396792590386882,
  'Recall': 0.9916434540389972,
  'pf': 0.2889972144846797,
  'cf_matrix': {'tn': 1021, 'fp': 415, 'fn': 12, 'tp': 1424}},
 'Dev': {'Gmeans': 0.6894039293315478,
  'Recall': 0.725,
  'pf': 0.34444444444444444,
  'cf_matrix': {'tn': 236, 'fp': 124, 'fn': 11, 'tp': 29}},
 'Test': {'Gmeans': 0.6616632692663825,
  'Recall': 0.6530612244897959,
  'pf': 0.32962138084632514,
  'cf_matrix': {'tn': 301, 'fp': 148, 'fn': 17, 'tp': 32}}}

## JM1

In [92]:
hyperparam_knn_JM1 = run_ml(data_load['JM1'], 10, bound_knn['JM1'], model.knn)

|   iter    |  target   |   leaf    |     n     | neighbors | sampling  |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6342  [0m | [0m 43.71   [0m | [0m 9.556   [0m | [0m 7.588   [0m | [0m 0.7364  [0m |
| [0m 2       [0m | [0m 0.5474  [0m | [0m 24.04   [0m | [0m 2.404   [0m | [0m 1.523   [0m | [0m 0.9121  [0m |
| [95m 3       [0m | [95m 0.6351  [0m | [95m 64.1    [0m | [95m 7.373   [0m | [95m 1.185   [0m | [95m 0.9802  [0m |
| [0m 4       [0m | [0m 0.5083  [0m | [0m 84.92   [0m | [0m 2.911   [0m | [0m 2.636   [0m | [0m 0.4637  [0m |
| [0m 5       [0m | [0m 0.597   [0m | [0m 37.38   [0m | [0m 5.723   [0m | [0m 4.888   [0m | [0m 0.5346  [0m |
| [0m 6       [0m | [0m 0.5334  [0m | [0m 65.07   [0m | [0m 2.255   [0m | [0m 3.629   [0m | [0m 0.5839  [0m |
| [0m 7       [0m | [0m 0.6093  [0m | [0m 51.05   [0m | [0m 8.067   [0m | [0m 2.797   [0m | [0m 0.681  

In [93]:
best_hyperparam_knn_JM1 = hyperparam_knn_JM1.max['params']
result_knn_JM1 = run_ml(data_load['JM1'], 10, best_hyperparam_knn_JM1, model.knn,do_optimize = False)
result_knn_JM1

{'Train': {'Gmeans': 0.8157737223717225,
  'Recall': 0.8728566806858622,
  'pf': 0.2375761326712897,
  'cf_matrix': {'tn': 54203, 'fp': 16890, 'fn': 9039, 'tp': 62054}},
 'Dev': {'Gmeans': 0.6435844410110655,
  'Recall': 0.5847368421052631,
  'pf': 0.2916455696202532,
  'cf_matrix': {'tn': 5596, 'fp': 2304, 'fn': 789, 'tp': 1111}},
 'Test': {'Gmeans': 0.6429562500303476,
  'Recall': 0.5820256776034237,
  'pf': 0.28973453343967187,
  'cf_matrix': {'tn': 6234, 'fp': 2543, 'fn': 879, 'tp': 1224}}}

# STORE VARIABLE

In [94]:
from datetime import datetime
import dill

dill.dump_session('Save/Compare-EI-Matrix-Final2.db')

In [1]:
import dill
dill.load_session('Save/Compare-EI-Matrix-Final2.db')

In [9]:
print(result_lr_KC1['Test']['Gmeans'])
print(result_lr_KC2['Test']['Gmeans'])
print(result_lr_PC1['Test']['Gmeans'])
print(result_lr_PC3['Test']['Gmeans'])
print(result_lr_PC4['Test']['Gmeans'])
print(result_lr_CM1['Test']['Gmeans'])
print(result_lr_JM1['Test']['Gmeans'])

0.7102772005295286
0.764809864632722
0.6889705061911494
0.7284518676090967
0.817529807380169
0.7200846376425808
0.6553199898117448
