In [1]:
import time
import pprint

import torch
import pandas as pd
from sklearn.metrics import f1_score, balanced_accuracy_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from tqdm.notebook import tqdm

from utils import *
from models.CreditcardDataset import CreditcardDataset
from models.OversamplingDataset import OversamplingDataset
from models.LorasDataset import LorasDataset
from models.Autoencoder import Autoencoder
from models.Classifier import Classifier
from models.HiddenReprClassifier import HiddenReprClassifier

from imblearn.over_sampling import SMOTE, ADASYN, SVMSMOTE


DATA_PATH = "./data/"
pp = pprint.PrettyPrinter(indent=4)

In [2]:
BATCH_SIZE = 64

LEARNING_RATE_AUTOENC = 2e-3
LEARNING_RATE_CLASSIFIER = 3e-4
LEARNING_RATE_SMALL_CLASS = 1e-4
NOISE_STR = 0.2

AUTOENC_EPOCHS = 25
CLASSIFIER_EPOCHS = 25

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

cpu


In [3]:
train_df = pd.read_parquet(DATA_PATH+"train.parquet")
valid_df = pd.read_parquet(DATA_PATH+"valid.parquet")
test_df = pd.read_parquet(DATA_PATH+"test.parquet")

In [4]:
oversamplers = {
    "SMOTE":SMOTE(random_state=42, k_neighbors=30, n_jobs=-1),
    "ADASYN": ADASYN(random_state=42, n_neighbors=30, n_jobs=-1),
    "SVMSMOTE": SVMSMOTE(random_state=42, k_neighbors=30, n_jobs=-1)
}

In [5]:
train_datasets = {key: OversamplingDataset(train_df, oversamplers[key])for key in oversamplers.keys()}
valid_dataset = CreditcardDataset(valid_df)
test_dataset = CreditcardDataset(test_df)

train_datasets['None'] = CreditcardDataset(train_df)#baseline(no oversampling)
train_datasets['LORAS'] = LorasDataset(train_df)

In [6]:
autoenc_models = {key:Autoencoder().to(DEVICE) for key in train_datasets.keys()}
autoenc_optims = {key:torch.optim.Adam(autoenc_models[key].parameters(),
                                       lr=LEARNING_RATE_AUTOENC) for key in autoenc_models.keys()}

In [7]:
def calculate_valid_loss_autoenc(model, valid_dataset, loss_fn):
    dataloader = torch.utils.data.DataLoader(valid_dataset, 1, shuffle=False)
    loss_list = []

    for x, _ in dataloader:
        x = x.to(DEVICE)
        logits = model(x)
        loss = loss_fn(logits, x)
        loss_val = loss.item()
        loss_list.append(loss_val)

    return np.mean(loss_list)


def train_autoenc_model(model, optimizer, train_dataset, valid_dataset):
    model.train()
    loss_fn = torch.nn.MSELoss()

    for epoch in range(AUTOENC_EPOCHS):
        print("\n Start of epoch {}/{}...".format(epoch + 1, AUTOENC_EPOCHS))
        epoch_start_time = time.time()

        loss_list = []
        dataloader = torch.utils.data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True)

        for x_orig, _ in dataloader:
            x_orig_gpu = x_orig.cpu().to(DEVICE)
            x_noisy = get_noised_data(x_orig, NOISE_STR)
            x_noisy = x_noisy.to(DEVICE)

            logits = model(x_noisy)
            loss = loss_fn(logits, x_orig_gpu)
            loss_val = loss.item()
            loss_list.append(loss_val)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss = np.mean(loss_list)
        valid_loss = calculate_valid_loss_autoenc(model, valid_dataset, loss_fn)

        epoch_time = time.time() - epoch_start_time
        print(
            "\n Epoch {} complete in {:.2f},train loss = {}, valid_loss = {}".format(epoch + 1, epoch_time, train_loss, valid_loss))

In [8]:
for key in autoenc_models.keys():
    print("Training DAE with oversamlping method:", key)
    train_autoenc_model(autoenc_models[key], autoenc_optims[key], train_datasets[key], valid_dataset)
    print("==========================")

Training DAE with oversamlping method: SMOTE

 Start of epoch 1/25...

 Epoch 1 complete in 13.60,train loss = 1.9244747089419718, valid_loss = 0.9762485881264192

 Start of epoch 2/25...

 Epoch 2 complete in 13.15,train loss = 1.2021769919100298, valid_loss = 0.8929161297243146

 Start of epoch 3/25...

 Epoch 3 complete in 14.00,train loss = 0.7914974241825928, valid_loss = 0.7499868774057452

 Start of epoch 4/25...

 Epoch 4 complete in 13.89,train loss = 0.5889178604797918, valid_loss = 0.6135659151393742

 Start of epoch 5/25...

 Epoch 5 complete in 13.27,train loss = 0.49159085772620137, valid_loss = 0.5237622026049705

 Start of epoch 6/25...

 Epoch 6 complete in 14.37,train loss = 0.4317564800573315, valid_loss = 0.45413159658590657

 Start of epoch 7/25...

 Epoch 7 complete in 13.70,train loss = 0.37382192168253026, valid_loss = 0.3906586042777243

 Start of epoch 8/25...

 Epoch 8 complete in 13.95,train loss = 0.3320331587956266, valid_loss = 0.3549492147573495

 Start 


 Epoch 18 complete in 13.41,train loss = 0.3483172924955558, valid_loss = 0.36661811622061036

 Start of epoch 19/25...

 Epoch 19 complete in 13.85,train loss = 0.3347305433495252, valid_loss = 0.35042711346072786

 Start of epoch 20/25...

 Epoch 20 complete in 13.32,train loss = 0.3202953107439437, valid_loss = 0.34159653709199395

 Start of epoch 21/25...

 Epoch 21 complete in 14.06,train loss = 0.30835437424169476, valid_loss = 0.32515947161427644

 Start of epoch 22/25...

 Epoch 22 complete in 13.84,train loss = 0.2957513698164098, valid_loss = 0.3129692730808383

 Start of epoch 23/25...

 Epoch 23 complete in 12.96,train loss = 0.28554409055752, valid_loss = 0.3063822615908568

 Start of epoch 24/25...

 Epoch 24 complete in 13.85,train loss = 0.2780003334284787, valid_loss = 0.30298210431011574

 Start of epoch 25/25...

 Epoch 25 complete in 12.97,train loss = 0.27220234981591296, valid_loss = 0.2943604440841609
Training DAE with oversamlping method: None

 Start of epoch 

In [9]:
classifier_models = {key:Classifier().to(DEVICE) for key in autoenc_models.keys()}
classifier_optims = {key:torch.optim.Adam(classifier_models[key].parameters(),
                                       lr=LEARNING_RATE_AUTOENC) for key in classifier_models.keys()}

In [10]:
def pass_through_autoenc_classifier(x, y, autoenc, classifier, loss_fn, passthrough_fnc):
    autoenc_out = passthrough_fnc(x)
    logits = classifier(autoenc_out)
    loss = loss_fn(logits, y)
    loss_val = loss.item()
    
    return loss, loss_val


def calculate_valid_loss_classifier(autoenc, model, valid_dataset, loss_fn, passthrough_fnc):

    dataloader = torch.utils.data.DataLoader(valid_dataset, 1, shuffle=False)
    loss_list = []

    for x, y in dataloader:
        x = x.to(DEVICE)
        y = y.type(torch.LongTensor).to(DEVICE)

        _, loss_val = pass_through_autoenc_classifier(x ,y, autoenc, model, loss_fn, passthrough_fnc)
        loss_list.append(loss_val)

    return np.mean(loss_list)


def train_classifier_model(autoenc_model, model, optimizer, train_dataset, valid_dataset, passthrough_fnc):
    autoenc_model.eval()
    model.train()

    loss_fn = torch.nn.CrossEntropyLoss()

    for epoch in range(CLASSIFIER_EPOCHS):
        print("\n Start of epoch {}/{}".format(epoch + 1, CLASSIFIER_EPOCHS))
        epoch_start_time = time.time()

        loss_list = []
        dataloader = torch.utils.data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True)

        for x, y in dataloader:
            x = x.to(DEVICE)
            y = y.type(torch.LongTensor).to(DEVICE)

            loss, loss_val = pass_through_autoenc_classifier(x, y, autoenc_model, model, loss_fn, passthrough_fnc)
            loss_list.append(loss_val)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss = np.mean(loss_list)
        valid_loss = calculate_valid_loss_classifier(autoenc_model, model, valid_dataset, loss_fn, passthrough_fnc)

        epoch_time = time.time() - epoch_start_time
        print(
            "\n Epoch {} complete in {:.2f}, train loss = {}, valid_loss = {}".format(epoch + 1, epoch_time, train_loss,
                                                                                      valid_loss))

In [11]:
for key in classifier_models.keys():
    print("Training classifier with oversamlping method:", key)
    train_classifier_model(autoenc_models[key], classifier_models[key], classifier_optims[key],
                          train_datasets[key], valid_dataset, autoenc_models[key].forward)

Training classifier with oversamlping method: SMOTE

 Start of epoch 1/25

 Epoch 1 complete in 14.56, train loss = 0.05500454941762228, valid_loss = 0.04433457963454021

 Start of epoch 2/25

 Epoch 2 complete in 14.27, train loss = 0.0282990623662103, valid_loss = 0.03727009489776419

 Start of epoch 3/25

 Epoch 3 complete in 14.91, train loss = 0.024101852252216126, valid_loss = 0.03207793317864537

 Start of epoch 4/25

 Epoch 4 complete in 14.39, train loss = 0.021837329883508345, valid_loss = 0.03743876347437629

 Start of epoch 5/25

 Epoch 5 complete in 13.22, train loss = 0.01987532762215714, valid_loss = 0.02740446938554779

 Start of epoch 6/25

 Epoch 6 complete in 14.12, train loss = 0.01823370993043591, valid_loss = 0.025733321148889356

 Start of epoch 7/25

 Epoch 7 complete in 14.35, train loss = 0.017378028468173195, valid_loss = 0.026211655710496995

 Start of epoch 8/25

 Epoch 8 complete in 14.89, train loss = 0.016739589429326036, valid_loss = 0.025050752157037




 Epoch 18 complete in 14.69, train loss = 0.008590314017067486, valid_loss = 0.012326024262395947

 Start of epoch 19/25

 Epoch 19 complete in 14.61, train loss = 0.008275521302843631, valid_loss = 0.013531317070821213

 Start of epoch 20/25

 Epoch 20 complete in 13.89, train loss = 0.008135079633128664, valid_loss = 0.014734266045258848

 Start of epoch 21/25

 Epoch 21 complete in 14.75, train loss = 0.008150603048915812, valid_loss = 0.014460120971641779

 Start of epoch 22/25

 Epoch 22 complete in 14.23, train loss = 0.007842023952998073, valid_loss = 0.018411421177801174

 Start of epoch 23/25

 Epoch 23 complete in 14.93, train loss = 0.007587305355375234, valid_loss = 0.012940325231308466

 Start of epoch 24/25

 Epoch 24 complete in 14.69, train loss = 0.0077345821510757225, valid_loss = 0.014149965849435977

 Start of epoch 25/25

 Epoch 25 complete in 14.85, train loss = 0.007429295881961611, valid_loss = 0.014478103468191762
Training classifier with oversamlping method: 

In [12]:
def evaluate_predictions(auto_enc, model, dataset, passthrough_fnc):
    dataloader = torch.utils.data.DataLoader(dataset, 1, shuffle=False)

    predictions = []
    ground_truth = []
    for x, y in tqdm(dataloader):
        x = x.to(DEVICE)
        ground_truth.append(y.item())

        autoenc_out = passthrough_fnc(x)
        logits = model(autoenc_out)
        predictions.append(np.argmax(logits.detach().cpu().numpy()))
        
    return {
        'precision': precision_score(ground_truth, predictions),
        'recall': recall_score(ground_truth, predictions),
        'F1': f1_score(ground_truth, predictions),
        'balanced accuracy': balanced_accuracy_score(ground_truth, predictions)
    }

In [13]:
for key in classifier_models.keys():
    print("Results for model with oversampling algorthm:", key)
    pp.pprint(evaluate_predictions(autoenc_models[key], classifier_models[key],
                               test_dataset, autoenc_models[key].forward))

Results for model with oversampling algorthm: SMOTE


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.371559633027523,
    'balanced accuracy': 0.9110055284071985,
    'precision': 0.23964497041420119,
    'recall': 0.826530612244898}
Results for model with oversampling algorthm: ADASYN


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.3048327137546469,
    'balanced accuracy': 0.9152194853743412,
    'precision': 0.18636363636363637,
    'recall': 0.8367346938775511}
Results for model with oversampling algorthm: SVMSMOTE


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.4565217391304348,
    'balanced accuracy': 0.9269359474234262,
    'precision': 0.3111111111111111,
    'recall': 0.8571428571428571}
Results for model with oversampling algorthm: None


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.8,
    'balanced accuracy': 0.89779211839491,
    'precision': 0.8041237113402062,
    'recall': 0.7959183673469388}
Results for model with oversampling algorthm: LORAS


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.7772020725388602,
    'balanced accuracy': 0.8824772030365325,
    'precision': 0.7894736842105263,
    'recall': 0.7653061224489796}


In [14]:
hidden_classifier_models = {key:HiddenReprClassifier().to(DEVICE) for key in autoenc_models.keys()}
hidden_classifier_optims = {key:torch.optim.Adam(hidden_classifier_models[key].parameters(),
                                       lr=LEARNING_RATE_AUTOENC) for key in hidden_classifier_models.keys()}

In [15]:
for key in hidden_classifier_models.keys():
    print("Training hidden classifiers with oversamlping method:", key)
    train_classifier_model(autoenc_models[key], hidden_classifier_models[key], hidden_classifier_optims[key],
                          train_datasets[key], valid_dataset, autoenc_models[key].get_enc)

Training hidden classifiers with oversamlping method: SMOTE

 Start of epoch 1/25

 Epoch 1 complete in 11.05, train loss = 0.14679390678325985, valid_loss = 0.09318339910891095

 Start of epoch 2/25

 Epoch 2 complete in 10.54, train loss = 0.1187170810097607, valid_loss = 0.09763453537820505

 Start of epoch 3/25

 Epoch 3 complete in 10.65, train loss = 0.11350252241318465, valid_loss = 0.11183849896448393

 Start of epoch 4/25

 Epoch 4 complete in 10.70, train loss = 0.11070330346323598, valid_loss = 0.0918422930405236

 Start of epoch 5/25

 Epoch 5 complete in 10.63, train loss = 0.10911304718182928, valid_loss = 0.08698966074270038

 Start of epoch 6/25

 Epoch 6 complete in 10.76, train loss = 0.10812105876977564, valid_loss = 0.08104388714708234

 Start of epoch 7/25

 Epoch 7 complete in 11.24, train loss = 0.10724242254679908, valid_loss = 0.08068744862718331

 Start of epoch 8/25

 Epoch 8 complete in 11.32, train loss = 0.10650157722857125, valid_loss = 0.1035198371425325


 Epoch 19 complete in 11.17, train loss = 0.04167322698355259, valid_loss = 0.034683967904450455

 Start of epoch 20/25

 Epoch 20 complete in 10.47, train loss = 0.04149504603929982, valid_loss = 0.04756348493830615

 Start of epoch 21/25

 Epoch 21 complete in 10.88, train loss = 0.04138873088418003, valid_loss = 0.036720051579827134

 Start of epoch 22/25

 Epoch 22 complete in 10.80, train loss = 0.0411410720154405, valid_loss = 0.04167162281195219

 Start of epoch 23/25

 Epoch 23 complete in 10.96, train loss = 0.04094531385574664, valid_loss = 0.024464824742783924

 Start of epoch 24/25

 Epoch 24 complete in 10.01, train loss = 0.04092063851238279, valid_loss = 0.029294292093107733

 Start of epoch 25/25

 Epoch 25 complete in 10.54, train loss = 0.04077866153244657, valid_loss = 0.04751491293002497
Training hidden classifiers with oversamlping method: None

 Start of epoch 1/25

 Epoch 1 complete in 19.36, train loss = 0.021970186651158793, valid_loss = 0.0034770076373488172


In [16]:
for key in hidden_classifier_models.keys():
    print("Results for hidden_repr model with oversampling algorthm:", key)
    pp.pprint(evaluate_predictions(autoenc_models[key], hidden_classifier_models[key],
                               test_dataset, autoenc_models[key].get_enc))

Results for hidden_repr model with oversampling algorthm: SMOTE


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.0841813135985199,
    'balanced accuracy': 0.9469373040437334,
    'precision': 0.04408914728682171,
    'recall': 0.9285714285714286}
Results for hidden_repr model with oversampling algorthm: ADASYN


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.049141347424042275,
    'balanced accuracy': 0.9428880795424529,
    'precision': 0.025223759153783564,
    'recall': 0.9489795918367347}
Results for hidden_repr model with oversampling algorthm: SVMSMOTE


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.14617368873602749,
    'balanced accuracy': 0.9250564181778507,
    'precision': 0.07981220657276995,
    'recall': 0.8673469387755102}
Results for hidden_repr model with oversampling algorthm: None


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.7777777777777778,
    'balanced accuracy': 0.8570373422300828,
    'precision': 0.8536585365853658,
    'recall': 0.7142857142857143}
Results for hidden_repr model with oversampling algorthm: LORAS


  0%|          | 0/56962 [00:00<?, ?it/s]

{   'F1': 0.6367346938775511,
    'balanced accuracy': 0.897352472925017,
    'precision': 0.5306122448979592,
    'recall': 0.7959183673469388}


In [17]:
train_X, train_y = get_x_y(train_df)

valid_X, valid_y = get_x_y(valid_df)
test_X, test_y = get_x_y(test_df)

X = pd.concat([train_X, valid_X])
y = pd.concat([train_y, valid_y])

train_datasets = {key: oversamplers[key].fit_resample(X,y) for key in oversamplers.keys()}
train_datasets['LORAS'] = loras_oversample_dataframe(pd.concat([X, y] ,axis=1))

In [18]:
def eval_for_thresholds(y, predictions):
    return {thr: eval_for_threshhold(y, predictions, thr) for thr in np.arange(0.2, 0.9, 0.1)}


def eval_for_threshhold(ground_truth, predictions, thr):
    class_predicitons = (predictions[:,1] >= thr).astype(bool)
    return  {
        'precision': precision_score(ground_truth, class_predicitons),
        'recall': recall_score(ground_truth, class_predicitons),
        'F1': f1_score(ground_truth, class_predicitons),
        'balanced accuracy': balanced_accuracy_score(ground_truth, class_predicitons)
    } 


def random_forest_train_eval(train_X, train_y, testX, test_y, autoencoder = None):
    rf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=42, n_jobs=-1)
    if autoencoder is not None:
        if type(train_X) is not np.ndarray:
            train_X = autoencoder(torch.tensor(train_X.to_numpy()).to(DEVICE)).cpu().detach().numpy()
        else:
            train_X = autoencoder(torch.tensor(train_X).to(DEVICE)).cpu().detach().numpy()
    rf.fit(train_X, train_y)
    predictions = rf.predict_proba(test_X)
    return eval_for_thresholds(test_y.values, predictions)


def linear_regression_train_eval(train_X, train_y, testX, test_y, autoencoder = None):
    lr = LogisticRegression(random_state=42, C=.005, solver='lbfgs', multi_class='multinomial',
                            max_iter=685, n_jobs=-1)
    if autoencoder is not None:
        if type(train_X) is not np.ndarray:
            train_X = autoencoder(torch.tensor(train_X.to_numpy()).to(DEVICE)).cpu().detach().numpy()
        else:
            train_X = autoencoder(torch.tensor(train_X).to(DEVICE)).cpu().detach().numpy()
    lr.fit(train_X, train_y)
    predictions = lr.predict_proba(test_X)
    return eval_for_thresholds(test_y.values, predictions)

In [19]:
for key in train_datasets.keys():
    print("Using oversampling algorithm:", key)
    curr_X, curr_y = train_datasets[key]
    
    lr_results = linear_regression_train_eval(curr_X, curr_y, test_X, test_y)
    print("Logistic regression(No DAE):")
    pp.pprint(lr_results)
    
    lr_dae_results = linear_regression_train_eval(curr_X, curr_y, test_X, test_y, autoenc_models[key])
    print("Logistic regression (With DAE):")
    pp.pprint(lr_dae_results)
    
    rf_results = random_forest_train_eval(curr_X, curr_y, test_X, test_y)
    print("Random forest (No DAE):")
    pp.pprint(rf_results)
    
    rf_dae_results = random_forest_train_eval(curr_X, curr_y, test_X, test_y, autoenc_models[key])
    print("Random forest(DAE):")
    pp.pprint(rf_dae_results)
    
    print("==========================")

Using oversampling algorithm: SMOTE
Logistic regression(No DAE):
{   0.2: {   'F1': 0.03224711473183978,
             'balanced accuracy': 0.9345830868926073,
             'precision': 0.016396272005522954,
             'recall': 0.9693877551020408},
    0.30000000000000004: {   'F1': 0.05664356733956011,
                             'balanced accuracy': 0.9520964090475807,
                             'precision': 0.029183483390251473,
                             'recall': 0.9591836734693877},
    0.4000000000000001: {   'F1': 0.08736496007515265,
                            'balanced accuracy': 0.9574491375053117,
                            'precision': 0.04579025110782865,
                            'recall': 0.9489795918367347},
    0.5000000000000001: {   'F1': 0.12068965517241377,
                            'balanced accuracy': 0.9526878667899349,
                            'precision': 0.06453900709219858,
                            'recall': 0.9285714285714286},
    0.600

Logistic regression (With DAE):
{   0.2: {   'F1': 0.007972270363951473,
             'balanced accuracy': 0.7681180589849896,
             'precision': 0.00400313288660691,
             'recall': 0.9387755102040817},
    0.30000000000000004: {   'F1': 0.0089220772923435,
                             'balanced accuracy': 0.7897222373755355,
                             'precision': 0.004482338611449452,
                             'recall': 0.9387755102040817},
    0.4000000000000001: {   'F1': 0.009928236119354665,
                            'balanced accuracy': 0.8080994180170662,
                            'precision': 0.004990507187415243,
                            'recall': 0.9387755102040817},
    0.5000000000000001: {   'F1': 0.010993606978550516,
                            'balanced accuracy': 0.8238914832956254,
                            'precision': 0.005529178436204099,
                            'recall': 0.9387755102040817},
    0.6000000000000001: {   'F1': 0.012

Random forest (No DAE):
{   0.2: {   'F1': 0.02433972035214915,
             'balanced accuracy': 0.9133636431500005,
             'precision': 0.012326252294781013,
             'recall': 0.9591836734693877},
    0.30000000000000004: {   'F1': 0.07137129109863671,
                             'balanced accuracy': 0.9337963906721947,
                             'precision': 0.03714524207011686,
                             'recall': 0.9081632653061225},
    0.4000000000000001: {   'F1': 0.12036389083275018,
                            'balanced accuracy': 0.927828338003744,
                            'precision': 0.06461307287753569,
                            'recall': 0.8775510204081632},
    0.5000000000000001: {   'F1': 0.17628541448058763,
                            'balanced accuracy': 0.9217920954256773,
                            'precision': 0.09824561403508772,
                            'recall': 0.8571428571428571},
    0.6000000000000001: {   'F1': 0.3241106719367589

Random forest(DAE):
{   0.2: {   'F1': 0.014218009478672985,
             'balanced accuracy': 0.8611404008705339,
             'precision': 0.007162661737523105,
             'recall': 0.9489795918367347},
    0.30000000000000004: {   'F1': 0.05032513429459994,
                             'balanced accuracy': 0.9246253861702249,
                             'precision': 0.02587961616749055,
                             'recall': 0.9081632653061225},
    0.4000000000000001: {   'F1': 0.20935960591133002,
                            'balanced accuracy': 0.9281427293765001,
                            'precision': 0.11904761904761904,
                            'recall': 0.8673469387755102},
    0.5000000000000001: {   'F1': 0.40586797066014674,
                            'balanced accuracy': 0.9214646044123896,
                            'precision': 0.26688102893890675,
                            'recall': 0.8469387755102041},
    0.6000000000000001: {   'F1': 0.7333333333333334,
