In [1]:
import time
import pprint

import torch
import pandas as pd
from sklearn.metrics import f1_score, balanced_accuracy_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from tqdm.notebook import tqdm

from utils import *
from models.CreditcardDataset import CreditcardDataset
from models.OversamplingDataset import OversamplingDataset
from models.LorasDataset import LorasDataset
from models.Autoencoder import Autoencoder
from models.Classifier import Classifier
from models.HiddenReprClassifier import HiddenReprClassifier

from imblearn.over_sampling import SMOTE, ADASYN, SVMSMOTE


DATA_PATH = "./data/"
pp = pprint.PrettyPrinter(indent=4)

In [10]:
BATCH_SIZE = 64

LEARNING_RATE_AUTOENC = 1e-3
LEARNING_RATE_CLASSIFIER = 3e-4
LEARNING_RATE_SMALL_CLASS = 1e-4
NOISE_STR = 0.2

AUTOENC_EPOCHS = 25
CLASSIFIER_EPOCHS = 25

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

cuda


In [3]:
train_df = pd.read_parquet(DATA_PATH+"train.parquet")
valid_df = pd.read_parquet(DATA_PATH+"valid.parquet")
test_df = pd.read_parquet(DATA_PATH+"test.parquet")

In [4]:
oversamplers = {
    "SMOTE":SMOTE(random_state=42, k_neighbors=30, n_jobs=-1),
    "ADASYN": ADASYN(random_state=42, n_neighbors=30, n_jobs=-1),
    "SVMSMOTE": SVMSMOTE(random_state=42, k_neighbors=30, n_jobs=-1)
}

In [5]:
train_datasets = {key: OversamplingDataset(train_df, oversamplers[key])for key in oversamplers.keys()}
valid_dataset = CreditcardDataset(valid_df)
test_dataset = CreditcardDataset(test_df)

train_datasets['None'] = CreditcardDataset(train_df)#baseline(no oversampling)
train_datasets['LORAS'] = LorasDataset(train_df)

In [11]:
autoenc_models = {key:Autoencoder().to(DEVICE) for key in train_datasets.keys()}
autoenc_optims = {key:torch.optim.Adam(autoenc_models[key].parameters(),
                                       lr=LEARNING_RATE_AUTOENC) for key in autoenc_models.keys()}

In [12]:
def calculate_valid_loss_autoenc(model, valid_dataset, loss_fn):
    dataloader = torch.utils.data.DataLoader(valid_dataset, 1, shuffle=False)
    loss_list = []

    for x, _ in dataloader:
        x = x.to(DEVICE)
        logits = model(x)
        loss = loss_fn(logits, x)
        loss_val = loss.item()
        loss_list.append(loss_val)

    return np.mean(loss_list)


def train_autoenc_model(model, optimizer, train_dataset, valid_dataset):
    model.train()
    loss_fn = torch.nn.MSELoss()

    for epoch in range(AUTOENC_EPOCHS):
        print("\n Start of epoch {}/{}...".format(epoch + 1, AUTOENC_EPOCHS))
        epoch_start_time = time.time()

        loss_list = []
        dataloader = torch.utils.data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True)

        for x_orig, _ in dataloader:
            x_orig_gpu = x_orig.cpu().to(DEVICE)
            x_noisy = get_noised_data(x_orig, NOISE_STR)
            x_noisy = x_noisy.to(DEVICE)

            logits = model(x_noisy)
            loss = loss_fn(logits, x_orig_gpu)
            loss_val = loss.item()
            loss_list.append(loss_val)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss = np.mean(loss_list)
        valid_loss = calculate_valid_loss_autoenc(model, valid_dataset, loss_fn)

        epoch_time = time.time() - epoch_start_time
        print(
            "\n Epoch {} complete in {:.2f},train loss = {}, valid_loss = {}".format(epoch + 1, epoch_time, train_loss, valid_loss))

In [13]:
for key in autoenc_models.keys():
    print("Training DAE with oversamlping method:", key)
    train_autoenc_model(autoenc_models[key], autoenc_optims[key], train_datasets[key], valid_dataset)
    print("==========================")

Training DAE with oversamlping method: SMOTE

 Start of epoch 1/25...

 Epoch 1 complete in 26.78,train loss = 0.7046817020463445, valid_loss = 0.5276378144573118

 Start of epoch 2/25...

 Epoch 2 complete in 27.91,train loss = 0.42252989213251163, valid_loss = 0.47941868600193593

 Start of epoch 3/25...

 Epoch 3 complete in 26.62,train loss = 0.405656765775234, valid_loss = 0.4761381655231647

 Start of epoch 4/25...

 Epoch 4 complete in 26.71,train loss = 0.40466112380390856, valid_loss = 0.4783897305280724

 Start of epoch 5/25...

 Epoch 5 complete in 26.69,train loss = 0.404406802509318, valid_loss = 0.47933296873020925

 Start of epoch 6/25...

 Epoch 6 complete in 26.44,train loss = 0.40448079619679966, valid_loss = 0.4769146582626889

 Start of epoch 7/25...

 Epoch 7 complete in 26.75,train loss = 0.4040201358782082, valid_loss = 0.47882974493356684

 Start of epoch 8/25...

 Epoch 8 complete in 27.71,train loss = 0.40380863664674016, valid_loss = 0.47949298111533034

 Sta


 Epoch 18 complete in 26.92,train loss = 0.5135755402016365, valid_loss = 0.49816211370072433

 Start of epoch 19/25...

 Epoch 19 complete in 26.37,train loss = 0.5135825036668262, valid_loss = 0.4944579014480326

 Start of epoch 20/25...

 Epoch 20 complete in 26.63,train loss = 0.5134635130877847, valid_loss = 0.5003366655857794

 Start of epoch 21/25...

 Epoch 21 complete in 26.89,train loss = 0.5133997007036594, valid_loss = 0.4968972991765369

 Start of epoch 22/25...

 Epoch 22 complete in 27.18,train loss = 0.5136578494423844, valid_loss = 0.49519535132757864

 Start of epoch 23/25...

 Epoch 23 complete in 27.02,train loss = 0.5135201167036055, valid_loss = 0.4957132193365127

 Start of epoch 24/25...

 Epoch 24 complete in 27.15,train loss = 0.5130544659705416, valid_loss = 0.4980792287063395

 Start of epoch 25/25...

 Epoch 25 complete in 27.47,train loss = 0.5130967287916279, valid_loss = 0.4975654365885709
Training DAE with oversamlping method: None

 Start of epoch 1/2

In [14]:
classifier_models = {key:Classifier().to(DEVICE) for key in autoenc_models.keys()}
classifier_optims = {key:torch.optim.Adam(classifier_models[key].parameters(),
                                       lr=LEARNING_RATE_AUTOENC) for key in classifier_models.keys()}

In [15]:
def pass_through_autoenc_classifier(x, y, autoenc, classifier, loss_fn, passthrough_fnc):
    autoenc_out = passthrough_fnc(x)
    logits = classifier(autoenc_out)
    loss = loss_fn(logits, y)
    loss_val = loss.item()
    
    return loss, loss_val


def calculate_valid_loss_classifier(autoenc, model, valid_dataset, loss_fn, passthrough_fnc):

    dataloader = torch.utils.data.DataLoader(valid_dataset, 1, shuffle=False)
    loss_list = []

    for x, y in dataloader:
        x = x.to(DEVICE)
        y = y.type(torch.LongTensor).to(DEVICE)

        _, loss_val = pass_through_autoenc_classifier(x ,y, autoenc, model, loss_fn, passthrough_fnc)
        loss_list.append(loss_val)

    return np.mean(loss_list)


def train_classifier_model(autoenc_model, model, optimizer, train_dataset, valid_dataset, passthrough_fnc):
    autoenc_model.eval()
    model.train()

    loss_fn = torch.nn.CrossEntropyLoss()

    for epoch in range(CLASSIFIER_EPOCHS):
        print("\n Start of epoch {}/{}".format(epoch + 1, CLASSIFIER_EPOCHS))
        epoch_start_time = time.time()

        loss_list = []
        dataloader = torch.utils.data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True)

        for x, y in dataloader:
            x = x.to(DEVICE)
            y = y.type(torch.LongTensor).to(DEVICE)

            loss, loss_val = pass_through_autoenc_classifier(x, y, autoenc_model, model, loss_fn, passthrough_fnc)
            loss_list.append(loss_val)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss = np.mean(loss_list)
#         valid_loss = calculate_valid_loss_classifier(autoenc_model, model, valid_dataset, loss_fn, passthrough_fnc)

        epoch_time = time.time() - epoch_start_time
        print(
            "\n Epoch {} complete in {:.2f}, train loss = {}, valid_loss = {}".format(epoch + 1, epoch_time, train_loss,
                                                                                      '?'))

In [16]:
for key in classifier_models.keys():
    print("Training classifier with oversamlping method:", key)
    train_classifier_model(autoenc_models[key], classifier_models[key], classifier_optims[key],
                          train_datasets[key], valid_dataset, autoenc_models[key].forward)

Training classifier with oversamlping method: SMOTE

 Start of epoch 1/25

 Epoch 1 complete in 17.37, train loss = 0.1122219420449121, valid_loss = ?

 Start of epoch 2/25

 Epoch 2 complete in 17.19, train loss = 0.07684888780196734, valid_loss = ?

 Start of epoch 3/25

 Epoch 3 complete in 17.21, train loss = 0.06526590049722175, valid_loss = ?

 Start of epoch 4/25

 Epoch 4 complete in 16.92, train loss = 0.05921061894685654, valid_loss = ?

 Start of epoch 5/25

 Epoch 5 complete in 16.82, train loss = 0.05585251286997369, valid_loss = ?

 Start of epoch 6/25

 Epoch 6 complete in 16.96, train loss = 0.054351375259886585, valid_loss = ?

 Start of epoch 7/25

 Epoch 7 complete in 16.81, train loss = 0.053128455095164014, valid_loss = ?

 Start of epoch 8/25

 Epoch 8 complete in 16.64, train loss = 0.05244757738193589, valid_loss = ?

 Start of epoch 9/25

 Epoch 9 complete in 17.08, train loss = 0.051964602780214836, valid_loss = ?

 Start of epoch 10/25

 Epoch 10 complete in 


 Epoch 5 complete in 16.74, train loss = 0.0032951773643331824, valid_loss = ?

 Start of epoch 6/25

 Epoch 6 complete in 16.72, train loss = 0.0032182518835528733, valid_loss = ?

 Start of epoch 7/25

 Epoch 7 complete in 16.65, train loss = 0.0031554920954712687, valid_loss = ?

 Start of epoch 8/25

 Epoch 8 complete in 16.75, train loss = 0.0030463654856005915, valid_loss = ?

 Start of epoch 9/25

 Epoch 9 complete in 16.53, train loss = 0.0030320481071930664, valid_loss = ?

 Start of epoch 10/25

 Epoch 10 complete in 16.47, train loss = 0.0029607184259226836, valid_loss = ?

 Start of epoch 11/25

 Epoch 11 complete in 16.46, train loss = 0.0029497857262439785, valid_loss = ?

 Start of epoch 12/25

 Epoch 12 complete in 16.43, train loss = 0.0029282465293400564, valid_loss = ?

 Start of epoch 13/25

 Epoch 13 complete in 16.78, train loss = 0.002931760378511811, valid_loss = ?

 Start of epoch 14/25

 Epoch 14 complete in 16.76, train loss = 0.0028577372473477863, valid_lo

In [17]:
def evaluate_predictions(auto_enc, model, dataset, passthrough_fnc):
    dataloader = torch.utils.data.DataLoader(dataset, 1, shuffle=False)

    predictions = []
    ground_truth = []
    for x, y in tqdm(dataloader):
        x = x.to(DEVICE)
        ground_truth.append(y.item())

        autoenc_out = passthrough_fnc(x)
        logits = model(autoenc_out)
        predictions.append(np.argmax(logits.detach().cpu().numpy()))
        
    return {
        'precision': precision_score(ground_truth, predictions),
        'recall': recall_score(ground_truth, predictions),
        'F1': f1_score(ground_truth, predictions),
        'balanced accuracy': balanced_accuracy_score(ground_truth, predictions)
    }

In [18]:
for key in classifier_models.keys():
    print("Results for model with oversampling algorthm:", key)
    pp.pprint(evaluate_predictions(autoenc_models[key], classifier_models[key],
                               test_dataset, autoenc_models[key].forward))

Results for model with oversampling algorthm: SMOTE


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.16326530612244897,
    'balanced accuracy': 0.9494393360644302,
    'precision': 0.08961911874533234,
    'recall': 0.916030534351145}
Results for model with oversampling algorthm: ADASYN


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.11697574893009988,
    'balanced accuracy': 0.9564575302130404,
    'precision': 0.06237322515212982,
    'recall': 0.9389312977099237}
Results for model with oversampling algorthm: SVMSMOTE


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.3049095607235143,
    'balanced accuracy': 0.9466881897803303,
    'precision': 0.18351477449455678,
    'recall': 0.9007633587786259}
Results for model with oversampling algorthm: None


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.7936507936507937,
    'balanced accuracy': 0.8815316497286179,
    'precision': 0.8264462809917356,
    'recall': 0.7633587786259542}
Results for model with oversampling algorthm: LORAS


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.8091603053435116,
    'balanced accuracy': 0.9044042722141852,
    'precision': 0.8091603053435115,
    'recall': 0.8091603053435115}


In [19]:
hidden_classifier_models = {key:HiddenReprClassifier().to(DEVICE) for key in autoenc_models.keys()}
hidden_classifier_optims = {key:torch.optim.Adam(hidden_classifier_models[key].parameters(),
                                       lr=LEARNING_RATE_AUTOENC) for key in hidden_classifier_models.keys()}

In [20]:
for key in hidden_classifier_models.keys():
    print("Training hidden classifiers with oversamlping method:", key)
    train_classifier_model(autoenc_models[key], hidden_classifier_models[key], hidden_classifier_optims[key],
                          train_datasets[key], valid_dataset, autoenc_models[key].get_enc)

Training hidden classifiers with oversamlping method: SMOTE

 Start of epoch 1/25

 Epoch 1 complete in 11.76, train loss = 0.1713374963193452, valid_loss = ?

 Start of epoch 2/25

 Epoch 2 complete in 11.58, train loss = 0.15106827853995966, valid_loss = ?

 Start of epoch 3/25

 Epoch 3 complete in 11.40, train loss = 0.14939921905237577, valid_loss = ?

 Start of epoch 4/25

 Epoch 4 complete in 11.79, train loss = 0.148053886810654, valid_loss = ?

 Start of epoch 5/25

 Epoch 5 complete in 11.67, train loss = 0.14599047291176107, valid_loss = ?

 Start of epoch 6/25

 Epoch 6 complete in 11.67, train loss = 0.1436586416753073, valid_loss = ?

 Start of epoch 7/25

 Epoch 7 complete in 10.86, train loss = 0.14067484536231373, valid_loss = ?

 Start of epoch 8/25

 Epoch 8 complete in 11.60, train loss = 0.1355396030057695, valid_loss = ?

 Start of epoch 9/25

 Epoch 9 complete in 11.67, train loss = 0.133407490775168, valid_loss = ?

 Start of epoch 10/25

 Epoch 10 complete in 1


 Epoch 5 complete in 15.36, train loss = 0.0037291112978078164, valid_loss = ?

 Start of epoch 6/25

 Epoch 6 complete in 15.37, train loss = 0.0036413334116187, valid_loss = ?

 Start of epoch 7/25

 Epoch 7 complete in 15.20, train loss = 0.0036059000530591816, valid_loss = ?

 Start of epoch 8/25

 Epoch 8 complete in 15.20, train loss = 0.003559798316520322, valid_loss = ?

 Start of epoch 9/25

 Epoch 9 complete in 15.17, train loss = 0.003517565524279954, valid_loss = ?

 Start of epoch 10/25

 Epoch 10 complete in 14.88, train loss = 0.0035192462215523487, valid_loss = ?

 Start of epoch 11/25

 Epoch 11 complete in 14.86, train loss = 0.0035169761579048275, valid_loss = ?

 Start of epoch 12/25

 Epoch 12 complete in 14.82, train loss = 0.0034744528208509083, valid_loss = ?

 Start of epoch 13/25

 Epoch 13 complete in 14.87, train loss = 0.003460586284885462, valid_loss = ?

 Start of epoch 14/25

 Epoch 14 complete in 14.75, train loss = 0.0034387660425832726, valid_loss = 

In [21]:
for key in hidden_classifier_models.keys():
    print("Results for hidden_repr model with oversampling algorthm:", key)
    pp.pprint(evaluate_predictions(autoenc_models[key], hidden_classifier_models[key],
                               test_dataset, autoenc_models[key].get_enc))

Results for hidden_repr model with oversampling algorthm: SMOTE


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.10806174957118354,
    'balanced accuracy': 0.9663179525559915,
    'precision': 0.05724670604270786,
    'recall': 0.9618320610687023}
Results for hidden_repr model with oversampling algorthm: ADASYN


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.0400876918258691,
    'balanced accuracy': 0.9454448357792085,
    'precision': 0.020463629096722624,
    'recall': 0.9770992366412213}
Results for hidden_repr model with oversampling algorthm: SVMSMOTE


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.16643741403026138,
    'balanced accuracy': 0.9533757286687079,
    'precision': 0.09145880574452003,
    'recall': 0.9236641221374046}
Results for hidden_repr model with oversampling algorthm: None


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.7836734693877552,
    'balanced accuracy': 0.8662855798110072,
    'precision': 0.8421052631578947,
    'recall': 0.732824427480916}
Results for hidden_repr model with oversampling algorthm: LORAS


  0%|          | 0/71202 [00:00<?, ?it/s]

{   'F1': 0.24650571791613723,
    'balanced accuracy': 0.8662963206023092,
    'precision': 0.14786585365853658,
    'recall': 0.7404580152671756}


In [22]:
train_X, train_y = get_x_y(train_df)

valid_X, valid_y = get_x_y(valid_df)
test_X, test_y = get_x_y(test_df)

X = pd.concat([train_X, valid_X])
y = pd.concat([train_y, valid_y])

train_datasets = {key: oversamplers[key].fit_resample(X,y) for key in oversamplers.keys()}
train_datasets['LORAS'] = loras_oversample_dataframe(pd.concat([X, y] ,axis=1))

In [25]:
def eval_for_thresholds(y, predictions):
    return {thr: eval_for_threshhold(y, predictions, thr) for thr in np.arange(0.2, 0.9, 0.1)}


def eval_for_threshhold(ground_truth, predictions, thr):
    class_predicitons = (predictions[:,1] >= thr).astype(bool)
    return  {
        'precision': precision_score(ground_truth, class_predicitons),
        'recall': recall_score(ground_truth, class_predicitons),
        'F1': f1_score(ground_truth, class_predicitons),
        'balanced accuracy': balanced_accuracy_score(ground_truth, class_predicitons)
    } 


def random_forest_train_eval(train_X, train_y, testX, test_y, autoencoder = None):
    rf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=42, n_jobs=-1)
    if autoencoder is not None:
        if type(train_X) is not np.ndarray:
            train_X = autoencoder(torch.tensor(train_X.to_numpy()).to(DEVICE)).cpu().detach().numpy()
        else:
            train_X = autoencoder(torch.tensor(train_X).to(DEVICE)).cpu().detach().numpy()
    rf.fit(train_X, train_y)
    predictions = rf.predict_proba(test_X)
    return eval_for_thresholds(test_y.values, predictions)


def linear_regression_train_eval(train_X, train_y, testX, test_y, autoencoder = None):
    lr = LogisticRegression(random_state=42, C=.005, solver='lbfgs', multi_class='multinomial',
                            max_iter=685, n_jobs=-1)
    if autoencoder is not None:
        if type(train_X) is not np.ndarray:
            train_X = autoencoder(torch.tensor(train_X.to_numpy()).to(DEVICE)).cpu().detach().numpy()
        else:
            train_X = autoencoder(torch.tensor(train_X).to(DEVICE)).cpu().detach().numpy()
    lr.fit(train_X, train_y)
    predictions = lr.predict_proba(test_X)
    return eval_for_thresholds(test_y.values, predictions)

In [26]:
for key in train_datasets.keys():
    print("Using oversampling algorithm:", key)
    curr_X, curr_y = train_datasets[key]
    
    lr_results = linear_regression_train_eval(curr_X, curr_y, test_X, test_y)
    print("Logistic regression(No DAE):")
    pp.pprint(lr_results)
    
    lr_dae_results = linear_regression_train_eval(curr_X, curr_y, test_X, test_y, autoenc_models[key])
    print("Logistic regression (With DAE):")
    pp.pprint(lr_dae_results)
    
    rf_results = random_forest_train_eval(curr_X, curr_y, test_X, test_y)
    print("Random forest (No DAE):")
    pp.pprint(rf_results)
    
    rf_dae_results = random_forest_train_eval(curr_X, curr_y, test_X, test_y, autoenc_models[key])
    print("Random forest(DAE):")
    pp.pprint(rf_dae_results)
    
    print("==========================")

Using oversampling algorithm: SMOTE
Logistic regression(No DAE):
{   0.2: {   'F1': 0.009103322712790168,
             'balanced accuracy': 0.7970865281369528,
             'precision': 0.0045726345409778405,
             'recall': 0.9923664122137404},
    0.30000000000000004: {   'F1': 0.009899105273177233,
                             'balanced accuracy': 0.8132393893602365,
                             'precision': 0.00497436289890564,
                             'recall': 0.9923664122137404},
    0.4000000000000001: {   'F1': 0.010622650759928094,
                            'balanced accuracy': 0.8258253949039887,
                            'precision': 0.005339905524748409,
                            'recall': 0.9923664122137404},
    0.5000000000000001: {   'F1': 0.011401508507279426,
                            'balanced accuracy': 0.8375882799063102,
                            'precision': 0.005733692056631236,
                            'recall': 0.9923664122137404},
   

Logistic regression (With DAE):
{   0.2: {   'F1': 0.00835512468907456,
             'balanced accuracy': 0.7812328516553868,
             'precision': 0.0041950875844621646,
             'recall': 1.0},
    0.30000000000000004: {   'F1': 0.009211081423147237,
                             'balanced accuracy': 0.8017334777898157,
                             'precision': 0.004626849856956169,
                             'recall': 1.0},
    0.4000000000000001: {   'F1': 0.009986659043262817,
                            'balanced accuracy': 0.8172742750207539,
                            'precision': 0.005018387986515477,
                            'recall': 1.0},
    0.5000000000000001: {   'F1': 0.010778344577916734,
                            'balanced accuracy': 0.8308311406902956,
                            'precision': 0.005418372833684907,
                            'recall': 1.0},
    0.6000000000000001: {   'F1': 0.011578063546776261,
                            'balanced ac

Random forest (No DAE):
{   0.2: {   'F1': 0.01819592354891036,
             'balanced accuracy': 0.894443208656734,
             'precision': 0.009182801822323462,
             'recall': 0.9847328244274809},
    0.30000000000000004: {   'F1': 0.05495456512332323,
                             'balanced accuracy': 0.9540311317539572,
                             'precision': 0.028278779781785793,
                             'recall': 0.9694656488549618},
    0.4000000000000001: {   'F1': 0.14397120575884823,
                            'balanced accuracy': 0.9480533980587738,
                            'precision': 0.078125,
                            'recall': 0.916030534351145},
    0.5000000000000001: {   'F1': 0.2553191489361702,
                            'balanced accuracy': 0.9531680017649269,
                            'precision': 0.14833127317676142,
                            'recall': 0.916030534351145},
    0.6000000000000001: {   'F1': 0.475609756097561,
            

Random forest(DAE):
{   0.2: {   'F1': 0.006810855776229595,
             'balanced accuracy': 0.7312124495223087,
             'precision': 0.003417064454704333,
             'recall': 1.0},
    0.30000000000000004: {   'F1': 0.017109887923602362,
                             'balanced accuracy': 0.8881115121841925,
                             'precision': 0.00862991704575863,
                             'recall': 0.9847328244274809},
    0.4000000000000001: {   'F1': 0.065847922654821,
                            'balanced accuracy': 0.9558003011932696,
                            'precision': 0.03409090909090909,
                            'recall': 0.9618320610687023},
    0.5000000000000001: {   'F1': 0.2523860021208908,
                            'balanced accuracy': 0.9493230669985857,
                            'precision': 0.14655172413793102,
                            'recall': 0.9083969465648855},
    0.6000000000000001: {   'F1': 0.5968586387434556,
                 