# <font color='red'><ins> Tree ensembles: Random Forest and Boosting </ins></font>

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import math
import time
import pickle
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
from sklearn.model_selection import cross_val_score

# Smote libraries
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Own libraries
import sys
sys.path.append('../src')
from preprocessing import *
from plotting import *
from utils import *

# 1. Random Forest

## 1.1. Hyperparameter tuning

To tune the hyperparameters of our models we are going to use the preprocessed dataset, i.e., the dataset where the stimulus start/end points are corrected.

Once we have the best hyperparameters for a model, we train and test again using only those hyperparameters, but with a larger number of repetitions to get a better estimate of the generalization error.

In [3]:
# Input feature columns: 'R1' .. 'R8' followed by 'Temp.' and 'Humidity'.
features = ['R' + str(idx) for idx in range(1, 9)] + ['Temp.', 'Humidity']

In [4]:
# Build the preprocessed dataframe: group the metadata and dataset files by ID,
# then pass the result through reclassify_series_samples.
df_db_prep = reclassify_series_samples(
    group_datafiles_byID(
        '../datasets/preprocessed/HT_Sensor_prep_metadata.dat',
        '../datasets/preprocessed/HT_Sensor_prep_dataset.dat',
    )
)

In [5]:
# Random Forest hyperparameter grid explored by the tuning loop.
n_estimators = list(range(100, 501, 200))   # number of trees: 100, 300, 500
criterions = ['gini', 'entropy']            # split-quality criteria
max_depths = list(range(3, 12, 4))          # maximum tree depth: 3, 7, 11

In [7]:
VAL_REPS = 3

# All (n_estimators, criterion, max_depth) combinations, enumerated in the same
# order a triple nested loop over the three lists would visit them.
param_grid = [(n_trees, split_crit, tree_depth)
              for n_trees in n_estimators
              for split_crit in criterions
              for tree_depth in max_depths]

for nest, crit, depth in param_grid:
    # Scores of this configuration, one entry per repetition
    errs_acc, errs_f1 = [], []
    for i in range(VAL_REPS):
        # New train (80%) / test split for every repetition
        df_train, df_test = split_series_byID(0.8, df_db_prep)
        xtrain = df_train[features].values
        ytrain = df_train['class'].values
        xtest = df_test[features].values
        ytest = df_test['class'].values

        # Fit on the train split, score on the held-out split
        rfc = RandomForestClassifier(n_estimators=nest, criterion=crit, max_depth=depth, n_jobs=-1)
        rfc.fit(xtrain, ytrain)
        y_pred = rfc.predict(xtest)
        errs_acc.append(accuracy_score(ytest, y_pred))
        errs_f1.append(f1_score(ytest, y_pred, average='weighted'))

    # Report mean +- std over the repetitions for this configuration
    errs_acc = np.asarray(errs_acc)
    errs_f1 = np.asarray(errs_f1)
    print('============================================')
    print('N estimators:', nest)
    print('Criterion:', crit)
    print('Max depth:', depth)
    print('Mean accuracy:', errs_acc.mean(), '+-', errs_acc.std())
    print('Mean F1-Score:', errs_f1.mean(), '+-', errs_f1.std())
    print('============================================')



N estimators: 100
Criterion: gini
Max depth: 3
Mean accuracy: 0.7946293751841381 +- 0.022340149438788016
Mean F1-Score: 0.7314064184956269 +- 0.030436776219018077
N estimators: 100
Criterion: gini
Max depth: 7
Mean accuracy: 0.8251705066757214 +- 0.030385405371009255
Mean F1-Score: 0.7917698460100201 +- 0.018411437142537727
N estimators: 100
Criterion: gini
Max depth: 11
Mean accuracy: 0.776987711616818 +- 0.014463698691850284
Mean F1-Score: 0.7329256062850488 +- 0.020797267302252044
N estimators: 100
Criterion: entropy
Max depth: 3
Mean accuracy: 0.8466922047650249 +- 0.02000570277513751
Mean F1-Score: 0.7974078702927899 +- 0.029569765180974403
N estimators: 100
Criterion: entropy
Max depth: 7
Mean accuracy: 0.8627773045945877 +- 0.02029268053720007
Mean F1-Score: 0.8392944292939127 +- 0.030397185278727448
N estimators: 100
Criterion: entropy
Max depth: 11
Mean accuracy: 0.835719748657015 +- 0.02583066191467667
Mean F1-Score: 0.7962037954114768 +- 0.035905305820405374
N estimators: 30

## 1.2 Performance in preprocessed dataset with best hyperparameters

In [9]:
# Re-evaluate the selected hyperparameters (500 trees, entropy, max_depth=7)
# on the preprocessed dataset over REPS independent train/test splits.
REPS = 5

# Per-repetition accuracy and weighted F1-score
errs_acc = []
errs_f1 = []
for i in range(REPS):
    # Splitting set in train (80%) and test set
    df_train, df_test = split_series_byID(0.8, df_db_prep)
    xtrain, ytrain = df_train[features].values, df_train['class'].values
    xtest, ytest = df_test[features].values, df_test['class'].values

    # Init clf
    rfc = RandomForestClassifier(n_estimators=500, criterion='entropy', max_depth=7, n_jobs=-1)
    # Train clf on train set
    rfc.fit(xtrain, ytrain)
    # Getting clf metrics on test set
    y_pred = rfc.predict(xtest)
    errs_acc.append(accuracy_score(ytest, y_pred))
    errs_f1.append(f1_score(ytest, y_pred, average='weighted'))
    # 1-based progress counter, same format as the moving-window experiment
    print('Repetition', i + 1, 'done')

# Summary: mean +- std over the repetitions
errs_acc = np.asarray(errs_acc)
errs_f1 = np.asarray(errs_f1)
print('==> Mean accuracy:', errs_acc.mean(), '+-', errs_acc.std())
print('==> Mean F1-Score:', errs_f1.mean(), '+-', errs_f1.std())

Repetition  0 done
Repetition  1 done
Repetition  2 done
Repetition  3 done
Repetition  4 done
==> Mean accuracy: 0.8590514681255197 +- 0.038124397895361636
==> Mean F1-Score: 0.8216484752586943 +- 0.04936510907701275


## 1.3 Performance in moving windows dataset with best hyperparameters

In [15]:
# Load the pickled moving-window dataframe.
# NOTE(review): pickle.load executes arbitrary code from the file; only use it
# on pickle files produced by this project.
window_pkl_path = '../datasets/preprocessed/window120_dataset.pkl'
with open(window_pkl_path, 'rb') as pkl_file:
    df_db_win = pickle.load(pkl_file)

In [16]:
# Feature set for the moving-window dataset: the ten base columns followed by
# their '_mean' and then their '_std' counterparts (30 columns in total).
base_cols = ['R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'R8', 'Temp.', 'Humidity']
features = (base_cols
            + [col + '_mean' for col in base_cols]
            + [col + '_std' for col in base_cols])

REPS = 5

# Per-repetition accuracy and weighted F1-score
errs_acc, errs_f1 = [], []
for i in range(REPS):
    # New train (80%) / test split; features are used without normalization
    df_train, df_test = split_series_byID(0.8, df_db_win)
    xtrain = df_train[features].values
    ytrain = df_train['class'].values
    xtest = df_test[features].values
    ytest = df_test['class'].values

    # Fit with the selected hyperparameters and score on the held-out split
    rfc = RandomForestClassifier(n_estimators=500, criterion='entropy', max_depth=7, n_jobs=-1)
    rfc.fit(xtrain, ytrain)
    y_pred = rfc.predict(xtest)
    errs_acc.append(accuracy_score(ytest, y_pred))
    errs_f1.append(f1_score(ytest, y_pred, average='weighted'))
    print('Repetition', i+1, 'done')

# Summary: mean +- std over the repetitions
errs_acc = np.asarray(errs_acc)
errs_f1 = np.asarray(errs_f1)
print('==> Mean accuracy:', errs_acc.mean(), '+-', errs_acc.std())
print('==> Mean F1-Score:', errs_f1.mean(), '+-', errs_f1.std())

Repetition 1 done
Repetition 2 done
Repetition 3 done
Repetition 4 done
Repetition 5 done
==> Mean accuracy: 0.8621029966033891 +- 0.016928431408150535
==> Mean F1-Score: 0.8283254526750359 +- 0.022128326408941596


## 1.4 Performance normalizing and using SMOTE

In the next cell we are going to execute **several Random Forest classifiers**.

The datasets used for training are:
- *Raw dataset*: the initial dataset from the original authors' article/repository.
- *Preprocessed dataset*: higher quality dataset, where certain values have been corrected (stimulus start/end points).
- *Moving windows dataset*: dataset where several features have been added to capture the mean and standard deviation of the last 120 samples (moving average + moving std).

For all these datasets, **normalization has been tested**, as well as the **Synthetic Minority Over-sampling Technique (SMOTE) for imbalanced data**.

In [9]:
########################################################################################
#   Random Forest on the raw, preprocessed and moving-window datasets.
#   Every dataset is evaluated normalized and not normalized, and in both cases
#   with and without resampling (SMOTE + random under-sampling) of the training set.
########################################################################################
N_REPS = 7  # train/test splits evaluated per configuration

# Oversampling and undersampling dictionary (target sample count per class).
# NOTE(review): these fixed targets presumably require each training split to have
# at most 175000 'banana'/'wine' samples and at least 500000 'background' samples,
# otherwise the resamplers are expected to reject them - confirm against imblearn.
over_dict = {'banana': 175000, 'wine': 175000}
under_dict = {'background': 500000}


def _fit_and_score(xtrain, ytrain, xtest, ytest, max_depth):
    """Fit a 500-tree entropy Random Forest and return (accuracy, weighted F1) on the test data."""
    rfc = RandomForestClassifier(n_estimators=500, criterion='entropy', max_depth=max_depth, n_jobs=-1)
    rfc.fit(xtrain, ytrain)
    y_pred = rfc.predict(xtest)
    return accuracy_score(ytest, y_pred), f1_score(ytest, y_pred, average='weighted')


def evaluate_rf(df_db, features, max_depth, title, normalize=False, reps=N_REPS):
    """Evaluate a Random Forest with and without training-set resampling.

    For each of ``reps`` splits produced by ``split_series_byID(0.8, df_db)`` a
    classifier is trained on the split as-is and a second one on the training
    data after ``SMOTE(over_dict)`` followed by ``RandomUnderSampler(under_dict)``.
    Both are scored on the same, unresampled test split. The mean and standard
    deviation of the scores are printed under a ``==== <title> ====`` header.

    Parameters
    ----------
    df_db : dataframe accepted by ``split_series_byID``; must contain the
        ``features`` columns and a ``'class'`` column.
    features : list of str
        Columns used as classifier inputs.
    max_depth : int
        ``max_depth`` passed to ``RandomForestClassifier``.
    title : str
        Label printed in the results header.
    normalize : bool, default False
        When True, the split is passed through ``norm_train_test`` before training.
    reps : int, default N_REPS
        Number of train/test splits to evaluate.

    Returns
    -------
    dict of str -> numpy.ndarray
        Per-repetition scores under the keys ``'acc'``, ``'f1'``,
        ``'acc_smote'`` and ``'f1_smote'``.
    """
    scores = {'acc': [], 'f1': [], 'acc_smote': [], 'f1_smote': []}
    for _ in range(reps):
        # Splitting (80% train) and, optionally, normalizing
        df_train, df_test = split_series_byID(0.8, df_db)
        if normalize:
            df_train, df_test = norm_train_test(df_train, df_test, features)
        xtrain, ytrain = df_train[features].values, df_train['class'].values
        xtest, ytest = df_test[features].values, df_test['class'].values

        # Training without using SMOTE
        acc, f1 = _fit_and_score(xtrain, ytrain, xtest, ytest, max_depth)
        scores['acc'].append(acc)
        scores['f1'].append(f1)

        # Training using SMOTE: only the training data is resampled, the test
        # split is the same one used for the baseline above.
        oversample = SMOTE(sampling_strategy=over_dict)
        undersample = RandomUnderSampler(sampling_strategy=under_dict)
        xtrain, ytrain = oversample.fit_resample(xtrain, ytrain)
        xtrain, ytrain = undersample.fit_resample(xtrain, ytrain)
        acc, f1 = _fit_and_score(xtrain, ytrain, xtest, ytest, max_depth)
        scores['acc_smote'].append(acc)
        scores['f1_smote'].append(f1)

    scores = {name: np.asarray(values) for name, values in scores.items()}
    print('==== ' + title + ' ====')
    print('Accuracy:', scores['acc'].mean(), '+-', scores['acc'].std())
    print('f1-score:', scores['f1'].mean(), '+-', scores['f1'].std())
    print('Accuracy (smote):', scores['acc_smote'].mean(), '+-', scores['acc_smote'].std())
    print('f1-score (smote):', scores['f1_smote'].mean(), '+-', scores['f1_smote'].std())
    return scores


# Per-configuration score arrays, keyed by the printed title. Storing the return
# values also keeps the cell from echoing the last call's result.
rf_results = {}

########################################################################################
#   RAW DATASET
#       Normalized and not normalized: using SMOTE in both cases
########################################################################################
base_features = ['R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'R8', 'Temp.', 'Humidity']
features = base_features

df_db_raw = group_datafiles_byID('../datasets/raw/HT_Sensor_metadata.dat',
                                 '../datasets/raw/HT_Sensor_dataset.dat')
df_db_raw = reclassify_series_samples(df_db_raw)

# NOTE(review): the raw dataset is evaluated with max_depth=6 while the other
# datasets use max_depth=7 - kept as found, confirm this is intentional.
rf_results['Raw not norm'] = evaluate_rf(df_db_raw, features, max_depth=6,
                                         title='Raw not norm', normalize=False)
rf_results['Raw normalized'] = evaluate_rf(df_db_raw, features, max_depth=6,
                                           title='Raw normalized', normalize=True)

########################################################################################
#   PREPROCESSED DATASET
#       Normalized and not normalized: using SMOTE in both cases
########################################################################################
# Loaded here so this cell does not depend on earlier cells having been run.
df_db_prep = group_datafiles_byID('../datasets/preprocessed/HT_Sensor_prep_metadata.dat',
                                  '../datasets/preprocessed/HT_Sensor_prep_dataset.dat')
df_db_prep = reclassify_series_samples(df_db_prep)

rf_results['Prep not norm'] = evaluate_rf(df_db_prep, features, max_depth=7,
                                          title='Prep not norm', normalize=False)
rf_results['Prep normalized'] = evaluate_rf(df_db_prep, features, max_depth=7,
                                            title='Prep normalized', normalize=True)

########################################################################################
#   PREPROCESSED DATASET with MOVING WINDOWS
#       Normalized and not normalized: using SMOTE in both cases
########################################################################################
# NOTE(review): pickle.load executes arbitrary code from the file; only use it
# on pickle files produced by this project.
with open('../datasets/preprocessed/window120_dataset.pkl', 'rb') as f:
    df_db = pickle.load(f)

# Base columns followed by their '_mean' and then their '_std' counterparts
features = (base_features
            + [col + '_mean' for col in base_features]
            + [col + '_std' for col in base_features])

rf_results['Windows 120 not norm'] = evaluate_rf(df_db, features, max_depth=7,
                                                 title='Windows 120 not norm', normalize=False)
rf_results['Windows 120 normalized'] = evaluate_rf(df_db, features, max_depth=7,
                                                   title='Windows 120 normalized', normalize=True)

==== Raw not norm ====
Accuracy: 0.8418960522995799 +- 0.03632088039392034
f1-score: 0.7971172515127904 +- 0.04712454772292015
Accuracy (smote): 0.8277699338947454 +- 0.03764657496658198
f1-score (smote): 0.8082176890316616 +- 0.041986899096288714
==== Raw normalized ====
Accuracy: 0.8435238958626406 +- 0.016988139870408495
f1-score: 0.8044838931123677 +- 0.02726273359176542
Accuracy (smote): 0.8352368865135366 +- 0.029350892504258912
f1-score (smote): 0.8167438646556081 +- 0.03198988701165813
==== Prep not norm ====
Accuracy: 0.8065367076851159 +- 0.029776930251357886
f1-score: 0.7572592024620618 +- 0.03996370131297966
Accuracy (smote): 0.7907933461773212 +- 0.029474943405134558
f1-score (smote): 0.7667767335193576 +- 0.03574504446815725
==== Prep normalized ====
Accuracy: 0.8518799497840609 +- 0.046531772282859415
f1-score: 0.8152035473309679 +- 0.06113266960072451
Accuracy (smote): 0.8308586053612501 +- 0.05510826568942438
f1-score (smote): 0.816378820351669 +- 0.057696944279277226


# 2. Boosting