In [1]:
# Necessary imports
import xgboost as xgb
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from utils.load_dataset import load_dataset
from utils.split_x_y import split_x_y
from utils.printmd import printmd
from utils.classification import TrainEvaluation

In [2]:
# Load test dataset
test = load_dataset(dataset_type='test', encoding='oe')

In [3]:
# Split test dataset into features and target
test_X, test_y = split_x_y(df=test, target='returnShipment')

In [16]:
rf = RandomForestClassifier()
rf.fit(test_X, test_y.to_numpy().flatte())
y_pred = rf.predict(test_X)


(array([0, 1]), array([20310, 29768], dtype=int64))

In [4]:
# Benchmark datasets

In [5]:
# Load datasets
train_one = load_dataset(dataset_type='train', balance=1, encoding='oe')
train_three = load_dataset(dataset_type='train', balance=3, encoding='oe')
train_five = load_dataset(dataset_type='train', balance=5, encoding='oe')
train_twenty_five = load_dataset(dataset_type='train', balance=25, encoding='oe')
train_fifty = load_dataset(dataset_type='train', balance=50, encoding='oe')

In [6]:
# Split datasets into train and validation
train_one, validation_one = train_test_split(train_one, test_size=0.2, stratify=train_one['returnShipment'])
train_three, validation_three = train_test_split(train_three, test_size=0.2, stratify=train_three['returnShipment'])
train_five, validation_five = train_test_split(train_five, test_size=0.2, stratify=train_five['returnShipment'])
train_twenty_five, validation_twenty_five = train_test_split(train_twenty_five, test_size=0.2, stratify=train_twenty_five['returnShipment'])
train_fifty, validation_fifty = train_test_split(train_fifty, test_size=0.2, stratify=train_fifty['returnShipment'])

In [7]:
# Split datasets into features and target
train_one_X, train_one_y = split_x_y(df=train_one, target='returnShipment')
validation_one_X, validation_one_y = split_x_y(df=validation_one, target='returnShipment')

train_three_X, train_three_y = split_x_y(df=train_three, target='returnShipment')
validation_three_X, validation_three_y = split_x_y(df=validation_three, target='returnShipment')

train_five_X, train_five_y = split_x_y(df=train_five, target='returnShipment')
validation_five_X, validation_five_y = split_x_y(validation_five, target='returnShipment')

train_twenty_five_X, train_twenty_five_y = split_x_y(train_twenty_five, target='returnShipment')
validation_twenty_five_X, validation_twenty_five_y = split_x_y(validation_twenty_five, target='returnShipment')

train_fifty_X, train_fifty_y = split_x_y(train_fifty, target='returnShipment')
validation_fifty_X, validation_fifty_y = split_x_y(validation_fifty, target='returnShipment')

In [14]:
clf = xgb.XGBClassifier(objective='binary:logistic', eval_metric='error', use_label_encoder=False)
clf.fit(train_one_X, train_one_y)
y_score = clf.predict_proba(test_X)
print(clf.classes_)
print(y_score)
print(y_score[:, 1])

[0 1]
[[9.9943632e-01 5.6370621e-04]
 [9.9849206e-01 1.5079101e-03]
 [9.9600065e-01 3.9993702e-03]
 ...
 [9.9757940e-01 2.4206121e-03]
 [9.9994642e-01 5.3558702e-05]
 [9.9988049e-01 1.1948893e-04]]
[5.6370621e-04 1.5079101e-03 3.9993702e-03 ... 2.4206121e-03 5.3558702e-05
 1.1948893e-04]


In [8]:
# 1% class balance
TrainEvaluation(train_one_X, train_one_y, validation_one_X, validation_one_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.51
LogisticRegressionClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.0
LogisticRegressionClassification:
F1Score validation: 0.0
F1Score test: 0.0
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.62
XGBoostClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.0
XGBoostClassification:
F1Score validation: 0.0
F1Score test: 0.0
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.55
RandomForestClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.01
RandomForestClassification:
F1Score validation: 0.0
F1Score test: 0.0

In [9]:
# 3% class balance
TrainEvaluation(train_three_X, train_three_y, validation_three_X, validation_three_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.54
LogisticRegressionClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.0
LogisticRegressionClassification:
F1Score validation: 0.0
F1Score test: 0.0
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.64
XGBoostClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.01
XGBoostClassification:
F1Score validation: 0.0
F1Score test: 0.0
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.62
RandomForestClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.03
RandomForestClassification:
F1Score validation: 0.0
F1Score test: 0.0

In [10]:
# 5% class balance
TrainEvaluation(train_five_X, train_five_y, validation_five_X, validation_five_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.53
LogisticRegressionClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.0
LogisticRegressionClassification:
F1Score validation: 0.0
F1Score test: 0.0
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
XGBoostClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.03
XGBoostClassification:
F1Score validation: 0.0
F1Score test: 0.0
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.63
RandomForestClassification:
GeometricMean validation: 0.0
GeometricMean test: 0.03
RandomForestClassification:
F1Score validation: 0.0
F1Score test: 0.0

In [11]:
# 25% class balance
TrainEvaluation(train_twenty_five_X, train_twenty_five_y, validation_twenty_five_X, validation_twenty_five_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.5
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.56
LogisticRegressionClassification:
GeometricMean validation: 0.04
GeometricMean test: 0.04
LogisticRegressionClassification:
F1Score validation: 0.0
F1Score test: 0.0
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.55
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
XGBoostClassification:
GeometricMean validation: 0.4
GeometricMean test: 0.43
XGBoostClassification:
F1Score validation: 0.25
F1Score test: 0.32
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.53
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
RandomForestClassification:
GeometricMean validation: 0.28
GeometricMean test: 0.38
RandomForestClassification:
F1Score validation: 0.14
F1Score test: 0.25

In [12]:
# 50% class balance
TrainEvaluation(train_fifty_X, train_fifty_y, validation_fifty_X, validation_fifty_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
GeometricMean validation: 0.56
GeometricMean test: 0.56
LogisticRegressionClassification:
F1Score validation: 0.55
F1Score test: 0.59
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.62
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.68
XGBoostClassification:
GeometricMean validation: 0.61
GeometricMean test: 0.6
XGBoostClassification:
F1Score validation: 0.64
F1Score test: 0.69
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.63
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.68
RandomForestClassification:
GeometricMean validation: 0.62
GeometricMean test: 0.61
RandomForestClassification:
F1Score validation: 0.66
F1Score test: 0.68

In [15]:
# Load datasets

# 1% class balance
train_one_random_under = load_dataset(dataset_type='train', balance=1, technique='ru', encoding='oe')
train_one_near_miss_under = load_dataset(dataset_type='train', balance=1, technique='nmu', encoding='oe')
train_one_random_over = load_dataset(dataset_type='train', balance=1, technique='ro', encoding='oe')
train_one_smote = load_dataset(dataset_type='train', balance=1, technique='snc', encoding='oe')

# 3% class balance
train_three_random_under = load_dataset(dataset_type='train', balance=3, technique='ru', encoding='oe')
train_three_near_miss_under = load_dataset(dataset_type='train', balance=3, technique='nmu', encoding='oe')
train_three_random_over = load_dataset(dataset_type='train', balance=3, technique='ro', encoding='oe')
train_three_smote = load_dataset(dataset_type='train', balance=3, technique='snc', encoding='oe')

# 5% class balance
train_five_random_under = load_dataset(dataset_type='train', balance=5, technique='ru', encoding='oe')
train_five_near_miss_under = load_dataset(dataset_type='train', balance=5, technique='nmu', encoding='oe')
train_five_random_over = load_dataset(dataset_type='train', balance=5, technique='ro', encoding='oe')
train_five_smote = load_dataset(dataset_type='train', balance=5, technique='snc', encoding='oe')

# 25% class balance
train_twenty_five_random_under = load_dataset(dataset_type='train', balance=25, technique='ru', encoding='oe')
train_twenty_five_near_miss_under = load_dataset(dataset_type='train', balance=25, technique='nmu', encoding='oe')
train_twenty_five_random_over = load_dataset(dataset_type='train', balance=25, technique='ro', encoding='oe')
train_twenty_five_smote = load_dataset(dataset_type='train', balance=25, technique='snc', encoding='oe')

# 50% class balance
train_fifty_random_under = load_dataset(dataset_type='train', balance=50, technique='ru', encoding='oe')
train_fifty_near_miss_under = load_dataset(dataset_type='train', balance=50, technique='nmu', encoding='oe')
train_fifty_random_over = load_dataset(dataset_type='train', balance=50, technique='ro', encoding='oe')
train_fifty_smote = load_dataset(dataset_type='train', balance=50, technique='snc', encoding='oe')

In [16]:
# Split datasets into train and validation

# 1% class balance
train_one_ru, validation_one_ru = train_test_split(train_one_random_under, test_size=0.2, stratify=train_one_random_under['returnShipment'])
train_one_nmu, validation_one_nmu = train_test_split(train_one_near_miss_under, test_size=0.2, stratify=train_one_near_miss_under['returnShipment'])
train_one_ro, validation_one_ro = train_test_split(train_one_random_over, test_size=0.2, stratify=train_one_random_over['returnShipment'])
train_one_snc, validation_one_snc = train_test_split(train_one_smote, test_size=0.2, stratify=train_one_smote['returnShipment'])

# 3% class balance
train_three_ru, validation_three_ru = train_test_split(train_three_random_under, test_size=0.2, stratify=train_three_random_under['returnShipment'])
train_three_nmu, validation_three_nmu = train_test_split(train_three_near_miss_under, test_size=0.2, stratify=train_three_near_miss_under['returnShipment'])
train_three_ro, validation_three_ro = train_test_split(train_three_random_over, test_size=0.2, stratify=train_three_random_over['returnShipment'])
train_three_snc, validation_three_snc = train_test_split(train_three_smote, test_size=0.2, stratify=train_three_smote['returnShipment'])

# 5% class balance
train_five_ru, validation_five_ru = train_test_split(train_five_random_under, test_size=0.2, stratify=train_five_random_under['returnShipment'])
train_five_nmu, validation_five_nmu = train_test_split(train_five_near_miss_under, test_size=0.2, stratify=train_five_near_miss_under['returnShipment'])
train_five_ro, validation_five_ro = train_test_split(train_five_random_over, test_size=0.2, stratify=train_five_random_over['returnShipment'])
train_five_snc, validation_five_snc = train_test_split(train_five_smote, test_size=0.2, stratify=train_five_smote['returnShipment'])

# 25% class balance
train_twenty_five_ru, validation_twenty_five_ru = train_test_split(train_twenty_five_random_under, test_size=0.2, stratify=train_twenty_five_random_under['returnShipment'])
train_twenty_five_nmu, validation_twenty_five_nmu = train_test_split(train_twenty_five_near_miss_under, test_size=0.2, stratify=train_twenty_five_near_miss_under['returnShipment'])
train_twenty_five_ro, validation_twenty_five_ro = train_test_split(train_twenty_five_random_over, test_size=0.2, stratify=train_twenty_five_random_over['returnShipment'])
train_twenty_five_snc, validation_twenty_five_snc = train_test_split(train_twenty_five_smote, test_size=0.2, stratify=train_twenty_five_smote['returnShipment'])

# 50% class balance
train_fifty_ru, validation_fifty_ru = train_test_split(train_fifty_random_under, test_size=0.2, stratify=train_fifty_random_under['returnShipment'])
train_fifty_nmu, validation_fifty_nmu = train_test_split(train_fifty_near_miss_under, test_size=0.2, stratify=train_fifty_near_miss_under['returnShipment'])
train_fifty_ro, validation_fifty_ro = train_test_split(train_fifty_random_over, test_size=0.2, stratify=train_fifty_random_over['returnShipment'])
train_fifty_snc, validation_fifty_snc = train_test_split(train_fifty_smote, test_size=0.2, stratify=train_fifty_smote['returnShipment'])

In [17]:
# Split datasets into features and target

# 1% class balance
train_one_ru_X, train_one_ru_y = split_x_y(df=train_one_ru, target='returnShipment')
validation_one_ru_X, validation_one_ru_y = split_x_y(df=validation_one_ru, target='returnShipment')

train_one_nmu_X, train_one_nmu_y = split_x_y(df=train_one_nmu, target='returnShipment')
validation_one_nmu_X, validation_one_nmu_y = split_x_y(df=validation_one_nmu, target='returnShipment')

train_one_ro_X, train_one_ro_y = split_x_y(df=train_one_ro, target='returnShipment')
validation_one_ro_X, validation_one_ro_y = split_x_y(df=validation_one_ro, target='returnShipment')

train_one_snc_X, train_one_snc_y = split_x_y(df=train_one_snc, target='returnShipment')
validation_one_snc_X, validation_one_snc_y = split_x_y(df=validation_one_snc, target='returnShipment')

# 3% class balance
train_three_ru_X, train_three_ru_y = split_x_y(df=train_three_ru, target='returnShipment')
validation_three_ru_X, validation_three_ru_y = split_x_y(df=validation_three_ru, target='returnShipment')

train_three_nmu_X, train_three_nmu_y = split_x_y(df=train_three_nmu, target='returnShipment')
validation_three_nmu_X, validation_three_nmu_y = split_x_y(df=validation_three_nmu, target='returnShipment')

train_three_ro_X, train_three_ro_y = split_x_y(df=train_three_ro, target='returnShipment')
validation_three_ro_X, validation_three_ro_y = split_x_y(df=validation_three_ro, target='returnShipment')

train_three_snc_X, train_three_snc_y = split_x_y(df=train_three_snc, target='returnShipment')
validation_three_snc_X, validation_three_snc_y = split_x_y(df=validation_three_snc, target='returnShipment')

# 5% class balance
train_five_ru_X, train_five_ru_y = split_x_y(df=train_five_ru, target='returnShipment')
validation_five_ru_X, validation_five_ru_y = split_x_y(df=validation_five_ru, target='returnShipment')

train_five_nmu_X, train_five_nmu_y = split_x_y(df=train_five_nmu, target='returnShipment')
validation_five_nmu_X, validation_five_nmu_y = split_x_y(df=validation_five_nmu, target='returnShipment')

train_five_ro_X, train_five_ro_y = split_x_y(df=train_five_ro, target='returnShipment')
validation_five_ro_X, validation_five_ro_y = split_x_y(df=validation_five_ro, target='returnShipment')

train_five_snc_X, train_five_snc_y = split_x_y(df=train_five_snc, target='returnShipment')
validation_five_snc_X, validation_five_snc_y = split_x_y(df=validation_five_snc, target='returnShipment')

# 25% class balance
train_twenty_five_ru_X, train_twenty_five_ru_y = split_x_y(df=train_twenty_five_ru, target='returnShipment')
validation_twenty_five_ru_X, validation_twenty_five_ru_y = split_x_y(df=validation_twenty_five_ru, target='returnShipment')

train_twenty_five_nmu_X, train_twenty_five_nmu_y = split_x_y(df=train_twenty_five_nmu, target='returnShipment')
validation_twenty_five_nmu_X, validation_twenty_five_nmu_y = split_x_y(df=validation_five_nmu, target='returnShipment')

train_twenty_five_ro_X, train_twenty_five_ro_y = split_x_y(df=train_twenty_five_ro, target='returnShipment')
validation_twenty_five_ro_X, validation_twenty_five_ro_y = split_x_y(df=validation_twenty_five_ro, target='returnShipment')

train_twenty_five_snc_X, train_twenty_five_snc_y = split_x_y(df=train_twenty_five_snc, target='returnShipment')
validation_twenty_five_snc_X, validation_twenty_five_snc_y = split_x_y(df=validation_twenty_five_snc, target='returnShipment')

# 50% class balance
train_fifty_ru_X, train_fifty_ru_y = split_x_y(df=train_fifty_ru, target='returnShipment')
validation_fifty_ru_X, validation_fifty_ru_y = split_x_y(df=validation_fifty_ru, target='returnShipment')

train_fifty_nmu_X, train_fifty_nmu_y = split_x_y(df=train_fifty_nmu, target='returnShipment')
validation_fifty_nmu_X, validation_fifty_nmu_y = split_x_y(df=validation_fifty_nmu, target='returnShipment')

train_fifty_ro_X, train_fifty_ro_y = split_x_y(df=train_fifty_ro, target='returnShipment')
validation_fifty_ro_X, validation_fifty_ro_y = split_x_y(df=validation_fifty_ro, target='returnShipment')

train_fifty_snc_X, train_fifty_snc_y = split_x_y(df=train_fifty_snc, target='returnShipment')
validation_fifty_snc_X, validation_fifty_snc_y = split_x_y(df=validation_fifty_snc, target='returnShipment')

In [16]:
# 1% class balance

In [17]:
TrainEvaluation(train_one_ru_X, train_one_ru_y, validation_one_ru_X, validation_one_ru_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.43
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.54
LogisticRegressionClassification:
GeometricMean validation: 0.43
GeometricMean test: 0.47
LogisticRegressionClassification:
F1Score validation: 0.41
F1Score test: 0.61
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.48
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.61
XGBoostClassification:
GeometricMean validation: 0.48
GeometricMean test: 0.54
XGBoostClassification:
F1Score validation: 0.48
F1Score test: 0.64
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.48
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.62
RandomForestClassification:
GeometricMean validation: 0.48
GeometricMean test: 0.55
RandomForestClassification:
F1Score validation: 0.47
F1Score test: 0.65

In [18]:
TrainEvaluation(train_one_nmu_X, train_one_nmu_y, validation_one_nmu_X, validation_one_nmu_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.82
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.51
LogisticRegressionClassification:
GeometricMean validation: 0.82
GeometricMean test: 0.16
LogisticRegressionClassification:
F1Score validation: 0.81
F1Score test: 0.66
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.8
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.54
XGBoostClassification:
GeometricMean validation: 0.8
GeometricMean test: 0.15
XGBoostClassification:
F1Score validation: 0.8
F1Score test: 0.66
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.8
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.56
RandomForestClassification:
GeometricMean validation: 0.8
GeometricMean test: 0.16
RandomForestClassification:
F1Score validation: 0.81
F1Score test: 0.67

In [19]:
TrainEvaluation(train_one_ro_X, train_one_ro_y, validation_one_ro_X, validation_one_ro_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.57
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.54
LogisticRegressionClassification:
GeometricMean validation: 0.56
GeometricMean test: 0.28
LogisticRegressionClassification:
F1Score validation: 0.54
F1Score test: 0.65
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 1.0
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
XGBoostClassification:
GeometricMean validation: 1.0
GeometricMean test: 0.02
XGBoostClassification:
F1Score validation: 1.0
F1Score test: 0.0
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 1.0
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.57
RandomForestClassification:
GeometricMean validation: 1.0
GeometricMean test: 0.01
RandomForestClassification:
F1Score validation: 1.0
F1Score test: 0.0

In [20]:
TrainEvaluation(train_one_snc_X, train_one_snc_y, validation_one_snc_X, validation_one_snc_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.6
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.51
LogisticRegressionClassification:
GeometricMean validation: 0.6
GeometricMean test: 0.44
LogisticRegressionClassification:
F1Score validation: 0.6
F1Score test: 0.6
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.99
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.61
XGBoostClassification:
GeometricMean validation: 0.99
GeometricMean test: 0.16
XGBoostClassification:
F1Score validation: 0.99
F1Score test: 0.05
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.98
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.63
RandomForestClassification:
GeometricMean validation: 0.98
GeometricMean test: 0.29
RandomForestClassification:
F1Score validation: 0.98
F1Score test: 0.16

In [21]:
# 3% class balance

In [22]:
TrainEvaluation(train_three_ru_X, train_three_ru_y, validation_three_ru_X, validation_three_ru_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.51
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.56
LogisticRegressionClassification:
GeometricMean validation: 0.51
GeometricMean test: 0.54
LogisticRegressionClassification:
F1Score validation: 0.48
F1Score test: 0.54
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.6
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
XGBoostClassification:
GeometricMean validation: 0.6
GeometricMean test: 0.6
XGBoostClassification:
F1Score validation: 0.61
F1Score test: 0.61
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.55
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
RandomForestClassification:
GeometricMean validation: 0.55
GeometricMean test: 0.6
RandomForestClassification:
F1Score validation: 0.56
F1Score test: 0.65

In [23]:
TrainEvaluation(train_three_nmu_X, train_three_nmu_y, validation_three_nmu_X, validation_three_nmu_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.77
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.54
LogisticRegressionClassification:
GeometricMean validation: 0.77
GeometricMean test: 0.22
LogisticRegressionClassification:
F1Score validation: 0.77
F1Score test: 0.66
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.84
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.61
XGBoostClassification:
GeometricMean validation: 0.84
GeometricMean test: 0.46
XGBoostClassification:
F1Score validation: 0.84
F1Score test: 0.7
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.84
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.62
RandomForestClassification:
GeometricMean validation: 0.84
GeometricMean test: 0.45
RandomForestClassification:
F1Score validation: 0.84
F1Score test: 0.7

In [24]:
TrainEvaluation(train_three_ro_X, train_three_ro_y, validation_three_ro_X, validation_three_ro_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.57
LogisticRegressionClassification:
GeometricMean validation: 0.55
GeometricMean test: 0.55
LogisticRegressionClassification:
F1Score validation: 0.53
F1Score test: 0.51
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 1.0
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.63
XGBoostClassification:
GeometricMean validation: 1.0
GeometricMean test: 0.07
XGBoostClassification:
F1Score validation: 1.0
F1Score test: 0.01
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 1.0
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.63
RandomForestClassification:
GeometricMean validation: 1.0
GeometricMean test: 0.01
RandomForestClassification:
F1Score validation: 1.0
F1Score test: 0.0

In [18]:
TrainEvaluation(train_three_snc_X, train_three_snc_y, validation_three_snc_X, validation_three_snc_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.95
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
RandomForestClassification:
F1Score validation: 0.95
F1Score test: 0.24
RandomForestClassification:
GeometricMean validation: 0.95
GeometricMean test: 0.37
LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.6
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.54
LogisticRegressionClassification:
F1Score validation: 0.62
F1Score test: 0.52
LogisticRegressionClassification:
GeometricMean validation: 0.6
GeometricMean test: 0.53
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.98
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.64
XGBoostClassification:
F1Score validation: 0.98
F1Score test: 0.08
XGBoostClassification:
GeometricMean validation: 0.98
GeometricMean test: 0.2

In [26]:
# 5% class balance

In [27]:
TrainEvaluation(train_five_ru_X, train_five_ru_y, validation_five_ru_X, validation_five_ru_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.57
LogisticRegressionClassification:
GeometricMean validation: 0.55
GeometricMean test: 0.53
LogisticRegressionClassification:
F1Score validation: 0.53
F1Score test: 0.61
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.6
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
XGBoostClassification:
GeometricMean validation: 0.6
GeometricMean test: 0.61
XGBoostClassification:
F1Score validation: 0.59
F1Score test: 0.65
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.64
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
RandomForestClassification:
GeometricMean validation: 0.64
GeometricMean test: 0.6
RandomForestClassification:
F1Score validation: 0.64
F1Score test: 0.64

In [28]:
TrainEvaluation(train_five_nmu_X, train_five_nmu_y, validation_five_nmu_X, validation_five_nmu_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.72
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.55
LogisticRegressionClassification:
GeometricMean validation: 0.72
GeometricMean test: 0.45
LogisticRegressionClassification:
F1Score validation: 0.7
F1Score test: 0.63
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.75
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.62
XGBoostClassification:
GeometricMean validation: 0.75
GeometricMean test: 0.52
XGBoostClassification:
F1Score validation: 0.75
F1Score test: 0.68
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.78
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.62
RandomForestClassification:
GeometricMean validation: 0.78
GeometricMean test: 0.46
RandomForestClassification:
F1Score validation: 0.78
F1Score test: 0.7

In [29]:
TrainEvaluation(train_five_ro_X, train_five_ro_y, validation_five_ro_X, validation_five_ro_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.57
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
GeometricMean validation: 0.57
GeometricMean test: 0.52
LogisticRegressionClassification:
F1Score validation: 0.56
F1Score test: 0.62
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 1.0
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.63
XGBoostClassification:
GeometricMean validation: 1.0
GeometricMean test: 0.11
XGBoostClassification:
F1Score validation: 1.0
F1Score test: 0.02
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 1.0
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
RandomForestClassification:
GeometricMean validation: 1.0
GeometricMean test: 0.02
RandomForestClassification:
F1Score validation: 1.0
F1Score test: 0.0

In [30]:
TrainEvaluation(train_five_snc_X, train_five_snc_y, validation_five_snc_X, validation_five_snc_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.6
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.53
LogisticRegressionClassification:
GeometricMean validation: 0.6
GeometricMean test: 0.52
LogisticRegressionClassification:
F1Score validation: 0.61
F1Score test: 0.52
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.97
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.64
XGBoostClassification:
GeometricMean validation: 0.97
GeometricMean test: 0.26
XGBoostClassification:
F1Score validation: 0.97
F1Score test: 0.12
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.93
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
RandomForestClassification:
GeometricMean validation: 0.93
GeometricMean test: 0.38
RandomForestClassification:
F1Score validation: 0.93
F1Score test: 0.26

In [31]:
# 25% class balance

In [32]:
TrainEvaluation(train_twenty_five_ru_X, train_twenty_five_ru_y, validation_twenty_five_ru_X, validation_twenty_five_ru_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
GeometricMean validation: 0.56
GeometricMean test: 0.56
LogisticRegressionClassification:
F1Score validation: 0.54
F1Score test: 0.57
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.61
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
XGBoostClassification:
GeometricMean validation: 0.61
GeometricMean test: 0.61
XGBoostClassification:
F1Score validation: 0.62
F1Score test: 0.64
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.63
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
RandomForestClassification:
GeometricMean validation: 0.62
GeometricMean test: 0.61
RandomForestClassification:
F1Score validation: 0.67
F1Score test: 0.64

In [33]:
TrainEvaluation(train_twenty_five_nmu_X, train_twenty_five_nmu_y, validation_twenty_five_nmu_X, validation_twenty_five_nmu_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.67
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.56
LogisticRegressionClassification:
GeometricMean validation: 0.67
GeometricMean test: 0.3
LogisticRegressionClassification:
F1Score validation: 0.65
F1Score test: 0.66
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.84
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.64
XGBoostClassification:
GeometricMean validation: 0.84
GeometricMean test: 0.51
XGBoostClassification:
F1Score validation: 0.83
F1Score test: 0.69
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.87
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.65
RandomForestClassification:
GeometricMean validation: 0.87
GeometricMean test: 0.5
RandomForestClassification:
F1Score validation: 0.86
F1Score test: 0.69

In [34]:
TrainEvaluation(train_twenty_five_ro_X, train_twenty_five_ro_y, validation_twenty_five_ro_X, validation_twenty_five_ro_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
GeometricMean validation: 0.56
GeometricMean test: 0.56
LogisticRegressionClassification:
F1Score validation: 0.54
F1Score test: 0.57
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.88
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
XGBoostClassification:
GeometricMean validation: 0.88
GeometricMean test: 0.57
XGBoostClassification:
F1Score validation: 0.89
F1Score test: 0.52
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.89
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
RandomForestClassification:
GeometricMean validation: 0.89
GeometricMean test: 0.43
RandomForestClassification:
F1Score validation: 0.9
F1Score test: 0.32

In [35]:
TrainEvaluation(train_twenty_five_snc_X, train_twenty_five_snc_y, validation_twenty_five_snc_X, validation_twenty_five_snc_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.57
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.57
LogisticRegressionClassification:
GeometricMean validation: 0.57
GeometricMean test: 0.55
LogisticRegressionClassification:
F1Score validation: 0.56
F1Score test: 0.52
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.8
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
XGBoostClassification:
GeometricMean validation: 0.8
GeometricMean test: 0.54
XGBoostClassification:
F1Score validation: 0.8
F1Score test: 0.47
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.77
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
RandomForestClassification:
GeometricMean validation: 0.77
GeometricMean test: 0.58
RandomForestClassification:
F1Score validation: 0.77
F1Score test: 0.53

In [36]:
# 50% class balance

In [37]:
TrainEvaluation(train_fifty_ru_X, train_fifty_ru_y, validation_fifty_ru_X, validation_fifty_ru_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


RandomForestClassification:
Accuracy validation: 0.63
Accuracy test: 0.62
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.63
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
RandomForestClassification:
F1Score validation: 0.66
F1Score test: 0.65
LogisticRegressionClassification:
Accuracy validation: 0.56
Accuracy test: 0.56
LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
F1Score validation: 0.55
F1Score test: 0.57
XGBoostClassification:
Accuracy validation: 0.62
Accuracy test: 0.62
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.62
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
XGBoostClassification:
F1Score validation: 0.64
F1Score test: 0.64

In [38]:
TrainEvaluation(train_fifty_nmu_X, train_fifty_nmu_y, validation_fifty_nmu_X, validation_fifty_nmu_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


RandomForestClassification:
Accuracy validation: 0.63
Accuracy test: 0.63
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.63
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
RandomForestClassification:
F1Score validation: 0.66
F1Score test: 0.68
LogisticRegressionClassification:
Accuracy validation: 0.55
Accuracy test: 0.56
LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.55
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
F1Score validation: 0.53
F1Score test: 0.59
XGBoostClassification:
Accuracy validation: 0.61
Accuracy test: 0.62
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.61
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
XGBoostClassification:
F1Score validation: 0.62
F1Score test: 0.67

In [39]:
TrainEvaluation(train_fifty_ro_X, train_fifty_ro_y, validation_fifty_ro_X, validation_fifty_ro_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


RandomForestClassification:
Accuracy validation: 0.63
Accuracy test: 0.63
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.63
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
RandomForestClassification:
F1Score validation: 0.65
F1Score test: 0.67
LogisticRegressionClassification:
Accuracy validation: 0.56
Accuracy test: 0.56
LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.56
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
F1Score validation: 0.54
F1Score test: 0.57
XGBoostClassification:
Accuracy validation: 0.62
Accuracy test: 0.62
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.62
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
XGBoostClassification:
F1Score validation: 0.64
F1Score test: 0.68

In [40]:
TrainEvaluation(train_fifty_snc_X, train_fifty_snc_y, validation_fifty_snc_X, validation_fifty_snc_y, test_X, test_y, ('xgboost', 'random_forest', 'logistic_regression'), ('f1_score', 'roc_auc_score', 'geometric_mean_score'))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


RandomForestClassification:
Accuracy validation: 0.64
Accuracy test: 0.63
RandomForestClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.64
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.67
RandomForestClassification:
F1Score validation: 0.67
F1Score test: 0.68
LogisticRegressionClassification:
Accuracy validation: 0.55
Accuracy test: 0.56
LogisticRegressionClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.55
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.58
LogisticRegressionClassification:
F1Score validation: 0.53
F1Score test: 0.59
XGBoostClassification:
Accuracy validation: 0.62
Accuracy test: 0.62
XGBoostClassification:
AreaUnderTheReceiverOperatingCharacteristicCurve validation: 0.62
AreaUnderTheReceiverOperatingCharacteristicCurve test: 0.66
XGBoostClassification:
F1Score validation: 0.64
F1Score test: 0.67