# 1 Introduction

This notebook demonstrates the functionality of the `imbtools.evaluation` module on simulated imbalanced data.

# 2 Imports

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE, RandomOverSampler
from imbtools.evaluation import BinaryExperiment
from imbtools.evaluation import summarize_datasets, calculate_stats, calculate_optimal_stats, calculate_optimal_stats_wide
from imbtools.evaluation import calculate_ranking, calculate_mean_ranking, calculate_friedman_test
from imbtools.evaluation import load_experiment
from imbtools.utils import _flatten_parameters_list

# 3 Simulate data

#### 3.1 Datasets parameters

In [2]:
# Two families of simulated binary problems, expressed as parameter grids
# (every value is a list of candidates to be combined).
datasets_param_grid = [
    # Family 1: 95% / 5% class weights, 10 features, no redundant features.
    dict(
        n_classes=[2],
        class_sep=[0.9, 2],
        n_clusters_per_class=[1, 2],
        weights=[[0.95, 0.05]],
        n_samples=[500, 1000],
        n_features=[10],
        n_redundant=[0],
        random_state=[5],
    ),
    # Family 2: 85% / 15% class weights, 5 features of which 3 are informative.
    dict(
        n_classes=[2],
        class_sep=[1],
        n_clusters_per_class=[1, 2, 3],
        weights=[[0.85, 0.15]],
        n_samples=[500, 1000],
        n_features=[5],
        n_informative=[3],
        random_state=[5],
    ),
]

# Expand the grids into a flat sequence of keyword-argument dicts; each element is
# later unpacked into make_classification(**parameters) to build one dataset.
# NOTE(review): _flatten_parameters_list is a private helper (leading underscore) of
# imbtools.utils, so its behaviour may change between releases — confirm before relying on it.
datasets_parameters = _flatten_parameters_list(datasets_param_grid)

#### 3.2 Append datasets to container

In [3]:
# Build one (name, data) pair per parameter combination; names are numbered from 1
# ("dataset1", "dataset2", ...) and data is whatever make_classification returns.
imbalanced_datasets = [
    ("dataset{}".format(position), make_classification(**dataset_kwargs))
    for position, dataset_kwargs in enumerate(datasets_parameters, start=1)
]

# 4 Experiment

#### 4.1 Configure experiment

In [4]:
# Decision-tree hyperparameters: two independent grids (tree depth, split criterion).
dt_param_grid = [
    {'max_depth': [2, 5]},
    {'criterion': ['gini', 'entropy']},
]

# Each entry is (name, estimator) with an optional third element holding a parameter grid.
classifiers = [
    ("LR", LogisticRegression()),
    ("DT", DecisionTreeClassifier(), dt_param_grid),
]

In [5]:
# SMOTE is evaluated with two neighbourhood sizes.
smote_param_grid = [
    {'k_neighbors': [3, 6]},
]

# Each entry is (name, oversampler) with an optional parameter grid;
# None stands for the "no oversampling" baseline.
oversamplers = [
    ("No oversampling", None),
    ("Random oversampler", RandomOverSampler()),
    ("SMOTE", SMOTE(), smote_param_grid),
]

In [6]:
# Metric names passed to the experiment; each one appears in the "Metric" column of the results.
scoring = [
    'roc_auc',
    'f1',
    'geometric_mean_score',
    'tp',
    'tn',
]

In [7]:
# Assemble the experiment from the datasets, classifiers and oversamplers defined above;
# random_state is fixed so that repeated runs of the notebook use the same seed.
experiment = BinaryExperiment(
    imbalanced_datasets,
    classifiers,
    oversamplers,
    scoring=scoring,
    random_state=1,
)

#### 4.2 Run experiment

In [8]:
# Execute the configured experiment. The recorded output below shows a progress bar
# counting 1400 steps and an elapsed time of just under a minute.
experiment.run()

 99% (1399 of 1400) |##################################################### | Elapsed Time: 0:00:52 ETA: 0:00:00

#### 4.3 Datasets summary

In [9]:
# Tabulate the simulated datasets: name, number of features and instances,
# minority/majority class counts and the resulting imbalance ratio.
summarize_datasets(imbalanced_datasets)

Unnamed: 0,Dataset name,# of features,# of instances,# of minority instances,# of majority instances,Imbalance Ratio
0,dataset1,10,500,29,471,16.24
1,dataset2,10,1000,54,946,17.52
2,dataset3,10,500,27,473,17.52
...,...,...,...,...,...,...
11,dataset12,5,1000,148,852,5.76
12,dataset13,5,500,77,423,5.49
13,dataset14,5,1000,150,850,5.67


#### 4.4 Non-aggregated results across experiments

In [10]:
# Raw results in long format: one row per dataset / classifier / oversampler / metric
# with its CV score. Names such as "DT4" or "SMOTE2" carry a numeric suffix —
# presumably one per hyperparameter combination; confirm against imbtools.
experiment.results_

Unnamed: 0,Dataset,Classifier,Oversampler,Metric,CV score
0,dataset1,LR,No oversampling,roc_auc,0.899764
1,dataset1,LR,No oversampling,f1,0.763889
2,dataset1,LR,No oversampling,geometric_mean_score,0.787874
...,...,...,...,...,...
6997,dataset14,DT4,SMOTE2,geometric_mean_score,0.809703
6998,dataset14,DT4,SMOTE2,tp,258.333333
6999,dataset14,DT4,SMOTE2,tn,36.000000


#### 4.4 Aggregated results across experiments

In [11]:
# Aggregate the raw results: mean and standard deviation of the CV score for every
# dataset / classifier / oversampler / metric combination.
calculate_stats(experiment)

Unnamed: 0,Dataset,Classifier,Oversampler,Metric,Mean CV score,Std CV score
0,dataset1,DT1,No oversampling,f1,0.689451,0.064841
1,dataset1,DT1,No oversampling,geometric_mean_score,0.755530,0.046093
2,dataset1,DT1,No oversampling,roc_auc,0.804763,0.038830
...,...,...,...,...,...,...
1397,dataset9,LR,SMOTE2,roc_auc,0.996776,0.000423
1398,dataset9,LR,SMOTE2,tn,24.133333,0.182574
1399,dataset9,LR,SMOTE2,tp,140.666667,0.000000


#### 4.5 Optimal aggregated results across experiments

In [12]:
# Aggregated statistics reported per base classifier / oversampler name (e.g. "DT", "SMOTE"
# without numeric suffix) — presumably the best hyperparameter configuration of each
# is kept; confirm against the imbtools documentation.
calculate_optimal_stats(experiment)

Unnamed: 0,Dataset,Classifier,Oversampler,Metric,Mean CV score,Std CV score
0,dataset1,LR,No oversampling,f1,0.755696,0.010817
1,dataset1,LR,No oversampling,geometric_mean_score,0.781964,0.007516
2,dataset1,LR,No oversampling,roc_auc,0.923213,0.016755
...,...,...,...,...,...,...
417,dataset14,DT,SMOTE,roc_auc,0.846433,0.032887
418,dataset14,DT,SMOTE,tn,40.000000,2.768875
419,dataset14,DT,SMOTE,tp,260.066667,2.046678


#### 4.6 Optimal aggregated results across experiments in wide format

In [13]:
# Same optimal statistics in wide format: one column per oversampler, where each
# cell holds a pair of values — presumably (mean, std) of the CV score; confirm
# against the imbtools documentation.
calculate_optimal_stats_wide(experiment)

Unnamed: 0,Dataset,Classifier,Metric,No oversampling,Random oversampler,SMOTE
0,dataset1,DT,f1,"(0.768157321408, 0.0310364004143)","(0.741631224526, 0.038328297645)","(0.788369527503, 0.0237540345262)"
1,dataset1,DT,geometric_mean_score,"(0.837869027899, 0.0239272087633)","(0.851711578736, 0.0260935267616)","(0.852805014079, 0.026345661213)"
2,dataset1,DT,roc_auc,"(0.850561453173, 0.0306060642791)","(0.859054022175, 0.0278746910133)","(0.873043170559, 0.0357655067659)"
...,...,...,...,...,...,...
137,dataset9,LR,roc_auc,"(0.996681250624, 0.000416811630215)","(0.996756634369, 0.000401703925665)","(0.996813105584, 0.000384225127748)"
138,dataset9,LR,tn,"(23.6, 0.1490711985)","(24.1333333333, 0.182574185835)","(24.1333333333, 0.182574185835)"
139,dataset9,LR,tp,"(141.333333333, 0.0)","(140.733333333, 0.1490711985)","(140.8, 0.182574185835)"


#### 4.7 Oversamplers ranking

In [14]:
# Rank the oversamplers within each dataset / classifier / metric row.
# In the recorded output, rank 1 goes to the oversampler with the highest mean score.
calculate_ranking(experiment)

Unnamed: 0,Dataset,Classifier,Metric,No oversampling,Random oversampler,SMOTE
0,dataset1,DT,f1,2,3,1
1,dataset1,DT,geometric_mean_score,3,2,1
2,dataset1,DT,roc_auc,3,2,1
...,...,...,...,...,...,...
137,dataset9,LR,roc_auc,3,2,1
138,dataset9,LR,tn,3,2,1
139,dataset9,LR,tp,1,3,2


#### 4.8 Oversamplers mean ranking across datasets

In [15]:
# Average each oversampler's rank over all datasets, per classifier and metric.
calculate_mean_ranking(experiment)

Unnamed: 0,Classifier,Metric,No oversampling,Random oversampler,SMOTE
0,DT,f1,2.000000,2.214286,1.785714
1,DT,geometric_mean_score,2.714286,2.000000,1.285714
2,DT,roc_auc,2.500000,2.214286,1.285714
...,...,...,...,...,...
7,LR,roc_auc,1.571429,2.500000,1.928571
8,LR,tn,3.000000,1.500000,1.500000
9,LR,tp,1.000000,3.000000,2.000000


#### 4.9 Friedman test across datasets for every combination of classifiers and metrics

In [16]:
# Friedman test across datasets: one p-value per classifier / metric combination.
calculate_friedman_test(experiment)

Unnamed: 0,Classifier,Metric,p-value
0,DT,f1,5.257880e-01
1,DT,geometric_mean_score,7.904903e-04
2,DT,roc_auc,3.542732e-03
...,...,...,...
7,LR,roc_auc,4.635489e-02
8,LR,tn,2.753645e-05
9,LR,tp,8.315287e-07


# 5 Save and load experiment object

#### 5.1 Save

In [17]:
# Persist the experiment object to 'experiment.p' (path relative to the working directory).
# NOTE(review): the '.p' extension suggests a pickle file — confirm the on-disk format.
experiment.save('experiment.p')

#### 5.2 Load

In [18]:
# Restore the experiment from the same 'experiment.p' file written by the save cell.
# NOTE(review): if the format is pickle, only load files from trusted sources.
loaded_experiment = load_experiment('experiment.p')