# 1 Introduction

This notebook demonstrates how the `hyperlearn` module can be used to compare oversamplers on imbalanced datasets.

# 2 Imports

In [1]:
from hyperlearn.experiment import ResamplingExperiment
from hyperlearn.analysis import _extract_resamplers_classifiers
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE, RandomOverSampler

# 3 Experiment

#### 3.1 Configure experiment

In [2]:
# --- Datasets ---------------------------------------------------------------
bc = load_breast_cancer()
iris = load_iris()

X_bc, y_bc = bc.data, bc.target

# Work on a copy of the iris labels: relabelling `iris.target` directly would
# silently alter the loaded Bunch, leaving it inconsistent with its metadata.
X_iris, y_iris = iris.data, iris.target.copy()
# Merge class 2 into class 0 so that only the labels 0 and 1 remain.
y_iris[y_iris == 2] = 0

# Each dataset entry is (name, (X, y)).
datasets = [
    ('bc', (X_bc, y_bc)),
    ('iris', (X_iris, y_iris))
]

# --- Hyper-parameter grids ----------------------------------------------------
# A grid is a list of dicts; every dict is expanded separately into its own
# combinations of values.
# NOTE(review): `kind` appears to be accepted only by older imbalanced-learn
# releases of SMOTE - confirm against the installed version.
smote_param_grid = [
    dict(kind=['borderline1', 'borderline2'], k_neighbors=[3, 6]),
    dict(k_neighbors=[2, 4])
]

dt_param_grid = [
    dict(max_depth=[2, 5]), 
    dict(criterion=['gini', 'entropy'], max_depth=[3, 6])
]

# --- Estimators -----------------------------------------------------------------
# Entries are (name, estimator) or (name, estimator, param_grid); an entry
# without a grid is used with the estimator's constructor defaults.
# NOTE(review): none of the estimators sets `random_state`. Confirm whether
# ResamplingExperiment seeds them itself; otherwise results vary between runs.
classifiers = [
    ('LR', LogisticRegression()), 
    ('DT', DecisionTreeClassifier(), dt_param_grid)
]

# `None` stands for the baseline in which the data is not resampled.
oversamplers = [
    ('No oversampling', None), 
    ('Random oversampler', RandomOverSampler()), 
    ('SMOTE', SMOTE(), smote_param_grid)
]

In [3]:
# Bundle the datasets, classifiers and oversamplers configured above into a
# single experiment object.
# NOTE(review): n_jobs=-1 presumably follows the joblib convention of using
# all available cores - confirm in the hyperlearn documentation.
experiment = ResamplingExperiment(
    datasets,
    classifiers,
    oversamplers,
    n_jobs=-1,
)

#### 3.2 Run experiment

In [4]:
# Execute the configured experiment; a progress bar is printed while it runs.
experiment.run()

100% (60 of 60) |#########################| Elapsed Time: 0:00:11 ETA:  0:00:00

In [5]:
# Build a table of the classifier / resampler / parameter combinations held by
# the experiment.
# NOTE(review): `_extract_resamplers_classifiers` has a leading underscore,
# i.e. it is a private helper of hyperlearn.analysis and may change without
# notice - check whether a public equivalent exists.
estimators = _extract_resamplers_classifiers(experiment)

In [6]:
# Display the extracted table (columns: Classifier, Resampler, Parameters).
estimators

Unnamed: 0,Classifier,Resampler,Parameters
0,LR,No oversampling,{}
1,DT,No oversampling,{'DT__max_depth': 2}
2,DT,No oversampling,{'DT__max_depth': 5}
3,DT,No oversampling,"{'DT__criterion': 'gini', 'DT__max_depth': 3}"
4,DT,No oversampling,"{'DT__criterion': 'gini', 'DT__max_depth': 6}"
5,DT,No oversampling,"{'DT__criterion': 'entropy', 'DT__max_depth': 3}"
6,DT,No oversampling,"{'DT__criterion': 'entropy', 'DT__max_depth': 6}"
7,LR,Random oversampler,{}
8,DT,Random oversampler,{'DT__max_depth': 2}
9,DT,Random oversampler,{'DT__max_depth': 5}
