[Reference](https://towardsdatascience.com/fighting-imbalance-data-set-with-code-examples-f2a3880700a6)

# Under-sampling

![under_sampling](https://miro.medium.com/max/1312/1*1kUdoh7nLRbXeLuWSSMUMw.png)

In [3]:
from collections import Counter

from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification

# Synthetic, deliberately imbalanced two-class dataset: the class weights ask
# for a 10% / 90% split over 1000 samples, and the fixed random_state keeps
# the generated data reproducible between runs.
dataset_options = dict(
    n_samples=1000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=10,
)
X, y = make_classification(**dataset_options)



In [4]:
# Tally the samples per class before any resampling is applied.
original_counts = Counter(y)
print('Original dataset shape %s' % original_counts)

Original dataset shape Counter({1: 900, 0: 100})


In [5]:
# Random under-sampler; the seed is pinned so the selected samples are the
# same on every run.
rus = RandomUnderSampler(
    random_state=42,
)

In [6]:
# Fit the sampler on the imbalanced data and get back the reduced
# feature matrix and label vector in one step.
rus_result = rus.fit_resample(X, y)
X_res, y_res = rus_result



In [7]:
# Per-class sample counts after random under-sampling.
resampled_counts = Counter(y_res)
print('Resampled dataset shape %s' % resampled_counts)

Resampled dataset shape Counter({0: 100, 1: 100})


In [8]:
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.under_sampling import NearMiss

In [9]:
# Rebuild the imbalanced two-class dataset (10% / 90% class weights,
# 1000 samples, fixed seed) as the input for the NearMiss example.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=10,
)

In [10]:
# Class distribution of the freshly generated data, before NearMiss runs.
counts_before = Counter(y)
print('Original dataset shape %s' % counts_before)

Original dataset shape Counter({1: 900, 0: 100})


In [11]:
# NearMiss under-sampler, constructed with the library's default settings.
nm = NearMiss(
)

In [12]:
# Under-sample with NearMiss, then report how many samples of each class
# remain in the resampled labels.
nm_result = nm.fit_resample(X, y)
X_res, y_res = nm_result
counts_after = Counter(y_res)
print('Resampled dataset shape %s' % counts_after)

Resampled dataset shape Counter({0: 100, 1: 100})




# Over-sampling

![over_sampling](https://miro.medium.com/max/1206/1*1PvOoL_lJlk6dvrf3rjqbQ.png)

In [13]:
from collections import Counter

from imblearn.over_sampling import RandomOverSampler
from sklearn.datasets import make_classification

# Three-class toy dataset with a heavily skewed class distribution
# (requested weights: 1% / 5% / 94% of 5000 samples), two features only.
X, y = make_classification(
    n_samples=5000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=3,
    n_clusters_per_class=1,
    weights=[0.01, 0.05, 0.94],
    class_sep=0.8,
    random_state=0,
)

# Randomly over-sample with a fixed seed so the result is reproducible.
ros = RandomOverSampler(random_state=0)
X_resampled, y_resampled = ros.fit_resample(X, y)

# Show the (label, count) pairs in ascending label order.
class_counts = sorted(Counter(y_resampled).items())
print(class_counts)

[(0, 4674), (1, 4674), (2, 4674)]




# SMOTE — Synthetic Minority Over-sampling Technique

![SMOTE](https://miro.medium.com/max/1400/0*ou98izbOnE2ExKPl.png)

# Ensemble classifier using samplers internally

![EasyEnsemble](https://miro.medium.com/max/1400/1*B4iMLhQwWg92tqqM8hGYNQ.png)

In [14]:
from collections import Counter

import numpy as np
from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler

# This cell originally used the `imblearn.ensemble.EasyEnsemble` sampler and
# its `fit_sample` method. Both were deprecated and are no longer available in
# current imbalanced-learn releases, so the cell failed on a fresh kernel
# (for model training the maintained successor is
# `imblearn.ensemble.EasyEnsembleClassifier`).
# To keep demonstrating the resampling step itself, the balanced subsets are
# drawn explicitly: one independent random under-sampling per subset, using
# the same `fit_resample` API as the rest of this notebook.
N_SUBSETS = 10       # number of balanced subsets to draw
ENSEMBLE_SEED = 42   # master seed from which every per-subset seed is derived

X, y = make_classification(n_classes=2, class_sep=2,
                           weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
print('Original dataset shape {}'.format(Counter(y)))

# Derive a distinct, reproducible seed for each subset from the master seed,
# so the subsets differ from one another but are stable across runs.
seed_source = np.random.RandomState(ENSEMBLE_SEED)
max_seed = np.iinfo(np.int32).max
subsets = [
    RandomUnderSampler(random_state=seed_source.randint(max_seed)).fit_resample(X, y)
    for _ in range(N_SUBSETS)
]

# Stack along a leading "subset" axis: X_res[i] / y_res[i] is the i-th subset.
X_res = np.array([X_subset for X_subset, _ in subsets])
y_res = np.array([y_subset for _, y_subset in subsets])
print('Resampled dataset shape {}'.format(Counter(y_res[0])))

Original dataset shape Counter({1: 900, 0: 100})
Resampled dataset shape Counter({0: 100, 1: 100})


