In [1]:
import numpy as np
import pandas as pd
import imblearn as imb
from imblearn.datasets import fetch_datasets

In [2]:
data = fetch_datasets()['ecoli']

In [3]:
X, y = data.data, data.target

In [4]:
df = pd.DataFrame(y, columns = ['Target'])
df.Target.value_counts()

-1    301
 1     35
Name: Target, dtype: int64

# Using normal dataset 

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, stratify = y)

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

DTC = DecisionTreeClassifier(criterion = 'gini', max_depth = 10, min_samples_leaf = 10)

DTC.fit(X_train, y_train)
y_pred = DTC.predict(X_test)
c0 = classification_report(y_test, y_pred)

# Apply SMOTING 

In [7]:
from imblearn.over_sampling import SMOTE
SMT = SMOTE()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, stratify = y)
X_train, y_train = SMT.fit_sample(X_train, y_train)

In [8]:
DTC = DecisionTreeClassifier(criterion = 'gini', max_depth = 10, min_samples_leaf = 10)

DTC.fit(X_train, y_train)
y_pred = DTC.predict(X_test)
c1 = classification_report(y_test, y_pred)

# Apply Near Miss 

In [9]:
from imblearn.under_sampling import NearMiss
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, stratify = y)
X_train, y_train = NearMiss().fit_sample(X_train, y_train)

In [10]:
DTC = DecisionTreeClassifier(criterion = 'gini', max_depth = 10, min_samples_leaf = 10)

DTC.fit(X_train, y_train)
y_pred = DTC.predict(X_test)
c2 = classification_report(y_test, y_pred)

In [11]:
print("\n================================= Basic Classifier Results =================================\n")
print(c0)
print("\n================================= SMOTED Classifier Results =================================\n")
print(c1)
print("\n================================= NearMiss Classifier Results =================================\n")
print(c2)



              precision    recall  f1-score   support

          -1       0.91      0.96      0.93        90
           1       0.43      0.27      0.33        11

    accuracy                           0.88       101
   macro avg       0.67      0.61      0.63       101
weighted avg       0.86      0.88      0.87       101



              precision    recall  f1-score   support

          -1       0.95      0.92      0.94        90
           1       0.50      0.64      0.56        11

    accuracy                           0.89       101
   macro avg       0.73      0.78      0.75       101
weighted avg       0.90      0.89      0.90       101



              precision    recall  f1-score   support

          -1       0.92      0.64      0.76        90
           1       0.16      0.55      0.24        11

    accuracy                           0.63       101
   macro avg       0.54      0.59      0.50       101
weighted avg       0.84      0.63      0.70       101

