In [None]:
import pandas as pd
from sklearn.datasets import make_moons
from sklearn.metrics import classification_report
from SMOTEBoost import SMOTEBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Toy example

In [None]:
# Build a small synthetic two-class data set for a sanity check of the classifier.
X, y = make_moons(
    n_samples=1000,
    noise=0.3,
    random_state=0,
)

# Hold out 33% of the samples for evaluation; the seed is fixed so the split is repeatable.
# NOTE: the training labels are bound to `y_trian` (sic) because the following
# cell reads them under that exact name.
split_parts = train_test_split(X, y, test_size=0.33, random_state=0)
X_train, X_test, y_trian, y_test = split_parts

In [18]:
# Train SMOTEBoost with its default settings on the toy training split.
smote = SMOTEBoostClassifier()
smote.fit(X_train, y_trian)

# Score the held-out split.
y_pred = smote.predict(X_test)
bal_acc = balanced_accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Headline metrics first, then the full per-class breakdown.
print('balanced_accuracy_score: ', bal_acc, 'f1_score: ', f1)
print()
print(classification_report(y_test, y_pred))

balanced_accuracy_score:  0.8971015025164395 f1_score:  0.896969696969697

              precision    recall  f1-score   support

           0       0.89      0.91      0.90       163
           1       0.91      0.89      0.90       167

    accuracy                           0.90       330
   macro avg       0.90      0.90      0.90       330
weighted avg       0.90      0.90      0.90       330



# Our dataset with imbalanced classes

In [None]:
import os
import sys

# Locate the data directory by swapping 'SMOTEBoost' for 'data' in the first
# sys.path entry.
# NOTE(review): str.replace rewrites *every* occurrence of 'SMOTEBoost' in the
# path, and this presumably relies on sys.path[0] being the notebook's own
# directory -- confirm against the repository layout.
path = sys.path[0].replace('SMOTEBoost', 'data')

# os.listdir returns entries in arbitrary order, so sort them to make the
# choice of file deterministic, and skip hidden entries (e.g. a
# '.ipynb_checkpoints' folder) that are not data files.
data_files = sorted(entry for entry in os.listdir(path) if not entry.startswith('.'))
if not data_files:
    # Fail with an explicit message instead of a bare IndexError.
    raise FileNotFoundError('No data file found in {!r}'.format(path))

# os.path.join uses the platform's separator instead of a hard-coded '\\',
# so the resulting path is valid on Windows, Linux and macOS alike.
data_name = os.path.join(path, data_files[0])

In [19]:
# Read the data file located by the previous cell.
df = pd.read_csv(data_name)

# 'Class-label' is the target column; every other column is a feature.
target_col = 'Class-label'
y = df[target_col]

# Turn categorical feature columns into indicator (dummy) columns.
X = pd.get_dummies(df.drop(target_col, axis=1))

# Hold out 33% of the rows for evaluation with a fixed seed.
# NOTE: the training labels are bound to `y_trian` (sic) because the following
# cell reads them under that exact name.
split_parts = train_test_split(X, y, test_size=0.33, random_state=0)
X_train, X_test, y_trian, y_test = split_parts

In [21]:
# Compare SMOTEBoost across several maximum depths of the decision tree
# that is passed to it as its first argument.
for depth in (3, 5, 7, 20):
    base_tree = DecisionTreeClassifier(max_depth=depth)
    smote = SMOTEBoostClassifier(base_tree)
    smote.fit(X_train, y_trian)

    y_pred = smote.predict(X_test)

    print("Base estimator's depth: ", depth)

    # Headline metrics for this depth, followed by the per-class report.
    bal_acc = balanced_accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print('balanced_accuracy_score: ', bal_acc, 'f1_score: ', f1)
    print()
    print(classification_report(y_test, y_pred))
    print()

Base estimator's depth:  3
balanced_accuracy_score:  0.6394012633478536 f1_score:  0.43704169285564637

              precision    recall  f1-score   support

           0       0.81      0.99      0.89     11243
           1       0.94      0.28      0.44      3665

    accuracy                           0.82     14908
   macro avg       0.88      0.64      0.66     14908
weighted avg       0.84      0.82      0.78     14908


Base estimator's depth:  5
balanced_accuracy_score:  0.7340278183096252 f1_score:  0.6213271706833939

              precision    recall  f1-score   support

           0       0.86      0.95      0.90     11243
           1       0.79      0.51      0.62      3665

    accuracy                           0.85     14908
   macro avg       0.82      0.73      0.76     14908
weighted avg       0.84      0.85      0.83     14908


Base estimator's depth:  7
balanced_accuracy_score:  0.7576157436872347 f1_score:  0.65687667771988

              precision    recall  f