In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from sklearn.utils import resample



In [3]:
# 1. Load and create an imbalanced dataset
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Make the dataset imbalanced by removing some samples from the majority class (class 2)
X_imbalanced = X[y != 2]
y_imbalanced = y[y != 2]

# 2. Resample the dataset (oversample the minority class or undersample the majority class)
X_resampled, y_resampled = resample(X_imbalanced, y_imbalanced, 
                                    replace=True, 
                                    n_samples=X_imbalanced.shape[0] * 2, 
                                    random_state=42)

# 3. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# 4. Standardize the data (important for models like SVM, KNN, etc.)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 5. Apply SMOTE (Synthetic Minority Over-sampling Technique) to generate synthetic data
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# 6. Train the RandomForestClassifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_smote, y_train_smote)

# 7. Evaluate the model
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        18

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

