In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import NearestNeighbors

In [2]:
# Read the Iris table from the CSV that ships alongside the notebook.
df = pd.read_csv("Iris.csv")

# The "Id" column (when the file has one) is not a feature, so discard it.
df = df.drop(columns="Id", errors="ignore")

# Turn the species names into integer codes 0, 1, 2; `le` is kept around so the
# codes can be mapped back to names through `le.classes_` later on.
le = LabelEncoder().fit(df["Species"])
df["Species"] = le.transform(df["Species"])

# Feature matrix = every column except the target; target = the encoded species.
X = df.drop(columns=["Species"]).to_numpy()
y = df["Species"].to_numpy()

# Hold out 30% for testing, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=42
)


In [None]:
def manual_smote(X, y, target_class, setting="random", n_samples=20, random_state=None):
    """Oversample one class by interpolating between pairs of its samples.

    Each synthetic point is ``lam * a + (1 - lam) * b`` with ``lam`` drawn
    uniformly from [0, 1), where ``a`` and ``b`` are two rows of ``X`` whose
    label equals ``target_class``.

    Parameters
    ----------
    X : array-like, 2-D
        Feature matrix (one row per sample).
    y : array-like, 1-D
        Labels aligned with the rows of ``X``.
    target_class
        Label value of the class to oversample.
    setting : {"random", "nearest"}
        How the pair ``(a, b)`` is chosen:
        ``"random"``  - two distinct rows of the class picked at random;
        ``"nearest"`` - a randomly picked row of the class and its nearest
        other row of the same class.
    n_samples : int
        Number of synthetic rows to create. Honoured by both settings.
    random_state : int or None
        Seed for a private ``np.random.RandomState``. ``None`` (default) draws
        from NumPy's global RNG, so ``np.random.seed`` still controls it.

    Returns
    -------
    (X_resampled, y_resampled)
        New arrays: the original rows followed by the synthetic rows, and the
        original labels followed by ``n_samples`` copies of ``target_class``.

    Raises
    ------
    ValueError
        If ``setting`` is unknown, ``n_samples`` is negative, or the class has
        fewer than two samples (no pair to interpolate between).
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Fail early with a clear message instead of a shape error from np.vstack.
    if setting not in ("random", "nearest"):
        raise ValueError(
            f"Unknown setting {setting!r}; expected 'random' or 'nearest'."
        )
    if n_samples < 0:
        raise ValueError(f"n_samples must be non-negative, got {n_samples}.")
    if n_samples == 0:
        # Nothing to add; still hand back fresh arrays like the normal path does.
        return X.copy(), y.copy()

    X_minority = X[y == target_class]
    n_minority = len(X_minority)
    if n_minority < 2:
        raise ValueError(
            f"Need at least 2 samples of class {target_class!r} to interpolate, "
            f"got {n_minority}."
        )

    # The np.random module and RandomState expose the same choice/rand/randint
    # functions, so either can serve as the generator here.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    if setting == "random":
        synthetic_samples = []
        for _ in range(n_samples):
            i, j = rng.choice(n_minority, 2, replace=False)
            lam = rng.rand()
            synthetic_samples.append(lam * X_minority[i] + (1 - lam) * X_minority[j])
        synthetic_samples = np.array(synthetic_samples)
    else:  # setting == "nearest"
        # Calling kneighbors() without a query returns, for every fitted point,
        # its neighbours *excluding the point itself*, so duplicated rows can
        # no longer make a point its own "nearest neighbour".
        nn = NearestNeighbors(n_neighbors=1).fit(X_minority)
        nearest = nn.kneighbors(return_distance=False)[:, 0]

        # Pick n_samples base rows (with replacement) and blend each with its
        # nearest neighbour.
        base = rng.randint(0, n_minority, size=n_samples)
        lam = rng.rand(n_samples, 1)
        synthetic_samples = lam * X_minority[base] + (1 - lam) * X_minority[nearest[base]]

    y_new = np.full(len(synthetic_samples), target_class)

    X_resampled = np.vstack([X, synthetic_samples])
    y_resampled = np.hstack([y, y_new])

    return X_resampled, y_resampled


In [4]:
def train_and_evaluate(X_train, y_train, X_test, y_test, setting,
                       n_samples=30, class_names=None):
    """Fit one one-vs-rest linear model per class and report its test metrics.

    For every distinct label in ``y_train`` the training set is oversampled for
    that label with ``manual_smote``, the labels are binarised (1 = this class,
    0 = any other), a ``LinearRegression`` is fitted on the binary target, and
    its predictions on ``X_test`` are thresholded at 0.5.

    Parameters
    ----------
    X_train, y_train : arrays
        Training features and labels.
    X_test, y_test : arrays
        Held-out features and labels used for the reports.
    setting : str
        Forwarded to ``manual_smote`` to select the interpolation strategy.
    n_samples : int
        Number of synthetic samples requested from ``manual_smote`` per class.
    class_names : sequence of str or None
        Display names indexed by class label. ``None`` falls back to the
        notebook-level encoder's ``le.classes_``.

    Returns
    -------
    dict
        Maps each class label to the text produced by ``classification_report``.
    """
    names = le.classes_ if class_names is None else class_names
    reports = {}

    for cls in np.unique(y_train):
        # Oversample only the class currently treated as the positive one.
        X_res, y_res = manual_smote(
            X_train, y_train, target_class=cls, setting=setting, n_samples=n_samples
        )

        # One-vs-rest targets: 1 for this class, 0 for everything else.
        y_train_binary = (y_res == cls).astype(int)
        y_test_binary = (y_test == cls).astype(int)

        # Regress on the 0/1 target, then cut the continuous output at 0.5.
        model = LinearRegression().fit(X_res, y_train_binary)
        y_pred_binary = (model.predict(X_test) >= 0.5).astype(int)

        # labels=[0, 1] pins both rows of the report. Without it the call raises
        # when only one of the two values appears in truth and prediction,
        # because two target_names are supplied.
        reports[cls] = classification_report(
            y_test_binary, y_pred_binary,
            labels=[0, 1],
            target_names=[f"Not {names[cls]}", names[cls]],
        )

    return reports


In [None]:
# manual_smote draws from NumPy's global RNG, so seed it here to make the
# reports below identical on every Restart & Run All. 42 mirrors the
# random_state already used for the train/test split.
np.random.seed(42)

# Setting 1: interpolate between two randomly chosen samples of the class.
print("SMOTE1 : Random pairs interpolation\n")
reports_random = train_and_evaluate(X_train, y_train, X_test, y_test, setting="random")
for cls, rep in reports_random.items():
    print(f"Classifier for {le.classes_[cls]}")
    print(rep, "\n")

# Setting 2: interpolate between a sample and its nearest same-class neighbour.
print("\nSMOTE2 :Nearest neighbor interpolation\n")
reports_nearest = train_and_evaluate(X_train, y_train, X_test, y_test, setting="nearest")
for cls, rep in reports_nearest.items():
    print(f"Classifier for {le.classes_[cls]}")
    print(rep, "\n")


=== SMOTE Setting A: Random pairs interpolation ===

Classifier for Iris-setosa
                 precision    recall  f1-score   support

Not Iris-setosa       1.00      1.00      1.00        30
    Iris-setosa       1.00      1.00      1.00        15

       accuracy                           1.00        45
      macro avg       1.00      1.00      1.00        45
   weighted avg       1.00      1.00      1.00        45
 

Classifier for Iris-versicolor
                     precision    recall  f1-score   support

Not Iris-versicolor       0.82      0.77      0.79        30
    Iris-versicolor       0.59      0.67      0.62        15

           accuracy                           0.73        45
          macro avg       0.70      0.72      0.71        45
       weighted avg       0.74      0.73      0.74        45
 

Classifier for Iris-virginica
                    precision    recall  f1-score   support

Not Iris-virginica       0.92      0.80      0.86        30
    Iris-virginica  