In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import NearestNeighbors

# Load dataset
df = pd.read_csv("Iris.csv")

# Drop Id column if present
if "Id" in df.columns:
    df = df.drop("Id", axis=1)

# Encode labels (Species -> 0,1,2)
le = LabelEncoder()
df["Species"] = le.fit_transform(df["Species"])

X = df.drop("Species", axis=1).values
y = df["Species"].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

labels = np.unique(y)
class_names = le.classes_


In [19]:
def manual_smote(X, y, target_class, setting="random", n_samples=20):
    X_minority = X[y == target_class]
    synthetic_samples = []

    if setting == "random":
        # (a) Random two-sample interpolation
        for _ in range(n_samples):
            i, j = np.random.choice(len(X_minority), 2, replace=False)
            lam = np.random.rand()
            x_new = lam * X_minority[i] + (1 - lam) * X_minority[j]
            synthetic_samples.append(x_new)

    elif setting == "nearest":
        # (b) Nearest-neighbor interpolation
        nn = NearestNeighbors(n_neighbors=2).fit(X_minority)
        for i in range(len(X_minority)):
            _, idx = nn.kneighbors([X_minority[i]])
            j = idx[0][1]   # nearest neighbor
            lam = np.random.rand()
            x_new = lam * X_minority[i] + (1 - lam) * X_minority[j]
            synthetic_samples.append(x_new)

    synthetic_samples = np.array(synthetic_samples)
    y_new = np.full(len(synthetic_samples), target_class)

    X_resampled = np.vstack([X, synthetic_samples])
    y_resampled = np.hstack([y, y_new])

    return X_resampled, y_resampled


In [20]:
def train_and_collect_preds(X_train, y_train, X_test, setting):
    all_preds = np.zeros_like(y_test)

    for cls in np.unique(y_train):
        # Oversample this class
        X_res, y_res = manual_smote(X_train, y_train, target_class=cls,
                                    setting=setting, n_samples=30)

        # Binary labels
        y_train_binary = (y_res == cls).astype(int)

        # Train linear regression
        model = LinearRegression().fit(X_res, y_train_binary)

        # Predict binary for this class
        y_pred_binary = (model.predict(X_test) >= 0.5).astype(int)

        # Assign class if predicted positive
        for i in range(len(y_test)):
            if y_pred_binary[i] == 1:
                all_preds[i] = cls

    return all_preds


In [21]:
def compute_metrics(y_true, y_pred, pos_label=1):
    tp = np.sum((y_true == pos_label) & (y_pred == pos_label))
    fp = np.sum((y_true != pos_label) & (y_pred == pos_label))
    fn = np.sum((y_true == pos_label) & (y_pred != pos_label))

    prec = tp / (tp + fp) if (tp + fp) > 0 else 0
    rec  = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1   = (2 * prec * rec) / (prec + rec) if (prec + rec) > 0 else 0

    return prec, rec, f1, tp, fp, fn

def classification_report_manual(y_true, y_pred, labels, class_names):
    report = {}
    total_tp = total_fp = total_fn = 0
    weighted_sum = 0
    total_samples = len(y_true)

    for cls, name in zip(labels, class_names):
        prec, rec, f1, tp, fp, fn = compute_metrics((y_true==cls).astype(int),
                                                    (y_pred==cls).astype(int))
        support = np.sum(y_true==cls)
        report[name] = {"precision": prec, "recall": rec, "f1": f1, "support": support}
        total_tp += tp
        total_fp += fp
        total_fn += fn
        weighted_sum += f1 * support

    # Macro, Micro, Weighted
    macro_f1 = np.mean([v["f1"] for v in report.values()])
    micro_prec = total_tp / (total_tp + total_fp) if (total_tp+total_fp) > 0 else 0
    micro_rec  = total_tp / (total_tp + total_fn) if (total_tp+total_fn) > 0 else 0
    micro_f1   = (2 * micro_prec * micro_rec) / (micro_prec + micro_rec) if (micro_prec+micro_rec) > 0 else 0
    weighted_f1 = weighted_sum / total_samples

    return report, macro_f1, micro_f1, weighted_f1

def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)


In [22]:
def run_experiment(setting_name):
    print(f"\n=== {setting_name.upper()} ===")
    y_pred = train_and_collect_preds(X_train, y_train, X_test, setting_name)

    acc = accuracy(y_test, y_pred)
    report, macro_f1, micro_f1, weighted_f1 = classification_report_manual(
        y_test, y_pred, labels, class_names
    )

    print("Accuracy:", acc)
    print("Macro F1:", macro_f1)
    print("Micro F1:", micro_f1)
    print("Weighted F1:", weighted_f1, "\n")

    print("Per-Class Report:")
    for cls, metrics in report.items():
        print(f"{cls:15s} | "
              f"Precision: {metrics['precision']:.3f} | "
              f"Recall: {metrics['recall']:.3f} | "
              f"F1: {metrics['f1']:.3f} | "
              f"Support: {metrics['support']}")

# Run both SMOTE settings
run_experiment("random")
run_experiment("nearest")



=== RANDOM ===
Accuracy: 0.8222222222222222
Macro F1: 0.8114695340501793
Micro F1: 0.8222222222222222
Weighted F1: 0.8114695340501792 

Per-Class Report:
Iris-setosa     | Precision: 0.938 | Recall: 1.000 | F1: 0.968 | Support: 15
Iris-versicolor | Precision: 0.889 | Recall: 0.533 | F1: 0.667 | Support: 15
Iris-virginica  | Precision: 0.700 | Recall: 0.933 | F1: 0.800 | Support: 15

=== NEAREST ===
Accuracy: 0.8222222222222222
Macro F1: 0.8114695340501793
Micro F1: 0.8222222222222222
Weighted F1: 0.8114695340501792 

Per-Class Report:
Iris-setosa     | Precision: 0.938 | Recall: 1.000 | F1: 0.968 | Support: 15
Iris-versicolor | Precision: 0.889 | Recall: 0.533 | F1: 0.667 | Support: 15
Iris-virginica  | Precision: 0.700 | Recall: 0.933 | F1: 0.800 | Support: 15
