In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

def polynomial_kernel(x1, x2, degree=3, coef0=1):
    """Evaluate the polynomial kernel ``(x1 . x2^T + coef0) ** degree``.

    Args:
        x1: Array passed as the left operand of ``np.dot``.
        x2: Array whose transpose (``x2.T``) is the right operand.
        degree: Exponent applied element-wise to the shifted product.
        coef0: Constant added to every inner product before exponentiation.

    Returns:
        The result of ``np.dot(x1, x2.T)`` shifted by ``coef0`` and raised
        to ``degree``. With ``degree=1`` and ``coef0=0`` this is the plain
        linear kernel.
    """
    inner_products = np.dot(x1, x2.T)
    shifted = inner_products + coef0
    return shifted ** degree

class KernelSVM:
    """Binary classifier with a polynomial kernel, trained by per-sample updates.

    The model keeps one coefficient ``alpha[j]`` per training sample plus a
    scalar bias ``b``. The decision value for a point ``x`` is
    ``sum_j alpha[j] * y[j] * k(x_j, x) + b`` where ``k`` is
    ``polynomial_kernel``. Labels must be encoded as -1 / +1.

    Args:
        lr: Step size used for every coefficient and bias update.
        lambda_param: Shrinkage factor applied to ``alpha[i]`` when sample
            ``i`` already satisfies the margin condition.
        n_iters: Number of full passes over the training samples.
        degree: Exponent forwarded to ``polynomial_kernel``.
        coef0: Additive constant forwarded to ``polynomial_kernel``.
    """

    def __init__(self, lr=0.001, lambda_param=0.01, n_iters=100, degree=3, coef0=1):
        self.lr = lr
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.degree = degree
        self.coef0 = coef0
        # Learned state; populated by fit().
        self.alpha = None
        self.b = 0
        # Training data is retained because predict() needs kernel values
        # between the training samples and the query points.
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Learn ``alpha`` and ``b`` from the training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of length n_samples containing only -1 and +1.

        Raises:
            ValueError: If ``X`` is not 2-D, ``y`` is not 1-D with one entry
                per row of ``X``, or ``y`` contains values other than -1/+1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)."
            )
        # A column-vector y would silently broadcast alpha * y to (n, n),
        # so the shape is checked explicitly.
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError(
                f"y must be 1-D with {X.shape[0]} entries; got shape {y.shape}."
            )
        if not np.all(np.isin(y, (-1, 1))):
            raise ValueError("y must contain only the labels -1 and +1.")

        n_samples = X.shape[0]
        self.X_train = X
        self.y_train = y
        self.alpha = np.zeros(n_samples)
        self.b = 0

        # Full Gram matrix of the training set, computed once up front.
        K = polynomial_kernel(X, X, self.degree, self.coef0)

        for _ in range(self.n_iters):
            for i in range(n_samples):
                # Decision value for sample i under the current coefficients.
                pred = np.sum(self.alpha * self.y_train * K[:, i]) + self.b
                margin = y[i] * pred

                if margin >= 1:
                    # Margin satisfied: only shrink this sample's coefficient.
                    self.alpha[i] -= self.lr * (self.lambda_param * self.alpha[i])
                else:
                    # Margin violated: grow the coefficient in proportion to
                    # the violation and move the bias towards the true label.
                    self.alpha[i] += self.lr * (1 - margin)
                    self.b += self.lr * y[i]

    def predict(self, X_test):
        """Predict -1 / +1 labels for ``X_test``.

        Args:
            X_test: Array-like of query points with the same number of
                features as the training data.

        Returns:
            Float array of labels, each either -1.0 or 1.0. A decision value
            of exactly zero is assigned to +1 so that the output never
            contains a third value.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.alpha is None or self.X_train is None:
            raise RuntimeError("KernelSVM.predict() called before fit().")

        X_test = np.asarray(X_test, dtype=float)
        K = polynomial_kernel(self.X_train, X_test, self.degree, self.coef0)
        decision = np.dot(self.alpha * self.y_train, K) + self.b
        # np.sign would yield 0 for decision == 0, which is not a valid label.
        return np.where(decision >= 0, 1.0, -1.0)

# Location of the e-mail dataset; edit this path to match your machine.
DATA_PATH = r"C:\Users\ROHIT\OneDrive\Desktop\SM\AIML\datasets\emails_16_17_18_19.csv"

# Label encoding used by KernelSVM: -1 = not spam, +1 = spam.
# Passing these explicitly to the metric functions keeps CLASS_NAMES aligned
# with the rows/columns of the report even if a prediction falls outside
# this set or one class is absent from the predictions.
CLASS_LABELS = [-1, 1]
CLASS_NAMES = ['Not Spam (-1)', 'Spam (1)']

try:
    data = pd.read_csv(DATA_PATH)

    # Everything except the row identifier and the target is a feature.
    X = data.drop(columns=["Email No.", "Prediction"]).values
    # Re-encode the 0/1 target as -1/+1.
    y = np.where(data["Prediction"] == 0, -1, 1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    print(f"Original training shape: {X_train.shape}")
    # bincount over a boolean mask gives [count of False, count of True],
    # i.e. [not spam, spam].
    print(f"Original training class distribution: {np.bincount(y_train == 1)}")

    # Oversample the training split only; the test split is left untouched.
    smote = SMOTE(random_state=42)
    X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

    print(f"Resampled training shape: {X_train_res.shape}")
    print(f"Resampled training class distribution: {np.bincount(y_train_res == 1)}")

    print("\nTraining SVM (Linear Kernel) with Oversampled Data...")
    # degree=1 with coef0=0 makes the polynomial kernel a plain dot product.
    svm_model_18 = KernelSVM(lr=0.001, lambda_param=0.01, n_iters=50, degree=1, coef0=0)

    svm_model_18.fit(X_train_res, y_train_res)

    y_pred = svm_model_18.predict(X_test)

    print("\n--- Evaluation (Problem 18) ---")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred, labels=CLASS_LABELS))

    print("\nClassification Report:")
    print(
        classification_report(
            y_test, y_pred, labels=CLASS_LABELS, target_names=CLASS_NAMES
        )
    )

except FileNotFoundError:
    print("Error: 'emails_16_17_18_19.csv' not found.")
except Exception as e:
    # Notebook-level boundary: report the failure instead of raising.
    print(f"An error occurred: {e}")

Good afternoon, ma’am. This code builds an email spam vs. not-spam classifier using a Support Vector Machine (SVM) with a kernel implemented from scratch, and it also handles class imbalance using SMOTE before training. First, I import NumPy and pandas for arrays and data frames (Matplotlib and Seaborn are not used here); from scikit-learn I bring in train_test_split, StandardScaler, and the evaluation metrics, and I also import SMOTE from imblearn.over_sampling to balance the classes. I define a polynomial_kernel(x1, x2, degree=3, coef0=1) function that returns $(x_1 \cdot x_2^\top + \text{coef0})^{\text{degree}}$; this lets the SVM work in an implicit feature space without explicitly creating polynomial features. Then I create a class KernelSVM with learning rate lr, regularization strength lambda_param, number of iterations n_iters, and kernel shape parameters degree and coef0; inside it I keep the dual weights $\alpha$, the bias $b$, and references to the training data for later use in prediction. In fit, I store the training matrix $\mathbf{X}$ and labels $\mathbf{y}$ (labels must be $-1$ for Not-Spam and $+1$ for Spam), initialize all dual weights $\alpha$ to zero and $b$ to zero, and precompute the full Gram matrix $\mathbf{K} = \text{polynomial\_kernel}(\mathbf{X}, \mathbf{X})$, whose $(i,j)$ entry is the kernel between samples $i$ and $j$. I then run n_iters passes; in each pass I loop through every sample $i$, compute the current decision value $\text{pred} = \sum_j \alpha_j \, y_j \, \mathbf{K}[j, i] + b$, check the margin condition $y[i] \cdot \text{pred} \ge 1$ (which means the point is correctly classified with margin), and update: if the condition holds, I shrink $\alpha[i]$ a bit by $\text{lr} \cdot \text{lambda\_param} \cdot \alpha[i]$ (regularization step); otherwise I make a hinge-loss correction by increasing $\alpha[i]$ by $\text{lr} \cdot (1 - y[i] \cdot \text{pred})$ and also nudge the bias with $b \leftarrow b + \text{lr} \cdot y[i]$.
After training, predict computes the kernel between all training points and each test point, forms the decision function $\text{decision} = (\alpha \odot y_{\text{train}})^\top \mathbf{K} + b$ (where $\odot$ denotes the element-wise product), and returns its sign as the class label ($-1$ or $+1$). Next, in a try block I load the email dataset from the given CSV path, drop the non-feature columns Email No. and Prediction to form $\mathbf{X}$, and encode the target $\mathbf{y}$ as $-1$ for original $0$ (Not Spam) and $+1$ for original $1$ (Spam), which matches the SVM’s expected labels. I split the data into train and test with an $80/20$ stratified split, so that both sets keep the same class proportions as the full dataset. I standardize features using StandardScaler, fitting on the training set only and transforming both train and test to avoid data leakage. I print the original training shape and class distribution (the distribution line uses np.bincount(y_train == 1), which counts False and True occurrences—so it shows counts of not-spam vs. spam). To address any imbalance, I apply SMOTE on the scaled training data to synthetically oversample the minority class and print the new balanced class counts. Then I announce training an “SVM (Linear Kernel)” and instantiate KernelSVM with degree=1 and coef0=0; note that a polynomial kernel with degree $1$ and $\text{coef0} = 0$ reduces to a linear kernel, so we are effectively training a linear SVM but still via the kernel machinery. I call fit on the oversampled training data, then predict on the test set (which is only scaled, not oversampled). Finally, I evaluate the model: I print the confusion matrix to see true/false positives and negatives, and a classification report with precision, recall, and F1 for both classes labeled “Not Spam ($-1$)” and “Spam ($1$)”.
In essence, this script loads and cleans the email feature data, encodes labels as $-1/+1$ for SVM, standardizes features, uses SMOTE to balance classes, trains a from-scratch kernel SVM with a linear kernel setting, predicts on the held-out test set, and reports clear classification metrics so we can judge how well the spam detector performs.