In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from scipy.stats import norm

In [7]:
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data.to_numpy(), mnist.target.to_numpy().astype(int)

In [9]:
X = X / 255.0
y_binary = np.where(y == 0, 1, -1)

X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

In [11]:
class SVM:
    def __init__(self, learning_rate=0.01, lambda_param=0.01, epochs=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.epochs = epochs
        self.w = None
        self.b = 0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        for _ in range(self.epochs):
            for i, x_i in enumerate(X):
                condition = y[i] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)  
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y[i]))
                    self.b -= self.lr * y[i]

    def predict(self, X):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)

In [13]:
class NaiveBayes:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}
        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.var[c] = np.var(X_c, axis=0) + 1e-9 
            self.priors[c] = X_c.shape[0] / X.shape[0]

    def predict(self, X):
        preds = [self._predict_single(x) for x in X]
        return np.array(preds)

    def _predict_single(self, x):
        posteriors = []
        for c in self.classes:
            prior = np.log(self.priors[c])
            class_conditional = np.sum(np.log(self._pdf(c, x)))
            posterior = prior + class_conditional
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]

    def _pdf(self, class_idx, x):
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        var += 1e-9  
        return norm.pdf(x, mean, np.sqrt(var)) + 1e-9  

In [15]:
svm = SVM(learning_rate=0.001, lambda_param=0.01, epochs=1000)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [17]:
nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)

In [19]:
def calculate_metrics(y_true, y_pred):
    conf_matrix = confusion_matrix(y_true, y_pred)
    precision = {}
    recall = {}
    f1_score = {}

    for i, label in enumerate(np.unique(y_true)):
        TP = conf_matrix[i, i]
        FP = sum(conf_matrix[:, i]) - TP
        FN = sum(conf_matrix[i, :]) - TP
        precision[label] = TP / (TP + FP) if (TP + FP) > 0 else 0
        recall[label] = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1_score[label] = 2 * (precision[label] * recall[label]) / (precision[label] + recall[label]) if (precision[label] + recall[label]) > 0 else 0

    return precision, recall, f1_score, conf_matrix

In [21]:
precision_svm, recall_svm, f1_svm, conf_matrix_svm = calculate_metrics(y_test, y_pred_svm)

In [23]:
precision_nb, recall_nb, f1_nb, conf_matrix_nb = calculate_metrics(y_test, y_pred_nb)

In [25]:
def print_metrics(model_name, precision, recall, f1_score, conf_matrix, accuracy):
    print(f"\n\n===== {model_name} Results =====")
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("\nConfusion Matrix:")
    print(conf_matrix)
    print("\nPrecision, Recall, F1-Score per class:")
    print("Class  | Precision  | Recall  | F1-Score")
    print("-----------------------------------------")
    for label in precision.keys():
        print(f"  {label:4} |   {precision[label]:.4f}   | {recall[label]:.4f}  | {f1_score[label]:.4f}")

In [27]:
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print_metrics("Support Vector Machine (SVM)", precision_svm, recall_svm, f1_svm, conf_matrix_svm, accuracy_svm)

accuracy_nb = accuracy_score(y_test, y_pred_nb)
print_metrics("Naïve Bayes", precision_nb, recall_nb, f1_nb, conf_matrix_nb, accuracy_nb)



===== Support Vector Machine (SVM) Results =====
Accuracy: 99.09%

Confusion Matrix:
[[12613    44]
 [   83  1260]]

Precision, Recall, F1-Score per class:
Class  | Precision  | Recall  | F1-Score
-----------------------------------------
    -1 |   0.9935   | 0.9965  | 0.9950
     1 |   0.9663   | 0.9382  | 0.9520


===== Naïve Bayes Results =====
Accuracy: 29.19%

Confusion Matrix:
[[2747 9910]
 [   3 1340]]

Precision, Recall, F1-Score per class:
Class  | Precision  | Recall  | F1-Score
-----------------------------------------
    -1 |   0.9989   | 0.2170  | 0.3566
     1 |   0.1191   | 0.9978  | 0.2128
