<a href="https://colab.research.google.com/github/PCBZ/CS6140/blob/main/HW5/HW5_Problem1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
%pip install ucimlrepo

from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
import numpy as np

def fetch_spambase_data():
    """
    Download the Spambase dataset (UCI repository id 94).

    Returns:
        A ``(X, y)`` tuple: ``X`` is the underlying array of the feature
        table and ``y`` is the target table flattened to one dimension.
    """
    dataset = fetch_ucirepo(id=94).data

    # `.values` unwraps each table to its array; the targets are flattened
    # with ravel() so downstream code receives a 1-D label vector.
    return dataset.features.values, dataset.targets.values.ravel()

def preprocess_data(X, y, *, test_size=0.2, random_state=None):
    """
    Relabel the targets, split into train/test sets, and standardize features.

    Args:
        X: Feature matrix, one row per sample.
        y: Label array; entries equal to 0 are mapped to -1 and every other
            value is mapped to 1.
        test_size: Forwarded to ``train_test_split``. Defaults to 0.2, the
            value that was previously hard-coded.
        random_state: Forwarded to ``train_test_split``. The default ``None``
            keeps the original unseeded shuffle; pass an int to make the
            split (and therefore the reported accuracies) reproducible.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` with both feature matrices
        standardized.
    """
    # Convert labels to 1/-1
    y = np.where(y == 0, -1, 1)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Standardization: the scaler is fitted on the training split only and
    # then applied to the test split, so test statistics never influence it.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

def train_and_evaluate_svm_model(X_train, y_train, X_test, y_test):
    """
    Fit one SVC per kernel and print its training and testing accuracy.

    The linear and rbf kernels use the SVC defaults; the poly kernel is
    configured with degree=2, C=5 and gamma=0.01. Nothing is returned; the
    accuracies are only printed.
    """
    # Extra SVC keyword arguments per kernel, in evaluation order.
    kernel_options = {
        'linear': {},
        'rbf': {},
        'poly': {'degree': 2, 'C': 5, 'gamma': 0.01},
    }

    for kernel, options in kernel_options.items():
        classifier = SVC(kernel=kernel, **options)
        classifier.fit(X_train, y_train)

        train_accuracy = accuracy_score(y_train, classifier.predict(X_train))
        test_accuracy = accuracy_score(y_test, classifier.predict(X_test))

        print(f"{kernel} kernel training Accuracy: {train_accuracy: .2%}")
        print(f"{kernel} kernel testing Accuracy: {test_accuracy: .2%}")


if __name__ == "__main__":
    # Download Spambase, then relabel, split and standardize it.
    X, y = fetch_spambase_data()
    X_train, X_test, y_train, y_test = preprocess_data(X, y)

    # NOTE: preprocess_data returns (X_train, X_test, y_train, y_test) while
    # the trainer takes (X_train, y_train, X_test, y_test) -- the argument
    # order below is deliberately different from the unpacking order above.
    train_and_evaluate_svm_model(X_train, y_train, X_test, y_test)

linear kernel training Accuracy:  93.32%
linear kernel testing Accuracy:  91.53%
rbf kernel training Accuracy:  94.84%
rbf kernel testing Accuracy:  92.29%
poly kernel training Accuracy:  88.99%
poly kernel testing Accuracy:  87.19%


In [None]:
import requests
import zipfile
import numpy as np
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
import os

def fetch_data():
    """
    Download, extract and load the ``mnist_haar_bingyu`` dataset.

    The archive is saved to ``mnist_haar_bingyu.zip`` in the current working
    directory and extracted into ``mnist_haar_bingyu/``. Four comma-delimited
    text files are then read from that folder.

    Returns:
        ``(train_images, train_labels, test_images, test_labels)``; the image
        arrays use ``np.loadtxt``'s default dtype and the label arrays are
        loaded with ``dtype=int``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server stops responding.
    """
    zip_url = "https://www.khoury.northeastern.edu/home/vip/teach/MLcourse/data/mnist_haar_bingyu.zip"
    zip_filename = "mnist_haar_bingyu.zip"
    extracted_folder = "mnist_haar_bingyu"

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(zip_url, timeout=60)
    # Fail here on an HTTP error instead of writing the error page to disk
    # and hitting an unrelated BadZipFile further down.
    response.raise_for_status()
    with open(zip_filename, 'wb') as f:
        f.write(response.content)

    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall(extracted_folder)

    def load(name, **kwargs):
        # All four files share the same folder and delimiter.
        return np.loadtxt(os.path.join(extracted_folder, name), delimiter=',', **kwargs)

    train_images = load("training_image.txt")
    train_labels = load("training_label.txt", dtype=int)
    test_images = load("testing_image.txt")
    test_labels = load("testing_label.txt", dtype=int)

    return train_images, train_labels, test_images, test_labels

def train_and_evaluate_svm_model(X_train, y_train, X_test, y_test):
    """
    Fit one SVM per kernel name and print training and testing accuracy.

    The 'linear' entry is fitted with LinearSVC (default settings) rather
    than SVC(kernel='linear'); 'rbf' and 'poly' use SVC with its defaults.
    Nothing is returned; the accuracies are only printed.
    """
    for kernel in ('linear', 'rbf', 'poly'):
        classifier = LinearSVC() if kernel == 'linear' else SVC(kernel=kernel)
        classifier.fit(X_train, y_train)

        train_accuracy = accuracy_score(y_train, classifier.predict(X_train))
        test_accuracy = accuracy_score(y_test, classifier.predict(X_test))

        print(f"{kernel} kernel training Accuracy: {train_accuracy: .2%}")
        print(f"{kernel} kernel testing Accuracy: {test_accuracy: .2%}")

if __name__ == "__main__":
    # Download and load the dataset, then compare the kernels on it.
    # The features are passed to the SVMs as loaded; unlike the Spambase
    # cell, no standardization step is applied here.
    train_images, train_labels, test_images, test_labels = fetch_data()
    train_and_evaluate_svm_model(train_images, train_labels, test_images, test_labels)

linear kernel training Accuracy:  90.96%
linear kernel testing Accuracy:  91.47%
rbf kernel training Accuracy:  96.77%
rbf kernel testing Accuracy:  96.37%
poly kernel training Accuracy:  97.02%
poly kernel testing Accuracy:  96.45%
