# Coding Assignment 5

CS 598 Practical Statistical Learning

2023-11-27

UIUC Fall 2023

**Authors**
* Ryan Fogle
    - rsfogle2@illinois.edu
    - UIN: 652628818
* Sean Enright
    - seanre2@illinois.edu
    - UIN: 661791377

**Contributions**

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [None]:
# Seed NumPy's global RNG so that later calls to np.random are reproducible
np.random.seed(3434)

# Load the training and test splits from CSV
train = pd.read_csv('coding5_train.csv')
test = pd.read_csv('coding5_test.csv')

# Build the data matrices.
# Features: every column except the last one (selected by position).
# Labels:   the column named 'Y'.
# NOTE(review): this assumes 'Y' is the last column of both files -- confirm.
X_train = train.iloc[:, :-1].copy().values
X_test = test.iloc[:, :-1].copy().values
Y_train = train['Y'].copy().values
Y_test = test['Y'].copy().values

In [None]:
# Encode the two classes as -1 / +1.
# The smallest label found in the TRAINING data becomes -1 and every other
# label becomes +1 (per the original notes: Y == 5 -> -1, Y == 6 -> +1).
# The same training-derived rule is applied to the test labels, so both
# splits always share one encoding even if the test split happens not to
# contain the smaller class.
negative_label = Y_train.min()
y_train = np.where(Y_train == negative_label, -1.0, 1.0)
y_test = np.where(Y_test == negative_label, -1.0, 1.0)

In [None]:
def pegasos(X, Y, epochs=20, lam=1):
    """Fit a linear classifier with stochastic sub-gradient steps on the hinge loss.

    The rows are shuffled once (using NumPy's global random state), then the
    data are swept ``epochs`` times in that fixed order. At global step ``t``
    the learning rate is ``1 / (t * lam)``. For a row that violates the margin
    (``y * (x @ beta + alpha) < 1``) both the weights and the intercept move
    toward classifying it correctly; otherwise only the ``lam * beta``
    shrinkage is applied to the weights and the intercept is left unchanged.

    Parameters
    ----------
    X : array with one row per observation and one column per feature.
    Y : array of labels aligned with the rows of ``X``; the margin test
        treats them as -1 / +1.
    epochs : number of full passes over the data.
    lam : regularization strength; also scales the learning rate.

    Returns
    -------
    beta : array of shape ``(n_features, 1)`` holding the weights.
    alpha : scalar intercept (the integer ``0`` if no update was performed).
    """
    n_samples = X.shape[0]
    n_features = X.shape[1]

    # Visit the observations in a random order (same order in every epoch)
    order = np.arange(n_samples)
    np.random.shuffle(order)
    X, Y = X[order], Y[order]

    # Initialize parameters
    beta = np.zeros((n_features, 1))
    alpha = 0
    step = 0

    for _ in range(epochs):
        for x_i, y_i in zip(X, Y):
            step += 1
            eta = 1 / (step * lam)

            # Regularization part of the sub-gradient is always present
            grad_beta = lam * beta
            grad_alpha = 0

            # Hinge-loss part only contributes when the margin is violated
            if y_i * (np.dot(x_i, beta) + alpha) < 1:
                grad_beta = grad_beta - (y_i * x_i).reshape(-1, 1)
                grad_alpha = -y_i

            beta = beta - eta * grad_beta
            alpha = alpha - eta * grad_alpha

    return beta, alpha

# Fit on the training split using the default number of epochs and lambda
beta, alpha = pegasos(X_train, y_train)

# Raw decision values x @ beta + alpha for every test row, flattened to 1-D;
# the sign of each value is the predicted class
Y_pred = np.ravel(X_test @ beta + alpha)

In [None]:
# Confusion matrix on the test split.
# Truth:      True where the test label equals the largest test label.
# Prediction: True where the decision value is positive.
fig = ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(
        y_true=(Y_test == Y_test.max()),
        y_pred=(Y_pred > 0),
    )
)
fig.plot()
plt.show()

In [None]:
# Share of test rows whose predicted sign agrees with the true class,
# reported together with its complement (the test error)
acc = accuracy_score(
    y_true=(Y_test == Y_test.max()),
    y_pred=(Y_pred.reshape(-1) > 0),
)
print(f'Accuracy: {acc:.2f}', f'Test Error: {1 - acc:.2f}', sep='\n')