# 1. Multi-class Classification
## 1.1 Dataset

In [None]:
import os
import random
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import scipy.optimize as opt
from pathlib import Path

# Locate the exercise data relative to the current working directory.
data_file_1 = Path(os.path.abspath("")).joinpath("data", "ex3data1.mat")
data_set_1 = sio.loadmat(data_file_1)

# Each row of X is one flattened 20 * 20 image.
# Each row of y is the label of that image: 1..9 stand for the digits 1..9
# and 10 stands for the digit 0.
X, y = (np.array(data_set_1[field]) for field in ("X", "y"))

print(X, y, sep="\n")

## 1.2 Visualizing the data

In [None]:
# Display 25 samples, drawn at random with replacement, on a 5 x 5 grid.
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(8, 8))
random_indexes = random.choices(range(X.shape[0]), k=25)

for ax, sample_index in zip(axes.flat, random_indexes):
    # Every row of X is reshaped back into a 20 x 20 image before plotting.
    ax.imshow(X[sample_index].reshape(20, 20))

## 1.3 Vectorizing logistic regression
### 1.3.1 Vectorizing the cost function

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The value is computed from ``exp(-|z|)``, which always lies in (0, 1],
    so large negative inputs no longer overflow ``np.exp`` (the direct
    formula emits a RuntimeWarning there).

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z)), exactly the textbook expression.
    # z <  0: exp(z) / (1 + exp(z)), the same value written without exp(-z).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]

def cost(theta, X, y):
    """Return the unregularized logistic-regression cost (mean cross-entropy).

    With ``z = X @ theta`` the two log terms are evaluated as
    ``-log(sigmoid(z)) = logaddexp(0, -z)`` and
    ``-log(1 - sigmoid(z)) = logaddexp(0, z)``.
    This avoids ``log(0)``, so saturated predictions give a finite cost
    instead of ``nan``/``inf``. It also avoids negating ``y``, which would
    wrap around for unsigned integer label arrays.

    Args:
        theta: Parameter vector, either 1-D of length n or an (n, 1) column.
        X: Design matrix with one sample per row and n columns.
        y: Binary (0/1) targets, laid out like ``theta`` (1-D or a column).

    Returns:
        The cost as a Python float.
    """
    m = X.shape[0]
    z = X @ theta
    positive_loss = y.T @ np.logaddexp(0, -z)
    negative_loss = (1 - y).T @ np.logaddexp(0, z)
    j = (positive_loss + negative_loss) / m
    return j.item()

### 1.3.2 Vectorizing the gradient

In [None]:
def gradient(theta, X, y):
    """Return the gradient of the unregularized logistic-regression cost.

    Args:
        theta: Parameter vector, either 1-D of length n or an (n, 1) column.
        X: Design matrix with one sample per row and n columns.
        y: Binary (0/1) targets, laid out like ``theta`` (1-D or a column).

    Returns:
        ``X.T @ (sigmoid(X @ theta) - y) / m`` where m is the number of rows
        of ``X``.
    """
    m = X.shape[0]
    residual = sigmoid(X @ theta) - y
    # Divide after the matrix product: `1 / m * X.T @ r` parses as
    # `((1 / m) * X.T) @ r`, which rescales the whole design matrix into a
    # temporary on every call.
    return X.T @ residual / m

### 1.3.3 Vectorizing regularized logistic regression

In [None]:
def regularized_cost(theta, lamda, X, y):
    """Return the logistic-regression cost plus an L2 penalty.

    The penalty is ``lamda / (2 * m)`` times the sum of squared parameters,
    where the first parameter (the bias term) is left out.

    Args:
        theta: Parameter vector (1-D or a column).
        lamda: Regularization strength.
        X: Design matrix with one sample per row.
        y: Binary targets.

    Returns:
        The penalized cost as a float.
    """
    sample_count = X.shape[0]
    # Same values as theta, but with the leading entry replaced by zero so
    # that it contributes nothing to the penalty.
    penalized = np.insert(theta[1:], 0, 0, axis=0)
    squared_norm = (penalized.T @ penalized).item()
    penalty = lamda / (2 * sample_count) * squared_norm
    return cost(theta, X, y) + penalty

def regularized_gradient(theta, lamda, X, y):
    """Return the gradient of the L2-regularized logistic-regression cost.

    The penalty contributes ``lamda / m * theta`` to every component except
    the first one (the bias term), which is not regularized.

    Args:
        theta: Parameter vector (1-D or a column).
        lamda: Regularization strength.
        X: Design matrix with one sample per row.
        y: Binary targets.

    Returns:
        The sum of ``gradient(theta, X, y)`` and the penalty term.
    """
    sample_count = X.shape[0]
    # Same values as theta, but with the leading entry replaced by zero so
    # that the bias term receives no shrinkage.
    penalized = np.insert(theta[1:], 0, 0, axis=0)
    shrinkage = lamda / sample_count * penalized
    return gradient(theta, X, y) + shrinkage

# Small hand-built problem used to sanity-check the regularized cost and gradient.
theta_t = np.array([-2, -1, 1, 2]).reshape(-1, 1)
# Columns hold 0.1..0.5, 0.6..1.0 and 1.1..1.5, preceded by a column of ones.
X_t = np.insert(np.arange(1, 16).reshape((3, 5)).T / 10, 0, np.ones(5), axis=1)
y_t = np.array([1, 0, 1, 0, 1]).reshape(-1, 1)
lambda_t = 3
print(theta_t, X_t, y_t, lambda_t, sep="\n")

cost_t = regularized_cost(theta_t, lambda_t, X_t, y_t)
grad_t = regularized_gradient(theta_t, lambda_t, X_t, y_t)
print(cost_t, grad_t, sep="\n")

## 1.4 One-vs-all classification

In [None]:
# Number of classifiers to train; one_vs_all uses the labels 1..num_labels.
num_labels = 10
# Regularization strength handed to one_vs_all.
lamda = 0.1

def one_vs_all(X, y, num_labels, lamda):
    """Train one regularized logistic-regression classifier per label.

    For every label in 1..num_labels a binary problem "this label vs. the
    rest" is solved with ``scipy.optimize.minimize`` (method ``TNC``), using
    ``regularized_cost`` as objective and ``regularized_gradient`` as
    Jacobian so that ``lamda`` actually takes effect.

    Args:
        X: Feature matrix with one sample per row (no bias column).
        y: Labels for the rows of ``X``; any shape that flattens to one
            label per sample.
        num_labels: Number of distinct labels; labels are 1..num_labels.
        lamda: Regularization strength passed to the cost and gradient.

    Returns:
        Array of shape (num_labels, X.shape[1] + 1); row k - 1 holds the
        learned parameters (bias first) of the classifier for label k.
    """
    m, n = X.shape
    # The bias column is the same for every classifier, so build it once.
    design = np.insert(X, 0, np.ones(m), axis=1)
    targets = np.ravel(y)
    learned_theta_matrix = np.empty((num_labels, n + 1))
    for row, label in enumerate(range(1, num_labels + 1)):
        res = opt.minimize(
            fun=regularized_cost,
            # minimize documents x0 as a 1-D array of shape (n,).
            x0=np.zeros(n + 1),
            args=(lamda, design, (targets == label).astype(int)),
            method="TNC",
            jac=regularized_gradient,
        )
        learned_theta_matrix[row] = res.x
    return learned_theta_matrix

# Fit the classifiers on the full data set; the result holds one parameter row per label.
learned_theta_matrix = one_vs_all(X, y, num_labels, lamda)
print(learned_theta_matrix)

### 1.4.1 One-vs-all prediction

In [None]:
def predict_one_vs_all(theta_matrix, X):
    input_X = np.insert(X, 0, np.ones(X.shape[0]), axis=1)
    res_matrix = input_X @ theta_matrix.T
    prediction = np.argmax(res_matrix, axis=1) + 1
    return prediction

# Predict on the training set and report the fraction of labels that match.
pred = predict_one_vs_all(learned_theta_matrix, X)
accuracy = np.sum(pred == y.flatten()) / len(y)
print(pred, y.flatten(), accuracy, sep="\n")