In [None]:
%pip install numpy
%pip install pandas
%pip install matplotlib

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from IPython.core.debugger import set_trace
import warnings
warnings.filterwarnings('ignore')
from typing import List
from tqdm import tqdm

# New section

In [None]:
# for dirname, _, filenames in os.walk(('/kaggle/input')):
#   for filename in filenames:
#     print(os.path.join(dirname, filename))

# Loading and Preprocessing the dataset

In [None]:
# Directory holding the Sign-MNIST CSV files; change this one constant to
# point the notebook at a different location.
DATA_DIR = '/Programming/COMP551/COMP551_A3/dataset'

# Each split gets its own dataframe. The test file must be bound to `test_df`
# (not `train_df`), otherwise the training data is silently replaced and the
# later cells that use `test_df` fail with a NameError.
train_df = pd.read_csv(os.path.join(DATA_DIR, 'sign_mnist_train.csv'))
test_df = pd.read_csv(os.path.join(DATA_DIR, 'sign_mnist_test.csv'))

In [None]:
# Show per-column summary statistics of the training dataframe.
train_df.describe()

In [None]:
# Print column dtypes, non-null counts and memory usage of the training dataframe.
train_df.info()

In [None]:
# Print column dtypes, non-null counts and memory usage of the test dataframe.
test_df.info()

The train_df dataset consists of a first column representing labels 1 to 24, followed by the pixel columns. The labels are stored separately in 'train_label', and the 'label' column is dropped from the training dataframe to produce 'trainset', which consists of only the 784 pixel values for each image.

In [None]:
# Training split: keep the target column on its own and strip it from the
# feature frame.
train_label = train_df['label']
trainset = train_df.drop(columns=['label'])
# Remaining columns as a freshly allocated float64 matrix.
X_train = np.array(trainset.values, dtype=np.float64)

# Test split: identical treatment.
test_label = test_df['label']
testset = test_df.drop(columns=['label'])
# Remaining columns as a freshly allocated float64 matrix.
X_test = np.array(testset.values, dtype=np.float64)

One-hot encoding

In [None]:
from sklearn.preprocessing import LabelBinarizer

# Learn the label -> one-hot column mapping from the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
# Use transform (not fit_transform) for the test labels: refitting would
# rebuild the mapping from whatever classes appear in the test split, so the
# columns of y_test would not be guaranteed to line up with those of y_train.
y_test = lb.transform(test_label)

# Normalization and Vectorization

In [None]:
# Statistics are taken from the training matrix only: a single scalar mean
# over all entries, and one standard deviation per column.
X_train_mean = X_train.mean()
X_train_std = X_train.std(axis=0)

# Centre and scale the training matrix in place; the small constant keeps the
# division finite for columns whose standard deviation is zero.
np.subtract(X_train, X_train_mean, out=X_train)
np.divide(X_train, X_train_std + 1e-5, out=X_train)

# The test matrix is shifted and scaled with the *training* statistics.
np.subtract(X_test, X_train_mean, out=X_test)
np.divide(X_test, X_train_std + 1e-5, out=X_test)

# Vectorization: one flat row per sample.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

2 hidden layers MLP

In [None]:
class NeuralNetLayer:
  """Abstract base class for a network layer.

  Attributes:
    gradient: whatever the layer caches for the optimizer/backward pass;
      ``None`` until a subclass sets it.
    parameters: list of trainable arrays, or ``None`` for layers without
      trainable parameters.
  """

  def __init__(self):
    # Spelled `__init__` (two trailing underscores) so Python actually runs it
    # on construction and both attributes always exist.
    self.gradient = None
    self.parameters = None

  def forward(self, x):
    """Compute the layer output for input ``x``; subclasses must override."""
    raise NotImplementedError

  def backward(self, gradient):
    """Propagate ``gradient`` back through the layer; subclasses must override."""
    raise NotImplementedError


# Both spellings are used as a base-class name in this notebook; keep the
# lower-case-"l" one as an alias so either resolves to the same class.
NeuralNetlayer = NeuralNetLayer

In [None]:
class LinearLayer(NeuralNetlayer):
  """Fully connected layer computing ``x @ w.T + b``.

  Args:
    input_size: number of input features per sample.
    output_size: number of output units.

  Attributes:
    w: weight matrix of shape ``(output_size, input_size)``.
    b: bias vector of shape ``(output_size,)``.
    parameters: ``[w, b]``, the arrays the optimizer updates in place.
    gradient: ``[dw, db]`` after ``backward``; both carry a leading batch
      axis (the optimizer averages over it).
  """

  def __init__(self, input_size, output_size):
    super().__init__()
    self.ni = input_size
    self.no = output_size
    self.w = np.random.randn(output_size, input_size)
    self.b = np.random.randn(output_size)
    self.cur_input = None
    # Set here as well so the attribute exists before the first backward call.
    self.gradient = None
    self.parameters = [self.w, self.b]

  def forward(self, x):
    """Return the affine map of a 2-D batch ``x`` of shape ``(batch, input_size)``.

    The input is cached for the subsequent ``backward`` call.
    """
    self.cur_input = x
    # A plain matrix product always yields (batch, output_size). The previous
    # broadcasted product followed by an axis-less squeeze() also removed the
    # batch axis when batch == 1 and the unit axis when output_size == 1.
    return x @ self.w.T + self.b

  def backward(self, gradient):
    """Store per-sample parameter gradients and return the input gradient.

    Args:
      gradient: upstream gradient of shape ``(batch, output_size)``.

    Returns:
      Gradient with respect to the layer input, shape ``(batch, input_size)``.
    """
    assert self.cur_input is not None, "Must call forward before backward!"
    # Per-sample outer products: shape (batch, output_size, input_size).
    # NOTE: this materialises one weight-sized matrix per sample, which is
    # memory-hungry for large batches.
    dw = gradient[:, :, None] @ self.cur_input[:, None, :]
    db = gradient
    self.gradient = [dw, db]
    return gradient.dot(self.w)

In [None]:
class ReLULayer(NeuralNetLayer):
    """Element-wise rectifier: passes positive inputs, zeroes the rest."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``max(0, x)`` element-wise and cache the local derivative."""
        is_positive = x > 0
        # Local derivative: 1.0 where the input is strictly positive, else 0.0.
        self.gradient = np.where(is_positive, 1.0, 0.0)
        return np.maximum(0, x)

    def backward(self, gradient):
        """Scale the upstream ``gradient`` by the cached local derivative."""
        assert self.gradient is not None, "Must call forward before backward"
        return np.multiply(gradient, self.gradient)

In [None]:
class SoftmaxOutputLayer(NeuralNetLayer):
    """Softmax output layer whose backward pass takes the target distribution.

    ``backward`` returns ``probs - target``, i.e. it expects to be combined
    with a cross-entropy loss on one-hot (or probability) targets.
    """

    def __init__(self):
        super().__init__()
        self.cur_probs = None

    def forward(self, x):
        """Return softmax probabilities over the last axis of ``x``."""
        # Subtracting the row-wise maximum leaves the softmax value unchanged
        # but keeps every exponent <= 0, so np.exp cannot overflow to inf
        # (which would turn the probabilities into NaN).
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        # keepdims makes the normaliser broadcast for any input rank.
        probs = exps / np.sum(exps, axis=-1, keepdims=True)
        self.cur_probs = probs
        return probs

    def backward(self, target):
        """Return ``probs - target`` using the probabilities from ``forward``."""
        assert self.cur_probs is not None, "Must call forward before backward"
        return self.cur_probs - target

In [None]:
class MLP:
    """Sequential container that chains layers in the order given.

    Args:
        *args: the layers, first to last. Each must provide ``forward(x)``
            and ``backward(gradient)``.
    """

    # For var-positional parameters the annotation describes ONE argument, so
    # it is the layer type rather than a List of layers. It is written as a
    # string so it is not evaluated when the class is defined.
    def __init__(self, *args: "NeuralNetLayer"):
        self.layers = args

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, target):
        """Run the backward pass from the last layer to the first.

        ``target`` is handed to the last layer; each layer's return value is
        passed on to the layer before it. Nothing is returned.
        """
        for layer in reversed(self.layers):
            target = layer.backward(target)

In [None]:
class Optimizer:
    """Base optimizer: visits a network's layers and updates trainable ones.

    Subclasses implement ``update`` with the actual update rule.

    Args:
        net: object exposing a ``layers`` sequence.
    """

    # String annotation: not evaluated at definition time.
    def __init__(self, net: "MLP"):
        self.net = net

    def step(self):
        """Call ``update`` for every layer that has parameters, last layer first."""
        for layer in reversed(self.net.layers):
            # getattr: a layer that never set `parameters` is treated the same
            # as one that set it to None (no trainable parameters) instead of
            # raising AttributeError.
            params = getattr(layer, "parameters", None)
            if params is not None:
                self.update(params, layer.gradient)

    def update(self, params, gradient):
        """Apply one update to ``params`` given ``gradient``; must be overridden."""
        raise NotImplementedError

class GradientDescentOptimizer(Optimizer):
    """Plain gradient descent with a fixed learning rate ``lr``."""

    def __init__(self, net: MLP, lr: float):
        super().__init__(net)
        self.lr = lr

    def update(self, params, gradient):
        """Move each parameter against its gradient averaged over axis 0.

        The subtraction happens in place, so the arrays held by the layer
        are the ones that change.
        """
        for param, grad in zip(params, gradient):
            averaged = grad.mean(axis=0)
            param -= self.lr * averaged

In [None]:
def train(mlp: MLP, optimizer: Optimizer, data_x, data_y, steps):
    """Train ``mlp`` with full-batch updates and plot the loss curve.

    Args:
        mlp: network to train; its final output is treated as class
            probabilities.
        optimizer: optimizer bound to ``mlp``; ``step()`` is called once per
            iteration.
        data_x: input batch passed unchanged to ``mlp.forward``.
        data_y: targets, either a 1-D array of integer class indices or a
            2-D one-hot matrix with one column per network output.
        steps: number of forward/backward/update iterations.

    Raises:
        ValueError: if ``data_y`` is neither 1-D nor a 2-D matrix whose
            width matches the network output.
    """
    targets = np.asarray(data_y)
    losses = []
    labels = None
    for _ in tqdm(range(steps)):
        predictions = mlp.forward(data_x)
        if labels is None:
            # The number of classes is taken from the network output instead
            # of being hard-coded, and targets that are already one-hot are
            # used as they are rather than being used as indices.
            n_classes = predictions.shape[-1]
            if targets.ndim == 1:
                labels = np.eye(n_classes)[targets.astype(int)]
            elif targets.ndim == 2 and targets.shape[1] == n_classes:
                labels = targets.astype(np.float64)
            else:
                raise ValueError(
                    f"data_y with shape {targets.shape} does not match "
                    f"a network with {n_classes} outputs"
                )
        # Clip before the log so a probability of exactly 0 cannot produce
        # -inf (and 0 * -inf = NaN) in the loss.
        log_probs = np.log(np.clip(predictions, 1e-12, None))
        loss = -(labels * log_probs).sum(axis=-1).mean()
        losses.append(loss)
        mlp.backward(labels)
        optimizer.step()
    plt.plot(losses)
    plt.xlabel("Epoch")
    plt.ylabel("Cross entropy loss")
In [None]:
# Layer sizes are derived from the data: columns of X_train are the input
# features (shape[0] would be the number of samples), and columns of the
# one-hot y_train are the classes.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]
HIDDEN_SIZE = 64
GRADIENT_STEPS = 200

# Two hidden ReLU layers followed by a softmax over the classes.
mlp2 = MLP(
    LinearLayer(n_features, HIDDEN_SIZE),
    ReLULayer(),
    LinearLayer(HIDDEN_SIZE, HIDDEN_SIZE),
    ReLULayer(),
    LinearLayer(HIDDEN_SIZE, n_classes),
    SoftmaxOutputLayer()
)
opt2 = GradientDescentOptimizer(mlp2, 1e-2)

train(mlp2, opt2, X_train, y_train, GRADIENT_STEPS)