# Import

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
from typing import Tuple
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F
from tqdm import tqdm
from utils import array_to_one_hot

# Dataloader

In [2]:
# MNIST train / test splits rooted at ./data with download enabled;
# both splits apply the same ToTensor transform to their images.
mnist_kwargs = dict(root='./data', transform=transforms.ToTensor(), download=True)

train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

In [3]:
# Mini-batch settings shared by the training and test loaders.
BATCH_SIZE = 20
SHUFFLE = True

loader_kwargs = dict(batch_size=BATCH_SIZE, shuffle=SHUFFLE)

train_loader = torch.utils.data.DataLoader(train_dataset, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_kwargs)

In [4]:
class TwoLayerNet():
    """Two-layer fully connected classifier written in plain NumPy.

    Architecture: linear (no bias) -> sigmoid -> linear (no bias) -> softmax.
    ``forward`` caches the intermediate activations on the instance and
    ``backward`` consumes them, so ``forward`` must be called on a batch
    before ``backward`` is called for that batch.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int,
                 learning_rate: float = 1e-3):
        """Draw both weight matrices uniformly from [-0.5, 0.5).

        Args:
            input_dim: Number of input features per sample.
            hidden_dim: Number of hidden units.
            output_dim: Number of output classes.
            learning_rate: Step size used by ``backward``; ``train``
                overwrites it with its own ``learning_rate`` argument.
        """
        self.w1 = np.random.uniform(-0.5, 0.5, (input_dim, hidden_dim))
        self.w2 = np.random.uniform(-0.5, 0.5, (hidden_dim, output_dim))
        # Defined here so forward()/backward() work without going through train().
        self.learning_rate = learning_rate

    def sigmoid_function(self, x: np.ndarray):
        """Return the element-wise logistic sigmoid of ``x``."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_function_backward(self, x: np.ndarray):
        """Return the element-wise derivative of the sigmoid at pre-activation ``x``."""
        sig = self.sigmoid_function(x)
        return sig * (1 - sig)

    def softmax_function(self, x: np.ndarray):
        """Return the row-wise softmax of a 2-D array of logits.

        The row maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        (which would otherwise produce inf/inf = NaN for large logits).
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    def softmax_function_backward(self, x: np.ndarray, y: np.ndarray):
        """Gradient of ``0.5 * sum((softmax - y) ** 2)`` w.r.t. the logits.

        Uses the softmax output cached in ``self.y_pred`` by the last
        ``forward`` call.

        Args:
            x: Logits of the last forward pass. Kept for interface
                compatibility; the cached ``self.y_pred`` is what is used.
            y: Target array with the same shape as ``self.y_pred``.

        Returns:
            Array with the same shape as ``self.y_pred``.
        """
        s = self.y_pred
        err = s - y
        # Per row, the softmax Jacobian is diag(s) - s s^T, so
        # J @ err = s * err - s * <err, s>; this avoids building J per sample.
        return s * (err - np.sum(err * s, axis=1, keepdims=True))

    def linear_backward_x(self, upstream_grad: np.ndarray, w: np.ndarray):
        """Back-propagate ``upstream_grad`` through ``x @ w`` to the input ``x``."""
        return np.dot(upstream_grad, w.T)

    def linear_backward_w(self, upstream_grad, x):
        """Back-propagate ``upstream_grad`` through ``x @ w`` to the weights ``w``."""
        return np.dot(x.T, upstream_grad)

    def forward(self, X: np.ndarray):
        """Run the network on a batch and cache every intermediate activation.

        Args:
            X: 2-D array whose second dimension matches ``w1.shape[0]``.

        Returns:
            The softmax output, one row per sample (also stored as ``self.y_pred``).
        """
        self.x = X
        self.layer_1 = self.x @ self.w1
        self.sigmoid_layer = self.sigmoid_function(self.layer_1)
        self.layer_2 = self.sigmoid_layer @ self.w2
        self.y_pred = self.softmax_function(self.layer_2)

        return self.y_pred

    def backward(self, y: np.ndarray):
        """Take one gradient-descent step using the activations cached by ``forward``.

        The step follows the gradient of ``0.5 * sum((y_pred - y) ** 2)``
        averaged over the batch. This differs from ``loss_mse`` only by a
        constant factor, which is absorbed into the learning rate.

        Args:
            y: Target array with the same shape as ``self.y_pred``.
        """
        batch_size = y.shape[0]

        grad_layer_2 = self.softmax_function_backward(self.layer_2, y)
        grad_hidden = self.linear_backward_x(grad_layer_2, self.w2)
        # sigmoid'(z) = a * (1 - a); reuse the cached activation instead of
        # evaluating the sigmoid again.
        grad_layer_1 = grad_hidden * self.sigmoid_layer * (1 - self.sigmoid_layer)

        dw2 = self.linear_backward_w(grad_layer_2, self.sigmoid_layer) / batch_size
        dw1 = self.linear_backward_w(grad_layer_1, self.x) / batch_size

        # Update the weights.
        self.w1 = self.w1 - (self.learning_rate * dw1)
        self.w2 = self.w2 - (self.learning_rate * dw2)

    def loss_mse(self, y_pred: np.ndarray, y: np.ndarray):
        """Return the mean squared error over all elements of ``y_pred - y``."""
        return np.mean((y_pred - y) ** 2)

    def train(self, num_iterations, train_dataloader, learning_rate):
        """Train with mini-batch gradient descent.

        Args:
            num_iterations: Number of full passes over ``train_dataloader``.
            train_dataloader: Iterable yielding ``(imgs, labels)`` pairs of
                objects that provide ``.shape``, ``.reshape`` and ``.numpy()``.
            learning_rate: Step size for the weight updates.

        Returns:
            List with one entry per pass: the sum of the per-batch MSE values
            seen during that pass.
        """
        self.learning_rate = learning_rate
        # One-hot width must match the network output so y and y_pred align.
        num_classes = self.w2.shape[1]

        loss_train = []
        progress_bar = tqdm(range(num_iterations), total=num_iterations)

        for t in progress_bar:

            loss = 0.0

            for imgs, labels in train_dataloader:
                # Flatten every sample to one feature vector.
                X_batch = imgs.reshape(imgs.shape[0], -1).numpy()
                # NOTE(review): array_to_one_hot comes from utils; assumed to
                # return a (batch, num_classes) one-hot array. Confirm.
                y_batch = array_to_one_hot(labels.numpy(), num_classes)

                output = self.forward(X_batch)
                loss += self.loss_mse(output, y_batch)

                self.backward(y_batch)

            loss_train.append(loss)
            progress_bar.set_description(f"Iter {t+1}: loss {loss:.5f}. ")

        return loss_train


In [5]:
# Seed both RNGs so a fresh-kernel run is reproducible: NumPy draws the
# initial weights and torch's global RNG drives the DataLoader shuffling.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Layer sizes: input features, hidden units, output classes.
model = TwoLayerNet(784, 64, 10)

# 1000 passes over train_loader with learning rate 1e-3; `loss` receives the
# per-pass loss history returned by train().
loss = model.train(1000, train_loader, 1e-3)

Iter 36: loss 142.58491. :   7%|▋         | 36/500 [04:41<59:13,  7.66s/it]

In [None]:
def test(test_dataloader, model):

    correct_predicted = 0

    for imgs, labels in test_dataloader:

        X_batch = imgs.squeeze().reshape(
            (imgs.shape[0], np.prod(imgs.shape[1:], axis=0))).numpy()
        

        output = model.forward(X_batch).numpy()

        predictions = np.argmax(output, axis=1)

        correct_batch = np.count_nonzero(predictions == labels.numpy())

        correct_predicted += correct_batch
    acc = (correct_batch / (len(test_loader))) * 100
    return acc

In [None]:
# Evaluate the trained model on test_loader; test() returns its result scaled by 100.
acc = test(test_loader, model)