<a href="https://colab.research.google.com/github/baranceanuvlad/Advanced-Topics-in-Neural-Networks-Template-2023/blob/main/Lab03/Homework3" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch
from torch import Tensor
from torchvision.datasets import MNIST
from tqdm import tqdm

In [2]:
def forward(x: Tensor, w: Tensor, b: Tensor) -> Tensor:
    return x @ w + b

In [22]:
def sigmoid_function(z):
  return 1 / (1 + torch.exp(-z))

In [3]:
def ReLU(z):
  return torch.max(z, torch.tensor(0.0))

In [4]:
def ReLU_derivative(x):
    return torch.where(x > 0, torch.tensor(1.0), torch.tensor(0.0))

In [21]:
def activate(x: Tensor) -> Tensor:
    return x.softmax(dim=1)

In [139]:
def test_accuracy(prediction, labels):
  count = 0
  prediction = torch.argmax(prediction, axis=1)
  for idx in range(len(prediction)):
    if prediction[idx] == labels[idx]:
      count += 1
  return count * 100 / len(labels)

In [6]:
def collate(x) -> Tensor:
    if isinstance(x, (tuple, list)):
        if isinstance(x[0], Tensor):
            return torch.stack(x)
        return torch.tensor(x)
    raise "Not supported yet"
    # see torch\utils\data\_utils\collate.py

In [7]:
def to_one_hot(x: Tensor) -> Tensor:
    return torch.eye(x.max() + 1)[x]

In [11]:
def load_mnist(path: str = "./data", train: bool = True, pin_memory: bool = True):
    mnist_raw = MNIST(path, download=True, train=train)
    mnist_data = []
    mnist_labels = []
    for image, label in mnist_raw:
        tensor = torch.from_numpy(np.array(image))
        mnist_data.append(tensor)
        mnist_labels.append(label)

    mnist_data = collate(mnist_data).float()  # shape 60000, 28, 28
    mnist_data = mnist_data.flatten(start_dim=1)  # shape 60000, 784
    mnist_data /= mnist_data.max()  # min max normalize
    mnist_labels = collate(mnist_labels)  # shape 60000
    if train:
        mnist_labels = to_one_hot(mnist_labels)  # shape 60000, 10
    if False:
        return mnist_data.pin_memory(), mnist_labels.pin_memory()
    return mnist_data, mnist_labels

In [12]:
data, labels = load_mnist(train=True)
data_test, labels_test = load_mnist(train=False)

In [None]:
print(data.shape, labels.shape)
print(data_test.shape, labels.shape)

torch.Size([60000, 784]) torch.Size([60000, 10])
torch.Size([10000, 784]) torch.Size([60000, 10])


In [165]:
W = [torch.rand((784,100)), torch.rand((100,10))]
b = [torch.rand(1, 100), torch.rand(1, 10)]
learning_rate = 0.07
batch_size = 64

In [110]:
def batch_multiplication(a, W, batch_size):
  result = torch.zeros(a.shape[0], W.shape[1])
  for i in range(0, a.shape[0], batch_size):
    aux = a[i: i + batch_size] @ W
    result[i : i + batch_size] = aux
  return result


In [156]:
def shuffle_data(a , labels):
  num_rows = a.size(0)
  random_indices = torch.randperm(num_rows)
  return a[random_indices], labels[random_indices]

In [159]:
def train2(data_train, labels, W, b, learning_rate, batch_size):
  for _ in range(500):
    data_train, labels = shuffle_data(data_train, labels)
    z1 = batch_multiplication(data_train, W[0], batch_size) + b[0]
    activ1 = ReLU(z1)
    z2 = batch_multiplication(activ1, W[1], batch_size) + b[1]
    print('Loss Train:', torch.nn.functional.cross_entropy(z2, labels))
    activ2 = activate(z2)
    print(test_accuracy(activ2, torch.argmax(labels, axis = 1)))

    err2 = labels - activ2
    err1 = ReLU_derivative(activ1) * (err2  @ W[1].T)
    d_w1 = batch_multiplication(data_train.T, err1, batch_size)
    d_b1 = err1.mean(axis=0)
    d_w2 = batch_multiplication(activ1.T, err2, batch_size)
    d_b2 = err2.mean(axis=0)
    W[0] += learning_rate * d_w1 / (data_train.shape[0])
    b[0] += learning_rate * d_b1
    W[1] += learning_rate * d_w2 / (data_train.shape[0])
    b[1] += learning_rate * d_b2


    z1 = batch_multiplication(data_test, W[0], batch_size) + b[0]
    activ1 = ReLU(z1)
    z2 = batch_multiplication(activ1, W[1], batch_size) + b[1]
    print('Loss Validation:', torch.nn.functional.cross_entropy(z2, labels_test))
    activ2 = activate(z2)
    print(test_accuracy(activ2, labels_test))



In [166]:
train2(data, labels, W, b,learning_rate, batch_size)

Loss Train: tensor(184.2829)
9.915
Loss Validation: tensor(2443.8438)
9.8
Loss Train: tensor(2364.5713)
9.871666666666666
Loss Validation: tensor(4580.9014)
10.32
Loss Train: tensor(4538.6650)
9.93
Loss Validation: tensor(6415.0098)
10.68
Loss Train: tensor(6282.0659)
11.131666666666666
Loss Validation: tensor(7157.5513)
9.58
Loss Train: tensor(7039.7676)
9.863333333333333
Loss Validation: tensor(7681.7080)
9.74
Loss Train: tensor(7558.0630)
9.751666666666667
Loss Validation: tensor(7511.2646)
10.28
Loss Train: tensor(7392.8281)
10.441666666666666
Loss Validation: tensor(6993.1816)
9.82
Loss Train: tensor(6913.5669)
9.736666666666666
Loss Validation: tensor(6088.7930)
8.92
Loss Train: tensor(6026.1680)
9.035
Loss Validation: tensor(3809.4011)
11.35
Loss Train: tensor(3780.4099)
11.236666666666666
Loss Validation: tensor(1024.0638)
8.92
Loss Train: tensor(1016.0004)
9.035
Loss Validation: tensor(603.1812)
12.79
Loss Train: tensor(598.8910)
12.483333333333333
Loss Validation: tensor(572.