In [37]:
import numpy as np
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch
import time
import torchvision
import torch.nn as nn
from torch import optim


In [38]:
# MNIST preprocessing pipeline: convert each image to a tensor, then apply
# Normalize with mean 0.5 and std 0.5 on the single channel.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Layer dimensions.
input_size = 784
hidden_layer_size = 300
output_size = 10

# Per-epoch test metrics; FromScratchModel.training appends to these lists.
losses = []
accuracies = []

class FromScratchModel():
    """Fully connected classifier trained with mini-batch gradient descent in NumPy.

    The network is input -> sigmoid -> sigmoid -> softmax and uses weight
    matrices only (no bias terms).

    Args:
        sizes: four layer widths ``[input, hidden_1, hidden_2, output]``.
        epochs: number of passes over the training loader in ``training``.
        alpha: learning rate applied in ``weight_update``.
    """

    def __init__(self, sizes, epochs=10, alpha=0.01):
        self.sizes = sizes
        self.epochs = epochs
        self.alpha = alpha
        self.init_w()

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        """Row-wise softmax of a 2-D array of logits.

        The per-row maximum is subtracted before exponentiating. This leaves
        the result mathematically unchanged but keeps ``np.exp`` from
        overflowing to ``inf``, which would otherwise produce NaN rows.
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        expo = np.exp(shifted)
        return expo / np.sum(expo, axis=1, keepdims=True)

    def init_w(self):
        """Draw the three weight matrices uniformly from [-1, 1)."""
        input_layer = int(self.sizes[0])
        hidden_layer_1 = int(self.sizes[1])
        hidden_layer_2 = int(self.sizes[2])
        output_layer = int(self.sizes[3])
        self.w1 = np.random.uniform(low=-1, high=1, size=(input_layer, hidden_layer_1))
        self.w2 = np.random.uniform(low=-1, high=1, size=(hidden_layer_1, hidden_layer_2))
        self.w3 = np.random.uniform(low=-1, high=1, size=(hidden_layer_2, output_layer))

    def forward_propagation(self, inputs):
        """Run a forward pass and cache every pre-/post-activation.

        Args:
            inputs: torch tensor of flattened samples, one row per sample.

        Returns:
            NumPy array of class probabilities, one row per sample.
        """
        batch = inputs.numpy()
        self.linear_1 = batch.dot(self.w1)
        self.output_1 = self.sigmoid(self.linear_1)
        self.linear_2 = self.output_1.dot(self.w2)
        self.output_2 = self.sigmoid(self.linear_2)
        self.linear_3 = self.output_2.dot(self.w3)
        self.output_3 = self.softmax(self.linear_3)
        return self.output_3

    def backward_propagation(self, X_train, y_train, output):
        """Compute batch-averaged cross-entropy gradients for w1, w2 and w3.

        Must be called after ``forward_propagation`` on the same batch, since
        it reads the activations cached there.

        Args:
            X_train: torch tensor of flattened inputs used in the forward pass.
            y_train: torch tensor of one-hot labels.
            output: softmax probabilities returned by the forward pass.

        Returns:
            Tuple ``(grad_w1, grad_w2, grad_w3)``.
        """
        X_train = X_train.numpy()
        y_train = y_train.numpy()
        batch_size = y_train.shape[0]

        # Softmax + cross-entropy: gradient w.r.t. the logits is (p - y).
        pred_loss = output - y_train
        grad_w3 = (1. / batch_size) * np.matmul(self.output_2.T, pred_loss)

        # Sigmoid derivative is s * (1 - s); reuse the cached activations
        # instead of recomputing the sigmoid of the cached pre-activations.
        d_output_2 = np.matmul(pred_loss, self.w3.T)
        d_linear_2 = d_output_2 * self.output_2 * (1 - self.output_2)
        grad_w2 = (1. / batch_size) * np.matmul(self.output_1.T, d_linear_2)

        d_output_1 = np.matmul(d_linear_2, self.w2.T)
        d_linear_1 = d_output_1 * self.output_1 * (1 - self.output_1)
        grad_w1 = (1. / batch_size) * np.matmul(X_train.T, d_linear_1)
        return grad_w1, grad_w2, grad_w3

    def weight_update(self, w1_old, w2_old, w3_old):
        """Apply one gradient-descent step; the arguments are the gradients."""
        self.w1 -= self.alpha * w1_old
        self.w2 -= self.alpha * w2_old
        self.w3 -= self.alpha * w3_old

    def calc_loss(self, y, y_pred):
        """Mean cross-entropy between one-hot labels and predicted probabilities.

        Args:
            y: torch tensor of one-hot labels, one row per sample.
            y_pred: NumPy array of predicted probabilities, same shape.

        Returns:
            The batch-averaged cross-entropy as a float.
        """
        batch_size = y.shape[0]
        y = y.numpy()
        # A probability of exactly 0 would give log(0) = -inf and 0 * -inf = NaN;
        # clip away from zero so the loss stays finite.
        log_probs = np.log(np.clip(y_pred, 1e-12, 1.0))
        loss = np.sum(np.multiply(y, log_probs))
        loss = -(1. / batch_size) * loss
        return loss

    def _prepare_batch(self, x, y):
        """Flatten a batch of inputs and one-hot encode its integer labels."""
        flattened_input = x.view(-1, int(self.sizes[0]))
        y_onehot_encode = torch.zeros(y.shape[0], int(self.sizes[3]))
        y_onehot_encode[torch.arange(y.shape[0]), y] = 1
        return flattened_input, y_onehot_encode

    def calc_metrics(self, test):
        """Evaluate on an iterable of ``(x, y)`` batches.

        Returns:
            Tuple ``(accuracy, mean_loss)`` where ``accuracy`` is the fraction
            of correctly classified samples and ``mean_loss`` is the mean of
            the per-batch losses.
        """
        batch_losses = []
        correct = 0
        total = 0

        for x, y in test:
            flattened_input, y_onehot_encode = self._prepare_batch(x, y)
            output = self.forward_propagation(flattened_input)
            y_pred = np.argmax(output, axis=1)
            correct += np.sum((y_pred == y.numpy()))
            total += y.shape[0]
            batch_losses.append(self.calc_loss(y_onehot_encode, output))
        return (correct / total), np.mean(np.array(batch_losses))

    def training(self, train, test):
        """Train for ``self.epochs`` epochs, evaluating on ``test`` after each.

        Side effects: appends each epoch's test loss and accuracy to the
        module-level ``losses`` and ``accuracies`` lists (they are not reset
        here, so callers wanting per-run curves must clear them first) and
        prints a one-line summary per epoch.
        """
        global losses, accuracies
        for i in range(self.epochs):
            for x, y in train:
                flattened_input, y_onehot_encode = self._prepare_batch(x, y)
                output = self.forward_propagation(flattened_input)
                w1_up, w2_up, w3_up = self.backward_propagation(flattened_input, y_onehot_encode, output)
                self.weight_update(w1_up, w2_up, w3_up)
            accuracy, loss = self.calc_metrics(test)
            losses.append(loss)
            accuracies.append(accuracy)
            print('Epoch: {0}, Test Error Percent: {1:.2f}, Loss: {2:.2f}'.format(i + 1, 100 - accuracy * 100, loss))


Q2 Case 1

In [None]:
"""
α = 0.05 
BatchSize = 32 
Epochs = 20 
"""

model = FromScratchModel(sizes=[784, 300, 200, 10], epochs=20,  alpha=0.05)
bsize=32
trainset = datasets.MNIST('./dataset/MNIST/', download=True, train=True, transform=transform)
testset = datasets.MNIST('./dataset/MNIST/', download=True, train=False, transform=transform)
train = torch.utils.data.DataLoader(trainset, batch_size=bsize, shuffle=True)
# Evaluation does not need shuffling.
test = torch.utils.data.DataLoader(testset, batch_size=bsize, shuffle=False)

# FromScratchModel.training appends to the module-level history lists, so
# empty them first; otherwise re-running this cell would plot stale epochs.
losses.clear()
accuracies.clear()

model.training(train, test)
plt.xlabel('Epochs')
plt.ylabel('Test Loss')
plt.plot(losses)
plt.show()


Q2 Case 2

In [None]:
"""
α = 0.05 
BatchSize = 64
Epochs = 20 
"""
model = FromScratchModel(sizes=[784, 300, 200, 10], epochs=20,  alpha=0.05)
bsize=64
train = torch.utils.data.DataLoader(trainset, batch_size=bsize, shuffle=True)
# Evaluation does not need shuffling.
test = torch.utils.data.DataLoader(testset, batch_size=bsize, shuffle=False)

# FromScratchModel.training appends to the module-level history lists, which
# still hold the epochs of any earlier run. Empty them so the curve below
# shows this configuration only.
losses.clear()
accuracies.clear()

model.training(train, test)
plt.xlabel('Epochs')
plt.ylabel('Test Loss')
plt.plot(losses)
plt.show()


Q2 Case 3

In [None]:
"""
α = 0.005 
BatchSize = 32 
Epochs = 20 
"""
model = FromScratchModel(sizes=[784, 300, 200, 10], epochs=20, alpha=0.005)
bsize=32
train = torch.utils.data.DataLoader(trainset, batch_size=bsize, shuffle=True)
# Evaluation does not need shuffling.
test = torch.utils.data.DataLoader(testset, batch_size=bsize, shuffle=False)

# FromScratchModel.training appends to the module-level history lists, which
# still hold the epochs of any earlier run. Empty them so the curve below
# shows this configuration only.
losses.clear()
accuracies.clear()

model.training(train, test)
plt.xlabel('Epochs')
plt.ylabel('Test Loss')
plt.plot(losses)
plt.show()

Q3 

In [40]:
def PytorchModel(bsize,rate,epochs=10,init="zeros"):
  """Train a sigmoid MLP (input -> 300 -> 200 -> 10) on MNIST and evaluate it.

  Prints the model, the mean training loss after each epoch, and the accuracy
  and error percentages on the held-out MNIST test split, then plots the
  per-epoch training loss.

  Args:
    bsize: mini-batch size for both the training and the test loader.
    rate: SGD learning rate.
    epochs: number of passes over the training set.
    init: weight initialisation applied to every ``nn.Linear`` layer.
      ``"zeros"`` (the default, matching the previously hard-coded behaviour)
      sets all weights to 0; ``"uniform"`` draws them from U(-1, 1). Biases
      keep PyTorch's default initialisation in both modes.

  Raises:
    ValueError: if ``init`` is neither ``"zeros"`` nor ``"uniform"``.
  """
  if init not in ("zeros", "uniform"):
    raise ValueError('init must be "zeros" or "uniform", got {!r}'.format(init))

  transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])
  training_set = torchvision.datasets.MNIST('data', train=True, transform=transform, download=True)
  train = torch.utils.data.DataLoader(training_set, batch_size=bsize, shuffle=True)

  # Evaluate on the held-out split: loading it with train=True would report
  # training accuracy as if it were test accuracy.
  test_set = torchvision.datasets.MNIST('data', train=False, transform=transform, download=True)
  test = torch.utils.data.DataLoader(test_set, batch_size=bsize, shuffle=False)

  # `.data` is the image tensor; `train_data` is a deprecated alias for it.
  input_size = training_set.data.shape[1] * training_set.data.shape[2]
  hidden_layers = [300,200]
  output_size = 10

  def init_weights(module):
    """Initialise the weight matrix of each linear layer according to `init`."""
    if isinstance(module, nn.Linear):
      if init == "uniform":
        torch.nn.init.uniform_(module.weight, -1.0, 1.0)
      else:
        torch.nn.init.zeros_(module.weight)

  model = nn.Sequential(
      nn.Linear(input_size, hidden_layers[0]),
      nn.Sigmoid(),
      nn.Linear(hidden_layers[0], hidden_layers[1]),
      nn.Sigmoid(),
      nn.Linear(hidden_layers[1], output_size),
      nn.LogSoftmax(dim=1)
  )
  model.apply(init_weights)
  print(model)
  # The network already outputs log-probabilities, so NLLLoss is the matching
  # criterion; CrossEntropyLoss would apply log-softmax a second time.
  criterion = nn.NLLLoss()
  optimizer = optim.SGD(model.parameters(), lr=rate)

  losses = []
  for i in range(epochs):
    run_loss = 0
    for x, y in train:
      x = x.view(x.shape[0], -1)
      optimizer.zero_grad()
      output = model(x)
      loss = criterion(output, y)
      loss.backward()
      optimizer.step()
      run_loss += loss.item()
    epoch_loss = run_loss / len(train)
    print("Epoch: ",i+1)
    print("Run loss: ",epoch_loss)
    losses.append(epoch_loss)

  correct = 0
  with torch.no_grad():
    for images, labels in test:
      log_p = model(images.view(images.shape[0], -1))
      # argmax over the class dimension; no squeeze, so a final batch that
      # contains a single sample keeps its batch dimension.
      y_pred = log_p.argmax(dim=1)
      correct += (y_pred == labels).sum().item()
  total = len(test.dataset)
  print('\nAccuracy Percent: {}/{} ({:.0f})\n'.format(correct, total, 100. * correct / total))
  print('\nTest Error Percent: ({:.0f})\n'.format(100 - 100. * correct / total))

  # The curve is the mean training loss per epoch, so label it as such.
  plt.xlabel('Epochs')
  plt.ylabel('Training Loss')
  plt.plot(losses)
  plt.show()




Q3 Case 1

In [None]:
"""
α = 0.05
BatchSize = 32
Epochs = 20
Initialised randomly between -1 and 1
"""
# NOTE(review): the description above says the weights are drawn from [-1, 1],
# but PytorchModel called with these arguments zero-initialises every
# nn.Linear weight. Confirm which initialisation this case is meant to use.
PytorchModel(bsize=32, rate=0.05, epochs=20)

Q3 Case 2

In [None]:
"""
α = 0.05
BatchSize = 64
Epochs = 20
Initialised randomly between -1 and 1
"""
# NOTE(review): the description above says the weights are drawn from [-1, 1],
# but PytorchModel called with these arguments zero-initialises every
# nn.Linear weight. Confirm which initialisation this case is meant to use.
PytorchModel(bsize=64, rate=0.05, epochs=20)

Q3 Case 3

In [None]:
"""
α = 0.005
BatchSize = 32
Epochs = 20
Initialised randomly between -1 and 1
"""
# NOTE(review): the description above says the weights are drawn from [-1, 1],
# but PytorchModel called with these arguments zero-initialises every
# nn.Linear weight. Confirm which initialisation this case is meant to use.
PytorchModel(bsize=32, rate=0.005, epochs=20)

Q4 Case 1

In [None]:
"""
α = 0.05, 
BatchSize = 32, 
Epochs = 20, 
Initialised 0
"""

# NOTE(review): the description above lists α = 0.05, but this call passes a
# learning rate of 0.01. Confirm which value is intended.
PytorchModel(bsize=32, rate=0.01, epochs=20)

Q4 Case 2

In [None]:
"""
α = 0.05
BatchSize = 32
Epochs = 20
Initialised randomly between -1 and 1
"""

# NOTE(review): the description above lists α = 0.05, BatchSize = 32 and random
# initialisation in [-1, 1], but this call uses batch size 64 and learning
# rate 0.01, and PytorchModel called with these arguments zero-initialises
# the weights. Confirm the intended configuration.
PytorchModel(bsize=64, rate=0.01, epochs=20)