# A) Numpy Implementation

In [1]:
import math
import numpy as np  
from download_mnist import load
import operator  
import time

In [2]:
x_train, y_train, x_test, y_test = load()

x_mean = np.mean(x_train, axis = 0)
x_std = np.std(x_train, axis = 0)

def standardize(data):
    stan_data = []
    for i in range(28*28):
        if x_std[i] == 0:
            stan_data.append(data[:,i] - x_mean[i])
        else:
            stan_data.append((data[:,i] - x_mean[i]) / x_std[i])
    return np.array(stan_data).T

x_train = standardize(x_train)
x_test = standardize(x_test)

def relu(Z):
    return np.maximum(0,Z)

def softmax(Z):
    return np.exp(Z) / np.sum(np.exp(Z), axis=1)[:,None]

In [3]:
def main():
    global x_train
    global y_train
    batch_size = 128
    FC1W =  0.01 * np.random.randn(28*28, 200)
    FC2W =  0.01 * np.random.randn(200, 50)
    FC3W = 0.01 * np.random.randn(50, 10)
    b1 = 0.0 * np.random.randn(1, 200)
    b2 = 0.0 * np.random.randn(1, 50)
    b3 = 0.0 * np.random.randn(1, 10)
    
    step_size = 0.01
    reg = 0.001
    num_examples = x_train.shape[0]
    
    for i in range(20):
        pem = np.random.permutation(x_train.shape[0])
        x_train = x_train[pem]
        y_train = y_train[pem]
        counter = 0
        while counter < x_train.shape[0]:
            x_train_batch = x_train[counter:counter + batch_size]
            y_train_batch = y_train[counter:counter + batch_size]
            num_examples = x_train_batch.shape[0]
            
            scores1 = np.dot(x_train_batch,FC1W) + b1
            relu1 = relu(scores1)
            scores2 = np.dot(relu1, FC2W) + b2
            relu2 = relu(scores2)
            scores3 = np.dot(relu2, FC3W) + b3

            probs = softmax(scores3)

            correct_logprobs = -np.log(probs[range(num_examples), y_train_batch])
            data_loss = np.sum(correct_logprobs)/num_examples
            reg_loss = 0.5*reg*(np.sum(FC1W*FC1W)+np.sum(FC2W*FC2W)+np.sum(FC3W*FC3W))
            loss = data_loss + reg_loss
            if counter == 0:
                print("Epoch %d: loss %f" % (i+1,loss))
            counter+=batch_size

            preds = np.argmax(probs, axis = 1)

            dscores3 = probs
            dscores3[range(num_examples),y_train_batch] -=1
            dscores3 /= num_examples


            dFC3W = np.dot(relu2.T, dscores3) + reg * FC3W
            db3 = np.sum(dscores3, axis = 0, keepdims=True)

            dRELU2 = np.dot(dscores3, FC3W.T)

            dRELU2[scores2 <= 0] = 0

            dFC2W = np.dot(relu1.T, dRELU2)+ reg * FC2W
            db2 = np.sum(dRELU2, axis = 0, keepdims=True)

            dRELU1 = np.dot(dRELU2, FC2W.T)

            dRELU1[scores1 <= 0] = 0

            dFC1W = np.dot(x_train_batch.T, dRELU1)+ reg * FC1W
            db1 = np.sum(dRELU1, axis = 0, keepdims=True)

            FC3W -= step_size * dFC3W
            b3 -= step_size * db3
            FC2W -= step_size * dFC2W
            b2 -= step_size * db2
            FC1W -= step_size * dFC1W
            b1 -= step_size * db1
        
        tscores1 = np.dot(x_test,FC1W) + b1
        trelu1 = relu(tscores1)
        tscores2 = np.dot(trelu1, FC2W) + b2
        trelu2 = relu(tscores2)
        tscores3 = np.dot(trelu2, FC3W) + b3

        tprobs = softmax(tscores3)
        tpreds = np.argmax(tprobs, axis = 1)
        num_correct = len(np.where(tpreds - y_test == 0)[0])
        print('Epoch %d test accuracy: %f' % ( i+1, num_correct/x_test.shape[0] * 100))
        
        
if __name__ == '__main__':
    start_time = time.time()
    main()
    print ("---execution time: %s seconds ---" % (time.time() - start_time))

Epoch 1: loss 2.310995
Epoch 1 test accuracy: 11.350000
Epoch 2: loss 2.308904
Epoch 2 test accuracy: 19.980000
Epoch 3: loss 2.292296
Epoch 3 test accuracy: 28.350000
Epoch 4: loss 2.066572
Epoch 4 test accuracy: 44.430000
Epoch 5: loss 1.525084
Epoch 5 test accuracy: 60.860000
Epoch 6: loss 0.951128
Epoch 6 test accuracy: 80.320000
Epoch 7: loss 0.649808
Epoch 7 test accuracy: 86.620000
Epoch 8: loss 0.584132
Epoch 8 test accuracy: 89.860000
Epoch 9: loss 0.417603
Epoch 9 test accuracy: 91.140000
Epoch 10: loss 0.276290
Epoch 10 test accuracy: 91.940000
Epoch 11: loss 0.315619
Epoch 11 test accuracy: 92.560000
Epoch 12: loss 0.297862
Epoch 12 test accuracy: 93.020000
Epoch 13: loss 0.284300
Epoch 13 test accuracy: 93.490000
Epoch 14: loss 0.274419
Epoch 14 test accuracy: 93.800000
Epoch 15: loss 0.259660
Epoch 15 test accuracy: 94.240000
Epoch 16: loss 0.284787
Epoch 16 test accuracy: 94.400000
Epoch 17: loss 0.227195
Epoch 17 test accuracy: 94.640000
Epoch 18: loss 0.210644
Epoch 18

# B) PyTorch Implementation

In [4]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


In [5]:
 def main():

    torch.manual_seed(90)

    device = torch.device("cpu")

    kwargs = {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=128, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=10000, shuffle=True, **kwargs)

    
    
    model = torch.nn.Sequential(
                torch.nn.Linear(28*28, 200),
                torch.nn.ReLU(),
                torch.nn.Linear(200, 50),
                torch.nn.ReLU(),
                torch.nn.Linear(50,10),
                torch.nn.Softmax(dim = 1)).to(device)
    
    optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9, weight_decay = 0.001)
    for epoch in range(10):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            data = data.view(-1,28*28)
            optimizer.zero_grad()
            output = model(data)
            loss_fn = torch.nn.CrossEntropyLoss()
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx == 0:
                print('Train Epoch: {}\tLoss: {:.6f}'.format(
                    epoch+1, loss.item()))
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                data = data.view(-1,28*28)
                output = model(data)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        print('\nTest set: Accuracy: {}/{} \n'.format(
            correct, len(test_loader.dataset)))
    
    
if __name__ == '__main__':
    start_time = time.time()
    main()
    print ("---execution time: %s seconds ---" % (time.time() - start_time))


Train Epoch: 1	Loss: 2.302934

Test set: Accuracy: 8353/10000 

Train Epoch: 2	Loss: 1.606482

Test set: Accuracy: 9142/10000 

Train Epoch: 3	Loss: 1.572431

Test set: Accuracy: 9267/10000 

Train Epoch: 4	Loss: 1.543095

Test set: Accuracy: 9337/10000 

Train Epoch: 5	Loss: 1.510418

Test set: Accuracy: 9389/10000 

Train Epoch: 6	Loss: 1.547799

Test set: Accuracy: 9472/10000 

Train Epoch: 7	Loss: 1.527524

Test set: Accuracy: 9497/10000 

Train Epoch: 8	Loss: 1.498596

Test set: Accuracy: 9508/10000 

Train Epoch: 9	Loss: 1.495493

Test set: Accuracy: 9537/10000 

Train Epoch: 10	Loss: 1.528786

Test set: Accuracy: 9573/10000 

---execution time: 220.04080486297607 seconds ---
