Download MNIST

In [29]:
import numpy as np
from urllib import request
import gzip
import pickle

filename = [
["training_images","train-images-idx3-ubyte.gz"],
["test_images","t10k-images-idx3-ubyte.gz"],
["training_labels","train-labels-idx1-ubyte.gz"],
["test_labels","t10k-labels-idx1-ubyte.gz"]
]

def download_mnist():
    base_url = "http://yann.lecun.com/exdb/mnist/"
    for name in filename:
        print("Downloading "+name[1]+"...")
        request.urlretrieve(base_url+name[1], name[1])
    print("Download complete.")

def save_mnist():
    mnist = {}
    for name in filename[:2]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28)
    for name in filename[-2:]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
    with open("mnist.pkl", 'wb') as f:
        pickle.dump(mnist,f)
    print("Save complete.")

def init():
    download_mnist()
    save_mnist()
#    print ((load()[0]).shape)
def load():
    with open("mnist.pkl",'rb') as f:
        mnist = pickle.load(f)
    return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]

if __name__ == '__main__':
    init()

Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


Load function

In [30]:
import numpy as np
from urllib import request
import gzip
import math
import pickle


def grad_softmax_crossentropy(X, y):
    m = y.shape[0]
    ones_for_answers = np.zeros_like(X)
    ones_for_answers[np.arange(len(X)), y] = 1

    p = np.exp(X) / np.exp(X).sum(axis=-1, keepdims=True)
    return (- ones_for_answers + p) / m

def load():
    with open("mnist.pkl",'rb') as f:
        mnist = pickle.load(f)

        training_images, training_labels, testing_images, testing_labels = mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]
        # Normalize the images
        training_images.astype('float32')
        testing_images.astype('float32')
        training_images = training_images / 255
        testing_images = testing_images / 255
        return training_images, training_labels, testing_images, testing_labels


TRimg,TRlab,TSimg,TSlab=load()
print(len(TRimg),len(TRlab),len(TSimg),len(TSlab))
print(len(TRimg[0]),len(TRlab),len(TSimg[0]),len(TSlab))

arr_2d = np. reshape(TRimg[0], (28, 28))

60000 60000 10000 10000
784 60000 784 10000


Numpy Implementation

In [31]:
#implement the model
#parameters: W: weight matrix
# b: bias
"""
Assume your input is X
Your output is Y

For a three layer FC NN, two set of parameters:
W: weight matrix for layer 1
b: Bias for layer 1
W2: weight matrix for layer 2
b2: Bias for layer 2
W3: weight matrix for layer 3
b3: Bias for layer 3
"""

D=784 # input size
h1 = 200 #first hidden layer
h2=50 #second hidden layer
K=10  # output size

#learning rate
step_size=0.1

#weight for regularization
reg=0.001
W=0.01*np.random.randn(D,h1)
b=np.zeros((1,h1))
W2=0.01*np.random.randn(h1,h2)
b2=np.zeros((1,h2))
W3=0.01*np.random.randn(h2,K)
b3=np.zeros((1,K))

#mini batch setting
Epoc=10
BatchSize=32

for i in range(Epoc):
  for j in range(0,60000,32):
    X=TRimg[j:j+BatchSize]
    Y=TRlab[j:j+BatchSize]
    num_examples=X.shape[0]

    # forward 
    hidden_layer1=np.maximum(0,np.dot(X,W)+b)
    hidden_layer=np.maximum(0,np.dot(hidden_layer1,W2)+b2)
    scores=np.dot(hidden_layer,W3)+b3
    exp_scores=np.exp(scores)
    probs=exp_scores/np.sum(exp_scores,axis=1,keepdims=True)

    # Calculate the loss
    correct_logprobs=-np.log(probs[range(num_examples),Y])
    data_loss=np.sum(correct_logprobs)/num_examples
    reg_loss=0.5*reg*np.sum(W*W)+0.5*reg*np.sum(W2*W2)+0.5*reg*np.sum(W3*W3)
    loss=data_loss+reg_loss

    #backpropagation (softmax+ crossentropy)
    dscores=probs
    dscores[range(num_examples),Y]-=1
    dscores/=num_examples

    #Direvative for the third layer (dw3 and db3)
    dw3=np.dot(hidden_layer.T,dscores) # direvative for the weight
    db3=np.sum(dscores, axis=0,keepdims=True)  # direvative for the bias
    dhidden=np.dot(dscores,W3.T)
    dhidden[hidden_layer <=0]=0 #Derivitative from Relu

    #Direvative for the second layer (dw2 and db2)
    dw2=np.dot(hidden_layer1.T,dhidden) # direvative for the weight
    db2=np.sum(dhidden, axis=0,keepdims=True)  # direvative for the bias
    dhidden1=np.dot(dhidden,W2.T)
    dhidden1[hidden_layer1 <=0]=0 #Derivitative from Relu

    #Direvative for the first layer (dw and db)
    dw= np.dot(X.T,dhidden1)
    db=np.sum(dhidden1,axis=0,keepdims=True)

    #derivative from regularization
    dw3+=reg*W3
    dw2+=reg*W2
    dw+=reg*W

    #update the parameters
    W+=-step_size*dw
    b+=-step_size*db
    W2+=-step_size*dw2
    b2+=-step_size*db2  
    W3+=-step_size*dw3
    b3+=-step_size*db3 


#use Test dataset for testing
# TSimg and Tslab
hidden_layer1=np.maximum(0,np.dot(TSimg,W)+b) #Relu(x*w+b) first layer
hidden_layer=np.maximum(0,np.dot(hidden_layer1,W2)+b2) #Relu(x*w+b) second layer
scores=np.dot(hidden_layer,W3)+b3 #(hidden*w3+b3)
predicted_class=np.argmax(scores,axis=1) #softmax()
print("training accuracy: %.2f" % (np.mean(predicted_class==TSlab)))

training accuracy: 0.97


Pytorch data

In [32]:
from __future__ import print_function

import numpy as np
from urllib import request
import gzip
import math
import pickle
from time import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 200)
        self.l2 = nn.Linear(200, 50)
        self.l3 = nn.Linear(50,10)


    def forward(self, x):
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.softmax(self.l3(x))

        return x

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.1)

epoch=10
BatchSize=32
for i in range(epoch):
  for j in range(0,60000,BatchSize):
    X=TRimg[j:j+BatchSize]
    X=torch.tensor(X)
    Y=TRlab[j:j+BatchSize]
    Y=torch.tensor(Y)
    num_examples=X.shape[0]
    optimizer.zero_grad()
    output = model(X.float())
    loss=F.cross_entropy(output, Y)
    loss.backward()
    optimizer.step()


#TESTING
X=torch.tensor(TSimg)
Y=TSlab

output = model(X.float())
Pred=output.detach().numpy()
predicted_class=np.argmax(Pred,axis=1) #softmax()
print(predicted_class)
print(TSlab)
print("training accuracy: %.2f" % (np.mean(predicted_class==TSlab)))



[7 5 1 ... 4 5 6]
[7 2 1 ... 4 5 6]
training accuracy: 0.86
