In [None]:
from mnist_loader import load_data_wrapper
import numpy as np
import random 
import matplotlib.pyplot as plt


In [None]:
# Load the three MNIST splits through the project-local mnist_loader helper.
# NOTE(review): later cells slice and shuffle training_data and call len() on
# test_data, so both are presumably lists of (image, label) tuples — confirm
# against mnist_loader. validation_data is not used in the cells visible here.
training_data , validation_data , test_data = load_data_wrapper()

In [None]:
def plot_images(images):
    """Plot a list of MNIST images side by side in a single row.

    Args:
        images: sequence of (image, label) tuples. Each image must hold
            28 * 28 = 784 values because it is reshaped to (28, 28) for
            display. The label is not used.

    Returns:
        None. An empty sequence draws nothing; a single image is supported.
    """
    if len(images) == 0:
        # plt.subplots cannot create a grid with zero columns.
        return

    # squeeze=False makes subplots always hand back a 2-D array of Axes. With
    # the default (squeeze=True) a 1x1 grid yields a bare Axes object, which
    # cannot be iterated, so plotting a single image used to fail.
    fig, axes = plt.subplots(nrows=1, ncols=len(images), squeeze=False)
    for ax, sample in zip(axes[0], images):
        ax.matshow(sample[0].reshape(28, 28), cmap=plt.cm.binary)
        # Pixel coordinates carry no information here, so hide the ticks.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
    

In [None]:
def predict(images , w1 , w2 , B1 , B2 ):
    """Classify every sample in `images` with the given network parameters.

    `images` is an iterable of tuples whose first element is the network
    input; any further tuple elements are ignored. Each input is run through
    apply_NN and the index of the largest output entry is taken as the
    predicted class.

    Returns the predictions as a list, in the same order as `images`.
    """
    return [
        np.argmax(apply_NN(sample[0] , w1 , w2 , B1 , B2))
        for sample in images
    ]


In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)); applied elementwise to arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


In [None]:
def sigmoid_prime(x):
    """Derivative of the sigmoid function.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    The sigmoid is evaluated once and reused, which halves the number of
    exponentials computed per call (this runs twice per training step).
    """
    s = sigmoid(x)
    return s * (1 - s)

In [None]:
def apply_NN(x , W1 , W2 , B1 , B2):
    """Forward pass through the two-layer sigmoid network.

    Each layer computes sigmoid(W . a + B), where `a` is the previous layer's
    activation (the input `x` for the first layer). Returns the activation of
    the second (output) layer.
    """
    activation = x
    for weights, biases in ((W1, B1), (W2, B2)):
        activation = sigmoid(np.dot(weights, activation) + biases)
    return activation


In [None]:
def vectorize_mini_batch(mini_batch):
    """Turn a mini-batch of (image, label) tuples into two stacked arrays.

    Returns the tuple (X, Y): X is every image and Y is every label of the
    mini-batch, each joined with np.hstack in mini-batch order.
    """
    images = [sample[0] for sample in mini_batch]
    labels = [sample[1] for sample in mini_batch]
    return np.hstack(images), np.hstack(labels)

In [None]:
def SGD(training_data , epochs , mini_batch_size , eta , test_data , layer_sizes=(784, 30, 10)):
    """Train a one-hidden-layer sigmoid network with mini-batch gradient descent.

    Args:
        training_data: iterable of (x, y) pairs. Each x must be a column
            vector with layer_sizes[0] rows and each y a column vector with
            layer_sizes[2] rows (they are stacked horizontally per batch).
        epochs: the number of times the entire training_data is examined.
        mini_batch_size: the number of samples used to approximate the
            gradient in each step of the gradient descent.
        eta: the learning rate (step size).
        test_data: iterable of (x, label) pairs, where label is the index of
            the correct output unit; accuracy on it is printed every epoch.
        layer_sizes: (inputs, hidden units, outputs). Defaults to the
            784-30-10 network used for MNIST.

    Returns:
        The trained parameters as the tuple (w1, b1, w2, b2).

    Raises:
        ValueError: if mini_batch_size is smaller than 1.
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer")

    # Work on private copies: shuffling must not reorder the caller's list,
    # and copying also lets one-shot iterables (e.g. zip objects) be used.
    training_data = list(training_data)
    test_data = list(test_data)
    n = len(training_data)
    n_test = len(test_data)

    # Parameters are drawn in the order w1, w2, b1, b2 so that a seeded run
    # produces the same initial network regardless of layer_sizes defaults.
    n_in, n_hidden, n_out = layer_sizes
    w1 = np.random.randn(n_hidden , n_in)
    w2 = np.random.randn(n_out , n_hidden)
    b1 = np.random.randn(n_hidden , 1)
    b2 = np.random.randn(n_out , 1)

    for j in range(epochs):
        random.shuffle(training_data)
        for k in range(0 , n , mini_batch_size):
            mini_batch = training_data[k : k + mini_batch_size]
            x , y = vectorize_mini_batch(mini_batch)
            # The last batch of an epoch may be shorter than mini_batch_size,
            # so average over the number of samples actually present.
            m = len(mini_batch)

            # feed forward (one column per sample)
            z1 = np.dot(w1, x) + b1
            A1 = sigmoid(z1)
            z2 = np.dot(w2, A1) + b2
            A2 = sigmoid(z2)

            # backpropagate: per-sample errors, deliberately NOT yet averaged
            dz2 = (A2 - y) * sigmoid_prime(z2)            # output-layer error
            dz1 = np.dot(w2.T, dz2) * sigmoid_prime(z1)   # error propagated to hidden layer

            # Average each gradient over the batch exactly once. Folding 1/m
            # into dz2 and then dividing again downstream would shrink the
            # bias and first-layer gradients by extra factors of m.
            dw2 = np.dot(dz2, A1.T) / m
            db2 = np.sum(dz2, axis=1, keepdims=True) / m
            dw1 = np.dot(dz1, x.T) / m
            db1 = np.sum(dz1, axis=1, keepdims=True) / m

            # update parameters
            w2 = w2 - eta * dw2
            w1 = w1 - eta * dw1
            b2 = b2 - eta * db2
            b1 = b1 - eta * db1

        # after every epoch, check the accuracy of the model on test_data
        test_result = [(np.argmax(apply_NN(x, w1, w2, b1, b2)), y) for (x, y) in test_data]
        num_correct = sum(int(predicted == label) for (predicted, label) in test_result)
        print(" Epoch {} : {} / {}".format(j , num_correct , n_test))

    return w1, b1, w2, b2


In [None]:
# Seed both random number generators that SGD draws from (numpy for the
# initial weights, `random` for shuffling) so that re-running this cell, or
# Restart & Run All, reproduces the same trained model.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Train for 30 epochs with mini-batches of 10 samples and learning rate 3.
# SGD returns the parameters in the order (w1, b1, w2, b2).
w1 , b1 , w2 , b2 = SGD(training_data , epochs=30 , mini_batch_size=10 , eta=3 , test_data=test_data)

In [None]:
# Predicted classes for the first ten entries of training_data.
# Argument order matters: predict expects (w1, w2, B1, B2), whereas SGD
# returned the parameters as (w1, b1, w2, b2).
predict(training_data[0:10] , w1,w2, b1 , b2)


In [None]:
# Show the same ten samples as images, for visual comparison with the predictions above.
plot_images(training_data[0:10])