In [None]:
#Import Statements
import numpy as np
import mnist_loader

In [None]:
class Network:
    """Container for the parameters of a fully connected feed-forward network.

    Attributes:
        layers:  number of layers, counting the input layer.
        neurons: the layer sizes exactly as passed to the constructor.
        weights: one array per non-input layer, shaped
                 (neurons[k + 1], neurons[k]).
        biases:  one column vector per non-input layer, shaped
                 (neurons[k + 1], 1).
    """

    def __init__(self,neurons):
        """Create randomly initialised weights and biases.

        Args:
            neurons: sequence of layer sizes, input layer first,
                     e.g. [784, 16, 10].
        """
        self.neurons = neurons
        self.layers = len(neurons)

        self.weights, self.biases = [], []

        # Walk over consecutive (previous layer, current layer) size pairs.
        for fan_in, fan_out in zip(neurons[:-1], neurons[1:]):
            # np.random.randn samples the standard normal distribution
            # (mean 0, variance 1). The weight matrix is drawn before the
            # bias vector of the same layer, so a seeded run is reproducible.
            self.weights.append(np.random.randn(fan_out, fan_in))
            self.biases.append(np.random.randn(fan_out, 1))

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise.

    Args:
        z: scalar or NumPy array.

    Returns:
        Value(s) in [0, 1] with the same shape as z.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

In [None]:
def feedforward(network,X):
    """Propagate X through every layer of the network.

    Args:
        network: object exposing `layers`, `weights` and `biases`.
        X: input to the first weight matrix; assumed to be laid out as
           (features, samples) so that np.dot(W, X) is defined -- confirm
           against the data loader.

    Returns:
        Tuple (weighted_sums, activations): two lists with one entry per
        non-input layer, holding the pre-activation values Z and the
        sigmoid outputs A respectively.
    """
    weighted_sums = []   # Z values, one per non-input layer
    activations = []     # A values, one per non-input layer

    # The input of the first layer is X; afterwards it is always the
    # activation produced by the previous layer.
    current = X
    for layer in range(network.layers - 1):
        z_values = np.dot(network.weights[layer], current) + network.biases[layer]
        weighted_sums.append(z_values)

        current = sigmoid(z_values)
        activations.append(current)

    return (weighted_sums, activations)

In [None]:
def test(network,test_data):
    """Return the classification accuracy of the network, in percent.

    Args:
        network: network object accepted by `feedforward`.
        test_data: indexable pair; test_data[0] is the input matrix and
            test_data[1] the integer class labels. Per the original notes
            the network output is (10, n_samples) and the labels are
            (n_samples,) -- other label layouts (e.g. one-hot or
            (n_samples, 1)) would broadcast incorrectly; verify against
            the loader.

    Returns:
        Percentage (0-100) of samples whose arg-max output equals the label,
        as a Python float.
    """
    _,activations = feedforward(network,test_data[0])

    Output = activations[-1]
    Target = test_data[1]

    # Predicted class = index of the strongest output unit for each sample.
    Prediction = np.argmax(Output,axis=0)

    # Count matches in one vectorized call; the builtin sum() would iterate
    # over the array element by element in Python.
    n_correct = np.count_nonzero(Prediction == Target)

    accuracy = (n_correct/Target.shape[0])*100.0

    return accuracy

In [None]:
def sigmoid_derivative(z):
    """Derivative of the logistic function: sigmoid(z) * (1 - sigmoid(z)).

    Args:
        z: scalar or NumPy array of pre-activation values.

    Returns:
        Element-wise derivative with the same shape as z.
    """
    # Evaluate the sigmoid once and reuse it; the result is identical to
    # calling sigmoid(z) twice but avoids a second exp() pass over z.
    activation = sigmoid(z)
    return activation*(1-activation)
    

In [None]:
def cost_gradient(output,label):
    """Difference between the network output and the target.

    Args:
        output: value(s) produced by the network.
        label: target value(s), broadcast-compatible with output.

    Returns:
        output - label.
    """
    difference = output - label
    return difference

In [None]:
def backprop(network,delta_output_layer,weighted_sums):
    """Propagate the output-layer error back through the hidden layers.

    Args:
        network: object exposing `layers` and `weights`.
        delta_output_layer: error term of the last layer.
        weighted_sums: pre-activation values per non-input layer, as
            returned by `feedforward`.

    Returns:
        List with one error term (delta) per non-input layer; the last
        entry is delta_output_layer itself.
    """
    # The input layer carries no error term, hence layers - 1 entries.
    n_out = network.layers - 1

    deltas = [None] * n_out
    deltas[-1] = delta_output_layer

    # Visit the hidden layers from the last one down to the first.
    for layer in reversed(range(n_out - 1)):
        downstream_delta = deltas[layer + 1]
        propagated = np.dot(network.weights[layer + 1].transpose(), downstream_delta)
        deltas[layer] = propagated * sigmoid_derivative(weighted_sums[layer])

    return deltas

In [None]:
def update_weights(network,X,layer_activations,delta_all_layers,alpha):
    """Apply one gradient-descent step to every weight matrix and bias vector.

    Both gradients are averaged over the batch. Previously the weight
    gradient was summed over all samples while the bias gradient was
    averaged, which made the weight step scale with the batch size.

    Args:
        network: object exposing `layers`, `weights` and `biases`; its
            arrays are modified in place.
        X: network input, with samples along axis 1.
        layer_activations: activations per non-input layer, as returned by
            `feedforward`.
        delta_all_layers: error terms per non-input layer, as returned by
            `backprop`; each is 2-D with samples along axis 1.
        alpha: learning rate.

    Returns:
        None.
    """
    n_weights = network.layers-1
    for i in range(n_weights):

        # Layer i is fed by X (first layer) or by the previous activation.
        layer_input = X if i == 0 else layer_activations[i-1]

        delta = delta_all_layers[i]
        n_samples = delta.shape[1]

        # The dot product sums the per-sample outer products; divide by the
        # batch size so the weight gradient is a mean, like the bias gradient.
        dcdw = np.dot(delta,layer_input.transpose()) / n_samples
        dcb = np.mean(delta,axis=1,keepdims=True)

        network.weights[i] -= alpha * dcdw
        network.biases[i] -= alpha * dcb

    return

In [None]:
def train_GD(network,train_data,valid_data,alpha=1,epochs=50):
    """Train the network with full-batch gradient descent.

    Every epoch runs one forward pass over the whole training set, one
    backward pass and one parameter update, then prints the accuracy on
    the validation set.

    Args:
        network: network object whose weights and biases are updated in place.
        train_data: indexable pair; train_data[0] is the input matrix and
            train_data[1] the targets, which must be broadcast-compatible
            with the network output (presumably one-hot columns -- confirm
            against the loader).
        valid_data: pair in the format expected by `test`.
        alpha: learning rate passed to `update_weights`.
        epochs: number of passes over the training set.

    Returns:
        None.
    """
    for i in range(epochs):

        #Step 1: Calculate Output
        weighted_sums,activations = feedforward(network,train_data[0])

        #Step 2: Calculate Error at final layer
        delta_output_layer = cost_gradient(activations[-1],train_data[1])*sigmoid_derivative(weighted_sums[-1])

        #Step 3: Backpropagate Error
        delta_all_layers = backprop(network,delta_output_layer,weighted_sums)

        #Step 4: Update Weights
        # Use the caller-supplied learning rate; it was previously ignored
        # in favour of a hard-coded 1.
        update_weights(network,train_data[0],activations,delta_all_layers,alpha)

        #Step 5: Validation Testing
        accuracy = test(network,valid_data)

        print("End of Epoch",i," accuracy =",accuracy)

    return

In [None]:
# Load the MNIST splits through the project-local loader.
train_data,valid_data,test_data = mnist_loader.load_data_wrapper()

# Seed NumPy's global RNG right before the weights are drawn so that the
# random initialisation, and therefore the reported accuracies, are
# reproducible when this cell (or the whole notebook) is re-run.
SEED = 42
np.random.seed(SEED)

# 784 input pixels -> 16 hidden units -> 10 output classes.
network = Network([784,16,10])
train_GD(network,train_data,valid_data,alpha=0.5,epochs=100)

# Final evaluation on the held-out test split.
accuracy = test(network,test_data)
print("On Testing Data: Accuracy =",accuracy)