In [1]:
#Import Statements
import numpy as np
import mnist_loader

In [2]:
class Network:
    #Construcutor
    def __init__(self,neurons):
        self.layers = len(neurons)
        self.neurons = neurons
        
        self.weights = []
        self.biases = []
        
        #Initialize weights
        for i in range(1,self.layers):
            #layer_weight = None
            rows  = neurons[i]
            cols = neurons[i-1]
            #Creates a numpy array with dimensions rows x cols
            #At the same time, initializes them with random normal distribution b/w 0 and 1
            layer_weight = np.zeros((rows,cols))
            self.weights.append(layer_weight)
                
            layer_bias = np.zeros((rows,1))
            self.biases.append(layer_bias)

In [3]:
def sigmoid(z):
    return 1.0/(1.0 + np.exp(-z))

In [4]:
def feedforward(network,X):
    
    #Stores the Aj values
    Layer_Activations = []
    
    #Stores the Zj values
    Weighted_Sums = []
    
    for i in range(1,network.layers):
   
        if i==1:
            A = X
        else:
            A  = Layer_Activations[-1]
        
        W = network.weights[i-1]
        B = network.biases[i-1]
        Z = np.dot(W,A) + B
        Weighted_Sums.append(Z)
        A = sigmoid(Z)
        Layer_Activations.append(A)

    return (Weighted_Sums,Layer_Activations)

In [5]:
def test(Output,Target):
    #Output --> (10,Number_of_training_Samples)
    #Target --> (Number_of_traing_Samples)
    
    Prediction = np.argmax(Output,axis=0)
    #Prediction --> (Number_of_training_Samples,)    
    
    accuracy = (sum((Prediction == Target).astype(np.float32))/Target.shape[0])*100.0
    
    return accuracy

In [6]:
def sigmoid_derivative(z):
    #print("sigmoid_derivative =",sigmoid(z)*(1-sigmoid(z)) )
    return sigmoid(z)*(1-sigmoid(z))
    

In [7]:
def cost_gradient(output,label):
    #print("cost_gradient =",output-label)
    return output-label

In [8]:
def backprop(network,delta_output_layer,weighted_sums):
    #Initialize error for all layers
    #delta is the backpropogation error
    
    #Number of output layers --> first layer is an input layer, hence excluded
    n_out = network.layers-1
    
    delta_all_layers = [None]*(n_out)
    
    delta_all_layers[-1] = delta_output_layer
    
    for i in range(n_out-2,-1,-1):
        delta_all_layers[i] = np.dot(network.weights[i+1].transpose(),delta_all_layers[i+1])*sigmoid_derivative(weighted_sums[i])
            
    return delta_all_layers

In [11]:
train_data,valid_data,test_data = mnist_loader.load_data_wrapper()
network = Network([784,16,10])
weighted_sums,activations = feedforward(network,train_data[0])
# print(activations[-1])
delta_output_layer = cost_gradient(activations[-1],train_data[1])*sigmoid_derivative(weighted_sums[-1])
delta_all_layers = backprop(network,delta_output_layer,weighted_sums)
delta_all_layers[::-1]

[array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]]),
 array([[ 0.125, -0.125,  0.125, ...,  0.125,  0.125,  0.125],
        [ 0.125,  0.125,  0.125, ...,  0.125,  0.125,  0.125],
        [ 0.125,  0.125,  0.125, ...,  0.125,  0.125,  0.125],
        ..., 
        [ 0.125,  0.125,  0.125, ...,  0.125,  0.125,  0.125],
        [ 0.125,  0.125,  0.125, ..., -0.125,  0.125, -0.125],
        [ 0.125,  0.125,  0.125, ...,  0.125,  0.125,  0.125]])]