In [None]:
#import relevant libraries
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print("Libraries imported successfully")

In [None]:
# Read the training CSV; `df` is a DataFrame built from it for inspection.
train = pd.read_csv('datasets/train.csv')
df = pd.DataFrame(data=train)

# Preview the first ten rows.
df.head(10)



In [None]:
# Sizes that describe the training subset and the network layout.
m = 100                           # number of training examples
num_layer = 3                     # number of layers
input_nodes = train.shape[1] - 1  # number of input nodes (column count minus the first column)
h1 = 128                          # nodes in hidden layer 1
h2 = 64                           # nodes in hidden layer 2
output_nodes = 10                 # number of output nodes

# Echo the two values worth checking at a glance.
print("No. of examples : {} ".format(m))
print("Input nodes : {} ".format(input_nodes))


In [None]:
#normalize inputs
# Take the first m rows, drop the first (label) column and rescale the values
# with x / 255 * 0.99 + 0.01 (assumes raw pixel values lie in 0..255).
# np.asarray(..., dtype=float) replaces np.asfarray, which NumPy 2.0 removed.
# The reshape is a sanity check that the data really is m x input_nodes.
input = (np.asarray(train.iloc[0:m, 1:], dtype=float).reshape(m, input_nodes) / 255.0 * 0.99) + 0.01

#change output to categorical output
# Every target starts at 0.01; the entry of the true class is set to 0.99.
# The label column is converted once and indexed in a single vectorised step.
labels = np.asarray(train.label)[:m]
output = np.full((m, output_nodes), 0.01)
output[np.arange(m), labels] = 0.99

#transpose output and input so that each column is one example
output = np.transpose(output)
input = np.transpose(input)



In [None]:
# Report the layer sizes, then show the prepared arrays together with their shapes.
print("The size of input is {}.".format(input_nodes))
print("The size of output is {}.".format(output_nodes))
# One print call with a newline separator emits each item on its own line.
print(input.shape, input, "output", output.shape, output, sep="\n")

In [None]:
#define sigmoid function
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

    Only exp(-|x|) is ever computed, so inputs of large magnitude cannot
    overflow the exponential (the naive form overflows for very negative x).

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    numpy.ndarray with the shape of ``x``, or a NumPy scalar for scalar input.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # never larger than 1
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the equivalent e^x / (1 + e^x).
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap 0-d results so scalar input still yields a scalar.
    return result if result.ndim else result[()]



In [None]:
#initialize the required parameters with array of appropriate dimensions
def parameters_initialization(input_nodes, h1, h2, output_nodes, seed=None):
    """Create the weights and biases of the three layers.

    Parameters
    ----------
    input_nodes : int
        Number of input features (columns of W1).
    h1, h2 : int
        Number of nodes in the first and second hidden layer.
    output_nodes : int
        Number of output nodes (rows of W3).
    seed : int or None, optional
        When given, the weights are drawn from a private
        ``np.random.RandomState(seed)`` so the result is reproducible.
        When omitted, NumPy's global random state is used, as before.

    Returns
    -------
    dict
        Keys "W1", "W2", "W3" (random weight matrices) and "b1", "b2", "b3"
        (zero column vectors).
    """
    # Both the np.random module and a RandomState instance expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    #weights are initialized in each layer with random values
    W1 = rng.randn(h1, input_nodes) * 0.1
    b1 = np.zeros((h1, 1))
    # NOTE(review): W2 is scaled by sqrt(1/h2), i.e. by its own row count;
    # fan-in based schemes would use h1. Left unchanged - confirm the intent.
    W2 = rng.randn(h2, h1) * np.sqrt(1. / h2)
    b2 = np.zeros((h2, 1))
    W3 = rng.randn(output_nodes, h2) * 0.01
    b3 = np.zeros((output_nodes, 1))

    parameters = {
        "W1": W1,
        "W2": W2,
        "W3": W3,
        "b1": b1,
        "b2": b2,
        "b3": b3
    }

    return parameters
    

In [None]:
# Build a parameter set and display the three weight matrices, one after another.
parameters = parameters_initialization(input_nodes, h1, h2, output_nodes)
print(parameters["W1"], parameters["W2"], parameters["W3"], sep="\n")

In [None]:
#forward propagation
def forward_propagation(input,parameters):
    """Run the network forward through its three layers.

    The two hidden layers use tanh, the output layer uses ``sigmoid``.

    Parameters
    ----------
    input : array whose rows line up with the columns of ``parameters["W1"]``.
    parameters : dict holding "W1", "W2", "W3", "b1", "b2", "b3".

    Returns
    -------
    (A3, cache) : the output-layer activations and a dict with every
        pre-activation ("Z1".."Z3") and activation ("A1".."A3").
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]

    # hidden layer 1
    Z1 = np.dot(W1, input) + b1
    A1 = np.tanh(Z1)

    # hidden layer 2
    Z2 = np.dot(W2, A1) + b2
    A2 = np.tanh(Z2)

    # output layer
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = dict(A1=A1, A2=A2, A3=A3, Z1=Z1, Z2=Z2, Z3=Z3)
    return A3, cache
    

In [None]:
# One forward pass over the prepared inputs; A3 is the same array the cache stores under "A3".
A3, cache = forward_propagation(input, parameters)
print(A3)

In [None]:
#compute_cost
def compute_cost(A2, output, parameters):
    """Mean binary cross-entropy between activations and targets.

    Both terms of the loss are summed, y*log(a) and (1-y)*log(1-a). This is
    the cost whose gradient with respect to the pre-activation of a sigmoid
    output layer is ``a - y``.

    Parameters
    ----------
    A2 : array of output-layer activations (the name is kept for
        compatibility), same shape as ``output``.
    output : array of targets; the sum is divided by ``output.shape[1]``.
    parameters : unused, kept so existing callers keep working.

    Returns
    -------
    float : the cost.
    """
    # Keep activations strictly inside (0, 1) so that log() stays finite
    # when the sigmoid saturates to exactly 0 or 1.
    eps = 1e-12
    probs = np.clip(A2, eps, 1.0 - eps)
    logprobs = np.multiply(output, np.log(probs)) + np.multiply(1.0 - output, np.log(1.0 - probs))
    cost = -np.sum(logprobs) / output.shape[1]
    return cost


In [None]:
#back propagation
def back_propagation(input,output,learning_rate,parameters, cache):
    """Compute the gradients of the cost for every weight and bias.

    Every gradient is averaged over the number of examples, taken from
    ``input.shape[1]``. The earlier version divided by the constant 784
    (the feature count) and did not average ``dW1`` at all.

    Parameters
    ----------
    input : array passed to the forward pass; its columns are the examples.
    output : array of targets, same shape as ``cache["A3"]``.
    learning_rate : unused here, kept so existing callers keep working.
    parameters : dict; "W2" and "W3" are read.
    cache : dict; the activations "A1", "A2" and "A3" are read.

    Returns
    -------
    dict with keys "dW1", "dW2", "dW3", "db1", "db2", "db3".
    """
    m = input.shape[1]  # number of examples
    A1, A2, A3 = cache["A1"], cache["A2"], cache["A3"]

    # output layer: sigmoid with cross-entropy gives dZ = A - Y
    dZ3 = A3 - output
    dW3 = np.dot(dZ3, A2.T) / m
    db3 = np.sum(dZ3, axis=1, keepdims=True) / m

    # hidden layer 2: the derivative of tanh is 1 - A^2
    dZ2 = np.dot(parameters["W3"].T, dZ3) * (1 - np.power(A2, 2))
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # hidden layer 1
    dZ1 = np.dot(parameters["W2"].T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, input.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {
        'dW1': dW1,
        'dW2': dW2,
        'dW3': dW3,
        'db1': db1,
        'db2': db2,
        'db3': db3
    }

    return grads
    
    

In [None]:
#update parameters
def update_parameters(parameters, grads, learning_rate=None):
    """Apply one gradient-descent step and return the new parameters.

    Parameters
    ----------
    parameters : dict with "W1", "W2", "W3", "b1", "b2", "b3".
    grads : dict with the matching "dW1" ... "db3" gradients.
    learning_rate : float, optional
        Step size. When omitted, the module-level ``learning_rate`` variable
        is used, which is what this function always read before.

    Returns
    -------
    dict : a new parameter dict; the input dict and its arrays are not modified.

    Raises
    ------
    NameError : if ``learning_rate`` is omitted and no such global exists.
    """
    if learning_rate is None:
        # Backward-compatible fallback to the notebook-level variable.
        try:
            learning_rate = globals()["learning_rate"]
        except KeyError:
            raise NameError("name 'learning_rate' is not defined") from None

    updated = {}
    for name in ("W1", "W2", "W3", "b1", "b2", "b3"):
        updated[name] = parameters[name] - learning_rate * grads["d" + name]

    return updated

In [None]:
#train data
num_iterations=2500
learning_rate=0.1
LOG_EVERY=100  # report the cost every LOG_EVERY iterations instead of dumping activations each step
parameters=parameters_initialization(input_nodes, h1, h2, output_nodes)
for epoch in range(0,num_iterations):
    A3,cache=forward_propagation(input,parameters) #forward propagation
    # The cost is measured on the activations produced by the current parameters,
    # i.e. before this iteration's update is applied.
    if epoch % LOG_EVERY == 0 or epoch == num_iterations - 1:
        cost=compute_cost(A3,output,parameters)
        print("Iteration: "+ str(epoch+1)+"/"+str(num_iterations)+" - cost: "+str(cost))
    grads=back_propagation(input,output,learning_rate,parameters,cache) #back propagation
    parameters=update_parameters(parameters,grads) #update parameters

# Shape of the activations from the last forward pass.
print("A3")
print(A3.shape)

    
    

In [None]:
# Read the test CSV as a list of raw text lines.
# The with-block closes the file even if reading raises.
with open("datasets/test.csv", 'r') as test_file:
    test = test_file.readlines()



In [None]:
#showing a random value from the dataset
# The upper bound is capped by the number of lines read, so a short file cannot
# produce an out-of-range index.
# NOTE(review): if test.csv begins with a header row, index 0 is not an image
# and the float conversion below will fail - confirm and skip it if so.
random_int = random.randint(0, min(1000, len(test) - 1))
selected_input = test[random_int].split(',')
# np.asarray(..., dtype=float) replaces np.asfarray, which NumPy 2.0 removed.
image_array = np.asarray(selected_input[0:], dtype=float).reshape((28,28))
plt.imshow(image_array, cmap = 'Greys', interpolation=None)
print("The target value is: " )

In [None]:
#predict
# Scale the selected sample exactly like the training inputs and shape it as a
# single column of input_nodes values, the width the network's W1 expects.
# np.asarray(..., dtype=float) replaces np.asfarray, which NumPy 2.0 removed.
target_input = (np.asarray(selected_input, dtype=float).reshape(input_nodes, 1) / 255.0 * 0.99) + 0.01
A3, cache = forward_propagation(target_input, parameters)
# The index of the largest output activation is the predicted digit.
label = np.argmax(A3)
print("The predicted number is " + str(label))