In [495]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm 
import seaborn as sns
import math

# Loading data

In [496]:
train_fdata = pd.DataFrame(pd.read_csv("mnist_train.csv"))
test_fdata  = pd.DataFrame(pd.read_csv("mnist_test.csv"))
train_label = np.array(train_fdata["label"])
train_data= np.array(train_fdata.drop("label" , axis =1))
test_label=np.array(test_fdata['label'])
test_data=np.array(test_fdata.drop("label",  axis =1 ))

# Preprocessing

In [497]:
def encoding_labels(data):
    train_label_encoded=np.zeros((10,data.shape[0]))
    for ind in range (data.shape[0]):
        val=data[ind]
        for row in range (10):
            if (val==row):
                train_label_encoded[val,ind]=1
    return train_label_encoded

In [498]:
train_data =np.transpose(train_data)
train_label=encoding_labels(train_label)
test_data =np.transpose(test_data) 
test_label=encoding_labels(test_label)
print(train_data.shape ,train_label.shape , test_data.shape ,test_label.shape )

(784, 60000) (10, 60000) (784, 10000) (10, 10000)


# Activation functions


In [499]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))
def tanH(Z):
    return (np.tanh(Z)) 


# Derivative of activation functions

In [500]:
def sigmoid_backward(Z):    
    s = sigmoid(Z)
    dZ = s * (1-s)
    return dZ
def tanH_backward(Z):
    tanh = tanH(Z)
    dZ = (1 + tanh)*( 1- tanh) 
    return dZ
    

# Initialization

In [501]:
def initialize_parameters(num_of_hidden_layers, number_of_neurons ,num_of_epochs  ,input_layer, output_layer ,
                          Activation_func = "sigmoid" , Add_bias=True , eta = 0.01  ):
    #initialize parameters
    parameters = {}
    parameters["Activation_function"]=Activation_func
    parameters["learning_rate"] =eta
    parameters["Num_of_epochs"] = num_of_epochs
    parameters["Add_bias"] = Add_bias
    parameters["number_of_layers"] =num_of_hidden_layers+1
    
    #initialize weights
    weights ={}
    L = num_of_hidden_layers+2           # number of layers in the network
    layers = [input_layer]
    for i in range(num_of_hidden_layers):
        layers.append(number_of_neurons)
    layers.append(output_layer)
    for l in range(1, L):
        weights['W' + str(l)] = np.random.randn(layers[l] , layers[l-1])
        weights['b' + str(l)] = np.zeros(( layers[l] ,1))
    
    return parameters , weights

# Backpropogation Model

<h4> Feedforward propagation</h4>

In [502]:
def feedforward_propagation(X, parameters , weights):
    caches = []
    A = X
    num_of_layers = parameters["number_of_layers"]+1             
    for l in range(1, num_of_layers):
        W ,b , activation = weights['W' + str(l)], weights['b' + str(l)],parameters["Activation_function"]
        Z = (np.dot(W,A)) +b
        cache = (A , W , b , Z)
        if activation == "sigmoid":
            A = sigmoid(Z)
        elif activation == "tanH":
            A = tanH(Z)
        caches.append(cache)               
    return A , caches 

<h4> Backward Model</h4>

In [503]:
def activation_backward(dA, cache, activation):
    A_prev, W, b, Z = cache
    if activation == "tanH":
        dZ = dA* tanH_backward(Z) 
    elif activation == "sigmoid":
        dZ = dA* sigmoid_backward(Z)
    m = A_prev.shape[1]
    dW = 1./m * np.dot(dZ,A_prev.T)  
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T,dZ)
    return dA_prev, dW, db

In [504]:
def L_model_backward(AL, Y, caches  , parameters ):
    #output layer update
    grads = {}
    L = parameters["number_of_layers"]  
    E = AL - Y
    current_cache = caches[L-1]
    dA_prev_temp, dW_temp, db_temp  = activation_backward(E, current_cache,parameters["Activation_function"])
    grads["dA"+str(L)]=dA_prev_temp
    grads["dW"+str(L)]=dW_temp
    grads["db"+str(L)]=db_temp
    # hidden layersupdate
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = activation_backward(grads["dA" + str(l +2)], current_cache,parameters["Activation_function"])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)]=db_temp
    return grads

<h4> update parameters</h4>

In [505]:
def update_parameters(weights, grads, learning_rate ,parameters ):
    for l in range(parameters["number_of_layers"]):
        weights["W" + str(l+1)] =weights["W" + str(l+1)] + (learning_rate*grads["dW" + str(l+1)])
        if(parameters["Add_bias"]):
            weights["b" + str(l+1)] = weights["b" + str(l+1)] + (learning_rate*grads["db" + str(l+1)])
    return weights

In [506]:
def model(parameters , weights , X ,Y):
    for i in range(0, parameters["Num_of_epochs"]):
        # Implementing feedforward propagation
        A1 , caches = feedforward_propagation(X, parameters , weights)
        # Calculating error 
        L = len(caches)
        grads = L_model_backward(A1, Y, caches ,parameters)
        #updating parameters
        weights = update_parameters(weights, grads, parameters["learning_rate"] ,parameters) 
    return parameters , weights   

In [507]:
def fit(X, Y, learning_rate , num_iterations ,num_of_hidden_layers , number_of_neurons , Activation_func, Add_bias=True):
    parameters ,weights = initialize_parameters(num_of_hidden_layers, number_of_neurons ,num_iterations , X.shape[0],
    Y.shape[0] , Activation_func , Add_bias=True , eta = learning_rate)
    parameters ,weights  = model(parameters , weights , X ,Y)
    return parameters ,weights 

In [508]:
parameters,weights = fit(train_data, train_label, 0.05 ,  5 , 4 , 16 , "sigmoid" ) 

  return 1/(1+np.exp(-Z))


In [509]:
def compute_cost(A, Y):
    m = Y.shape[1]
    count =0
    for i in range(m):
        ind1 = -1
        for j in range(10):
            if Y[j][i]==1:
                ind1=j
                break
        ind2=-1
        maxi= -400
        for j in range(10):
            if A[j][i]>maxi:
                ind2=j
                maxi=A[j][i]
       # print(ind1 ,ind2,A[ind2][i] )
        if ind1 == ind2:
            count+=1      
    return count/m

In [510]:
a ,_= feedforward_propagation(test_data, parameters,weights) 

  return 1/(1+np.exp(-Z))


In [511]:
compute_cost(a ,test_label)

0.0958