In [513]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm 
import seaborn as sns
import math

# Loading data

In [514]:
train_fdata = pd.DataFrame(pd.read_csv("mnist_train.csv"))
test_fdata  = pd.DataFrame(pd.read_csv("mnist_test.csv"))
train_label = np.array(train_fdata["label"])
train_data= np.array(train_fdata.drop("label" , axis =1))
test_lable=np.array(test_fdata['label'])
test_data=np.array(test_fdata.drop("label",  axis =1 ))

# Preprocessing

In [515]:
def encoding_labels(data):
    train_label_encoded=np.zeros((10,data.shape[0]))
    for ind in range (data.shape[0]):
        val=data[ind]
        for row in range (10):
            if (val==row):
                train_label_encoded[val,ind]=1
    return train_label_encoded

In [516]:
train_data =np.transpose(train_data)
train_label=encoding_labels(train_label)
print(train_data.shape ,train_label.shape )

(784, 60000) (10, 60000)


# Activation functions


In [517]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))
def tanH(Z):
    return (1-np.exp(-Z))/(1+np.exp(-Z)) 


# Derivative of activation functions

In [518]:
def sigmoid_backward(Z):    
    s = sigmoid(Z)
    dZ = s * (1-s)
    return dZ
def tanH_backward(Z):
    tanh = tanH(Z)
    dZ = (1 + tanh)*( 1- tanh) 
    return dZ
    

# Initialization

In [519]:
def initialize_parameters(num_of_hidden_layers, number_of_neurons ,num_of_epochs  ,input_layer, output_layer ,
                          Activation_func = "sigmoid" , Add_bias=True , eta = 0.01  ):
    #initialize parameters
    parameters = {}
    parameters["Activation_function"]=Activation_func
    parameters["learning_rate"] =eta
    parameters["Num_of_epochs"] = num_of_epochs
    parameters["Add_bias"] = Add_bias
    parameters["number_of_layers"] =num_of_hidden_layers+1
    
    #initialize weights
    weights ={}
    L = num_of_hidden_layers+2           # number of layers in the network
    layers = [input_layer]
    for i in range(num_of_hidden_layers):
        layers.append(number_of_neurons)
    layers.append(output_layer)
    for l in range(1, L):
        weights['W' + str(l)] = np.random.randn(layers[l] , layers[l-1])
        weights['b' + str(l)] = np.zeros(( layers[l] ,1))
    
    return parameters , weights

# Backpropogation Model

In [520]:
def Compute_output(X, W, b):
    return (np.dot(W,X)) +b

In [521]:
def linear_activation_forward(A_prev, W, b, activation):
    Z = Compute_output(A_prev, W, b)
    if activation == "sigmoid":
        A = sigmoid(Z)
    elif activation == "tanH":
        A = tanH(Z)
    caches = (A_prev , W , b , Z)
    return A, caches

In [522]:
def feedforward_propagation(X, parameters , weights):
    A = X
    caches = []
    num_of_layers = parameters["number_of_layers"]+1             
    for l in range(1, num_of_layers):
        A , cache = linear_activation_forward(A, weights['W' + str(l)], weights['b' + str(l)],parameters["Activation_function"])  
        caches.append(cache)               
    return A , caches 

In [523]:
def Compute_backward(dZ, A_prev, W, b):
    m = A_prev.shape[1]
    dW = 1./m * np.dot(dZ,A_prev.T)  
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T,dZ)
    return dA_prev, dW, db
    
def linear_activation_backward(dA, cache, activation):
    A_prev, W, b, Z = cache
    dZ=0
    if activation == "tanH":
        dZ = dA* tanH_backward(Z) 
    elif activation == "sigmoid":
        dZ = dA* sigmoid_backward(Z)
   
    m = A_prev.shape[1]
    dW = 1./m * np.dot(dZ,A_prev.T)  
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T,dZ)
    return dA_prev, dW, db

In [524]:
def L_model_backward(AL, Y, caches  , parameters ):
    grads = {}
    L = len(caches)   # the number of layers
    dAL = AL - Y
    M= parameters["number_of_layers"]+1
    current_cache = caches[M-2]

    dA_prev_temp, dW_temp, db_temp  = linear_activation_backward(dAL, current_cache,parameters["Activation_function"])
    
    grads["dA"+str(M-1)]=dA_prev_temp
    grads["dW"+str(M-1)]=dW_temp
    grads["db"+str(M-1)]=db_temp
             
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l +2)], current_cache,parameters["Activation_function"])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)]=db_temp
 
    
    return grads

In [525]:
def update_parameters(weights, grads, learning_rate ,parameters ):
    for l in range(len(weights)//2):
        weights["W" + str(l+1)] =weights["W" + str(l+1)] - (learning_rate*grads["dW" + str(l+1)])
        if(parameters["Add_bias"]):
            weights["b" + str(l+1)] = weights["b" + str(l+1)] - (learning_rate*grads["db" + str(l+1)])
    return weights

In [526]:
def model(parameters , weights , X ,Y):
    for i in range(0, parameters["Num_of_epochs"]):
        # Implementing feedforward propagation
        A1 , caches = feedforward_propagation(X, parameters , weights)
       
        # Calculating cost
        e = A1 - Y
        
        A_prev , W , b , Z = caches[-1]
        
        
        dw1 = e * sigmoid_backward(Z)  
        L = len(caches)
        print(i)
        #dA_prev_temp, dW_temp, db_temp  = linear_activation_backward(e, caches,parameters["Activation_function"])
        grads = L_model_backward(A1, Y, caches ,parameters)
        weights = update_parameters(weights, grads, parameters["learning_rate"] ,parameters) 
    return parameters , weights

        
    

In [527]:
def fit(X, Y, learning_rate , num_iterations ,num_of_hidden_layers , number_of_neurons , Activation_func, Add_bias=True):

    parameters ,weights = initialize_parameters(num_of_hidden_layers, number_of_neurons ,num_iterations , X.shape[0],
    Y.shape[0] , Activation_func , Add_bias=True , eta = 0.01)
    parameters ,weights  = model(parameters , weights , X ,Y)
    return parameters ,weights 




In [528]:
parameters,weights = fit(train_data, train_label, 0.01 ,  30 , 4 , 16 , "sigmoid" ) 

  return 1/(1+np.exp(-Z))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


In [529]:
def compute_cost(A, Y):
    m = Y.shape[1]
    count =0
    for i in range(m):
        ind1 = -1
        for j in range(10):
            if Y[j][i]==1:
                ind1=j
                break
        ind2=-1
        maxi= -400
        for j in range(10):
            if A[j][i]>maxi:
                ind2=j
                maxi=A[j][i]
        print(ind1 ,ind2,A[ind2][i] )
        if ind1 == ind2:
            count+=1      
    return count/m

In [530]:
a ,_= feedforward_propagation(train_data, parameters,weights) 

  return 1/(1+np.exp(-Z))


In [531]:
compute_cost(a ,train_label)

5 2 0.929850780451635
0 5 0.9354152973896673
4 5 0.9163272279338901
1 5 0.9561296562896131
9 5 0.9190897537323159
2 2 0.923066999463854
1 5 0.9381685413221191
3 2 0.9229907238480606
1 2 0.9256024615962233
4 2 0.9072179421025636
3 5 0.9498891568841391
5 5 0.9328505352726777
3 5 0.954477460158675
6 5 0.9255359196737615
1 2 0.9469155497832258
7 5 0.9200008688596498
2 2 0.9067513286513177
8 5 0.9397952964191932
6 5 0.9576443266371711
9 5 0.9381108281970276
4 2 0.9193718456503029
0 5 0.9278576290218313
9 2 0.9227669101517934
1 5 0.9561296562896131
1 5 0.9592354286662444
2 5 0.9461699150710742
4 5 0.9337150486637686
3 5 0.923331934765992
2 2 0.9228938718799391
7 5 0.9457789249575644
3 2 0.9365821448328364
8 2 0.9216339004049464
6 2 0.9540602462478809
9 5 0.9304736561408042
0 2 0.9205577845333063
5 2 0.9501249998258737
6 5 0.9218399570122681
0 2 0.9307389063387222
7 5 0.9158737785136365
6 2 0.930787387643198
1 5 0.920476239969579
8 2 0.924579626921376
7 5 0.9364721522801769
9 5 0.937622796476

0.06636666666666667

In [532]:
weights

{'W1': array([[-1.420337  ,  1.46669415, -0.08205571, ..., -1.89730857,
          0.00894722,  0.14664122],
        [-0.90934021,  0.05984176,  1.67799412, ..., -0.25829894,
         -0.37237772,  0.17322177],
        [-0.23101995, -0.01667571,  0.63282972, ..., -1.22674881,
         -0.138443  ,  0.19919932],
        ...,
        [-0.79025111,  1.20041178,  0.25429234, ...,  1.34427522,
          1.34208695, -1.47669431],
        [-2.40237637,  0.28932081, -0.83641049, ...,  0.78711178,
          1.21399251,  0.82391157],
        [ 0.92852066,  0.52905301, -0.7975909 , ..., -1.21934091,
          0.10179547,  0.96383929]]),
 'b1': array([[ 9.53513738e-07],
        [ 6.26595665e-08],
        [ 5.38934789e-07],
        [-1.35324050e-06],
        [-5.71499722e-07],
        [-2.49314903e-07],
        [ 3.83120720e-07],
        [-9.08160621e-07],
        [ 2.68697185e-06],
        [ 6.72599911e-08],
        [ 7.93214131e-07],
        [-9.95997755e-07],
        [ 1.22575450e-06],
        [ 