In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
# Load the training CSV into a DataFrame (path is relative to the notebook's
# working directory). Later cells use column 0 as the label and the remaining
# columns as the input features.
data = pd.read_csv('mnist_train.csv')

In [None]:
# Convert the DataFrame to a plain NumPy array so it can be shuffled and sliced by row.
training_data = np.array(data)
# m = number of rows (samples), n = number of columns (label + features).
m,n = training_data.shape
print(m,n)

In [None]:
# Seed NumPy's global RNG first so that this shuffle (and therefore the
# validation/test/train split) and the later random weight initialisation are
# reproducible under Restart Kernel -> Run All.
np.random.seed(42)
# Shuffle the rows in place so the contiguous slices taken below are random subsets.
np.random.shuffle(training_data)

In [None]:
# Validation / test / train split.
# Each slice of rows is transposed so that samples become columns: row 0 of the
# transposed block holds the labels and the remaining rows hold the features.
data_Val = training_data[0:9000].T
Y_Val = data_Val[0]
X_Val = data_Val[1:]

Test_data = training_data[9000:18000].T
Y_test = Test_data[0]
X_test = Test_data[1:]

data_train = training_data[18000:].T
Y_train = data_train[0]
X_train = data_train[1:]

# NOTE(review): input scaling is currently disabled. It used to live here as bare
# triple-quoted strings, which Jupyter echoes as cell output; it is kept as a
# comment instead. If it is re-enabled, apply the same scaling to X_Val, X_test
# and X_train so evaluation matches training:
# X_train = X_train / 255.
# _,m_train = X_train.shape

In [None]:
# Sanity check: features are stored as (n_features, n_training_samples) because
# the split cell transposes each slice.
X_train.shape

In [None]:
# Sanity check: one label per training sample (should match X_train's second dimension).
Y_train.shape

In [None]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Evaluated in a numerically stable form: only ``exp(-|x|)`` is ever computed,
    so large-magnitude inputs cannot overflow. The naive ``np.exp(-x)`` overflows
    and emits a RuntimeWarning once ``x`` drops below roughly -709.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Values in [0, 1], with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in [0, 1]; never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function, rearranged.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves n-d arrays as-is.
    return result[()]

# Derivative of the sigmoid activation.
def d_sigmoid(x):
    """Element-wise derivative of the sigmoid, ``sigmoid(x) * (1 - sigmoid(x))``.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s) at which to evaluate the derivative.

    Returns
    -------
    Same shape as ``x``.
    """
    # Evaluate the activation once and reuse it instead of calling sigmoid twice.
    s = sigmoid(x)
    return np.multiply(s, 1 - s)

In [None]:
def init_params():
    """Create randomly initialised weights and biases for the 784-30-10 network.

    Each layer's weights and biases are drawn uniformly from
    ``[-1/sqrt(fan_in), 1/sqrt(fan_in))``, where ``fan_in`` is the number of
    inputs feeding that layer.

    Returns
    -------
    W1 : ndarray, shape (30, 784)
    b1 : ndarray, shape (30, 1)
    W2 : ndarray, shape (10, 30)
    b2 : ndarray, shape (10, 1)
    """
    first_layer = 784
    hidden_layer = 30
    final_layer = 10

    # Hidden layer: fed by the input layer.
    limit = 1 / np.sqrt(first_layer)
    W1 = np.random.uniform(low=-limit, high=limit, size=(hidden_layer, first_layer))
    b1 = np.random.uniform(low=-limit, high=limit, size=(hidden_layer, 1))

    # Output layer: fed by the hidden layer, so its bound is based on hidden_layer.
    # (limit2 used to be computed but ignored; the first-layer limit was used instead.)
    limit2 = 1 / np.sqrt(hidden_layer)
    W2 = np.random.uniform(low=-limit2, high=limit2, size=(final_layer, hidden_layer))
    b2 = np.random.uniform(low=-limit2, high=limit2, size=(final_layer, 1))

    return W1, b1, W2, b2

In [None]:
def Convert_Y(Y):
    """One-hot encode integer class labels for a 10-class problem.

    Parameters
    ----------
    Y : ndarray of int
        Class indices, one per sample.

    Returns
    -------
    ndarray, shape (10, Y.size)
        Column ``j`` is all zeros except for a 1 in row ``Y[j]``.
    """
    n_samples = Y.size
    one_hot = np.zeros((n_samples, 10))
    sample_idx = np.arange(n_samples)
    # Set, for every sample (row), the column given by its label.
    one_hot[sample_idx, Y] = 1
    # Transpose so that samples are columns, matching the network's layout.
    return one_hot.T

In [None]:
def Forward_Propogation(W1,W2,b1,b2,x):
    """Run one forward pass through the two-layer sigmoid network.

    Parameters
    ----------
    W1, b1 : hidden-layer weights and bias.
    W2, b2 : output-layer weights and bias.
    x : input batch; it is left-multiplied by W1, so samples are columns.

    Returns
    -------
    (z1, a1, z2, a2)
        Pre-activation and activation of the hidden layer, followed by the
        pre-activation and activation of the output layer.
    """
    hidden_pre = np.dot(W1, x) + b1
    hidden_act = sigmoid(hidden_pre)

    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act

In [None]:
def Backward_propogation(x,y,W2,W1,a2,a1,z2,z1):
    """Compute the gradients of the loss for one mini-batch.

    Parameters
    ----------
    x : input batch, samples as columns.
    y : integer labels for the batch; one-hot encoded internally via Convert_Y.
    W2 : output-layer weights, used to propagate the error back to the hidden layer.
    W1 : hidden-layer weights. Not used by the computation; kept so existing
        callers do not have to change.
    a2, a1 : output-layer and hidden-layer activations from the forward pass.
    z2, z1 : output-layer and hidden-layer pre-activations from the forward pass.

    Returns
    -------
    (dW2, dW1, db2, db1)
        dW2 and dW1 have the shapes of W2 and W1. db2 and db1 are column vectors
        with one entry per unit of the corresponding layer.
    """
    Y = Convert_Y(y)

    # Output-layer error, averaged over the batch.
    delta2 = 1/len(y) * (a2 - Y) * d_sigmoid(z2)
    dW2 = delta2.dot(a1.T)
    # Sum over the sample axis only. Summing over every axis collapses the result
    # to a single scalar, which would give every bias in the layer the same update.
    db2 = np.sum(delta2, axis=1, keepdims=True)

    # Hidden-layer error, back-propagated through W2.
    delta1 = W2.T.dot(delta2) * d_sigmoid(z1)
    dW1 = delta1.dot(x.T)
    db1 = np.sum(delta1, axis=1, keepdims=True)

    return dW2, dW1, db2, db1


In [None]:
# Mean-squared-error loss
def calc_loss(A2, Y):
    """Mean squared error between the network output and the one-hot labels.

    Parameters
    ----------
    A2 : output-layer activations, samples as columns.
    Y : integer labels; one-hot encoded internally via Convert_Y.

    Returns
    -------
    The squared error summed over all entries, divided by the number of entries
    in the one-hot label matrix.
    """
    targets = Convert_Y(Y)
    residual = A2 - targets
    squared_total = np.sum(residual * residual)
    return 1/targets.size * squared_total

In [None]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, lr):
    """Apply one gradient-descent step to every parameter.

    Each parameter ``p`` with gradient ``g`` becomes ``p - lr * g``. New arrays
    are returned; the inputs are not modified in place.

    Returns
    -------
    (W1, b1, W2, b2) after the update.
    """
    params = (W1, b1, W2, b2)
    grads = (dW1, db1, dW2, db2)
    new_W1, new_b1, new_W2, new_b2 = (p - lr * g for p, g in zip(params, grads))
    return new_W1, new_b1, new_W2, new_b2

In [None]:
def Accuracy_Check(A2,Y):
    """Percentage of samples whose highest-scoring output row equals the label.

    Parameters
    ----------
    A2 : output activations, one column per sample.
    Y : integer labels, one per sample.

    Returns
    -------
    Accuracy as a percentage in [0, 100].
    """
    predicted = np.argmax(A2, 0)
    n_correct = np.sum(predicted == Y)
    return 100 * n_correct/Y.size

In [None]:
def _plot_curve(values, ylabel):
    """Plot one per-epoch metric on its own figure."""
    plt.figure(dpi = 125)
    plt.plot(values)
    plt.xlabel("Epochs")
    plt.ylabel(ylabel)


def train(iterations=50,learning=0.035,batch_size = 30):
    """Train the network with mini-batch gradient descent and plot the history.

    Reads the module-level ``X_train`` / ``Y_train`` arrays, initialises fresh
    parameters with ``init_params`` and, for every epoch, runs a forward pass,
    a backward pass and a parameter update on each mini-batch. The mean
    per-batch loss and accuracy of each epoch are printed and then plotted.

    Parameters
    ----------
    iterations : int
        Number of epochs (full passes over the training data).
    learning : float
        Learning rate used for every update.
    batch_size : int
        Target number of samples per mini-batch.

    Returns
    -------
    (W1, b1, W2, b2)
        The trained parameters.
    """
    epochs = iterations
    lr = learning
    loss = []
    epoch_acc = []

    W1, b1, W2, b2 = init_params()

    # np.array_split accepts a sample count that is not a multiple of batch_size
    # (np.split raises ValueError in that case). When it divides evenly the
    # batches are identical to what np.split produced. max(1, ...) covers a
    # batch_size larger than the training set.
    n_batches = max(1, int(X_train.shape[1] / batch_size))
    batch_x = np.array_split(X_train, n_batches, axis=1)
    batch_y = np.array_split(Y_train, n_batches, axis=0)

    for i in range(epochs):
        loss_val = []
        acc = []
        for x, y in zip(batch_x, batch_y):
            Z1, A1, Z2, A2 = Forward_Propogation(W1, W2, b1, b2, x)

            dW2, dW1, db2, db1 = Backward_propogation(x, y, W2, W1, A2, A1, Z2, Z1)
            W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, lr)

            # Metrics use the activations computed before this batch's update.
            loss_val.append(calc_loss(A2, y))
            acc.append(Accuracy_Check(A2, y))

        mean_loss = np.average(loss_val)
        mean_acc = np.average(acc)
        loss.append(mean_loss)
        epoch_acc.append(mean_acc)

        print("Epoch Completed: " , i)
        print("Accuracy after",i," Epoch: ",mean_acc)
        print("Loss after ",i," Epoch: ",mean_loss)

    _plot_curve(loss, "Loss")
    _plot_curve(epoch_acc, "Accuracy")

    return W1,b1,W2,b2
        

In [None]:
# Run training: 50 epochs, learning rate 0.03, mini-batches of 12 samples.
w1, b1, w2, b2 = train(iterations=50, learning=0.03, batch_size=12)

In [None]:
# Validation accuracy
def validation_prpediction(w1,b1,w2,b2,x,y):
    """Print the labels, then the accuracy of the given parameters on (x, y).

    Parameters
    ----------
    w1, b1, w2, b2 : trained network parameters.
    x : input features, samples as columns.
    y : integer labels, one per sample.
    """
    forward_out = Forward_Propogation(w1, w2, b1, b2, x)
    output_act = forward_out[3]  # only the output-layer activation is needed
    print(y)
    accuracy = Accuracy_Check(output_act, y)
    print("Validation Accuracy: ", accuracy)

# Evaluate the trained parameters on the held-out validation split.
validation_prpediction(w1,b1,w2,b2,X_Val,Y_Val)

In [None]:
# Test accuracy
def Test_Prediction(w1,b1,w2,b2,x,y):
    """Print the labels, then the accuracy of the given parameters on (x, y).

    Parameters
    ----------
    w1, b1, w2, b2 : trained network parameters.
    x : input features, samples as columns.
    y : integer labels, one per sample.
    """
    forward_out = Forward_Propogation(w1, w2, b1, b2, x)
    output_act = forward_out[3]  # only the output-layer activation is needed
    print(y)
    accuracy = Accuracy_Check(output_act, y)
    print("Test Accuracy: ", accuracy)

# Evaluate the trained parameters on the held-out test split.
Test_Prediction(w1,b1,w2,b2,X_test,Y_test)