In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
from IPython.display import clear_output

# Load the two CSV files; each has a 'label' column, the remaining columns are the features.
train = pd.read_csv("mnist_train.csv")
test = pd.read_csv("mnist_test.csv")

print('read csv completed!')

# Labels -> one-hot rows (row k of the 10x10 identity encodes digit k).
Y_train = np.matrix(np.eye(10)[train['label'].values])
Y_test = np.matrix(np.eye(10)[test['label'].values])

# Features: everything except the 'label' column, one row per sample.
X_train = np.matrix(train.drop('label', axis=1).values)
X_test = np.matrix(test.drop('label', axis=1).values)

print('Train data size:')
print(X_train.shape)
print('Test data size:')
print(X_test.shape)

# Binarise in place: values below 127 become 0, values of 127 and above become 1.
# Both masks are taken from the untouched data before anything is overwritten,
# so the freshly written 0/1 values can never be re-classified.
for pixels in (X_train, X_test):
    dark = pixels < 127
    bright = pixels >= 127
    pixels[dark] = 0
    pixels[bright] = 1

print('preprocessing completed!')

In [None]:
# Dataset dimensions: number of rows and number of feature columns of each split.
(N, D_in), (N_test, D_in_test) = X_train.shape, X_test.shape

# Layer widths: two hidden layers followed by a 10-unit output layer.
H1, H2 = 65, 15
D_out = 10

# Optimisation settings: initial step size and number of passes over the training set.
learning_rate = 0.01
iteration = 40

# Loss of the previous pass; starts at +inf so the first pass never counts as a worsening.
pre_loss = float("inf")

In [None]:
def test():
    """Return the fraction of test samples the current network classifies correctly.

    Every row of X_test is pushed through the three layers using the
    module-level W1..W3 / B1..B3; the prediction is the argmax of the raw
    output layer and is compared with the argmax of the matching one-hot row
    of Y_test.

    Note: this definition rebinds the module-level name ``test``, which the
    data-loading cell also assigns (the test DataFrame).
    """
    def output_scores(column):
        # Two sigmoid hidden layers, then a linear output layer.
        hidden1 = sigmoid(np.matmul(W1, column) + B1)
        hidden2 = sigmoid(np.matmul(W2, hidden1) + B2)
        return np.matmul(W3, hidden2) + B3

    hits = sum(
        1
        for idx in range(N_test)
        if np.argmax(output_scores(X_test[idx].T)) == np.argmax(Y_test[idx].T)
    )
    return hits / N_test

In [None]:
# Standard-normal initial parameters, drawn in the order W1, W2, W3, B1, B2, B3.
# Weight matrices are (outputs, inputs); biases are column vectors.
W1 = np.random.randn(H1, D_in)
W2 = np.random.randn(H2, H1)
W3 = np.random.randn(D_out, H2)

B1 = np.random.randn(H1, 1)
B2 = np.random.randn(H2, 1)
B3 = np.random.randn(D_out, 1)

In [None]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Computed as exp(-log(1 + exp(-x))) via np.logaddexp, which is
    mathematically the same value but never evaluates exp() of a large
    positive number, so strongly negative inputs no longer overflow.

    Parameters
    ----------
    x : scalar or numpy array / matrix

    Returns
    -------
    Values in [0, 1] with the same shape as ``x``; both calls are ufuncs, so
    an np.matrix input yields an np.matrix result.
    """
    return np.exp(-np.logaddexp(0, -x))

def softmax(X):
    """Softmax over ALL elements of ``X`` (the result sums to 1 in total).

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps every exponent <= 0, so large inputs
    no longer overflow to inf and produce nan after the division.

    Parameters
    ----------
    X : scalar, array-like, numpy array or matrix of scores

    Returns
    -------
    Array of the same shape as ``X`` (np.matrix stays np.matrix) holding
    non-negative values that sum to 1. An empty input gives an empty result.
    """
    X = np.asanyarray(X)
    if X.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(X)
    exps = np.exp(X - np.max(X))
    return exps / np.sum(exps)
  
# Train the network with per-sample (online) gradient descent.
# Each outer pass visits every training row in order, updates the module-level
# parameters W1..W3 / B1..B3 after each sample, then reports a loss and the
# test accuracy and adapts the learning rate for the next pass.
for i in range(iteration):

    # Softmax outputs recorded during this pass (one row per sample). They are
    # produced while the weights are still changing and only feed the loss below.
    Y_hat = np.matrix(np.zeros(Y_train.shape))

    for n in range(N):

        # Row n of the np.matrix inputs, transposed into a column vector.
        x = X_train[n].T
        y = Y_train[n].T

        # forward

        net1 = np.matmul(W1, x) + B1
        out1 = sigmoid(net1)

        net2 = np.matmul(W2, out1) + B2
        out2 = sigmoid(net2)

        net3 = np.matmul(W3, out2) + B3
        out3 = net3
        y_hat = softmax(out3)

        Y_hat[n] = y_hat.T

        # backprop

        # This term is used directly as the gradient w.r.t. net3. For a softmax
        # output with cross-entropy loss that gradient is (y_hat - y), so the
        # value below is twice that, i.e. the step is effectively doubled.
        grad_y = -2 * (y - y_hat)
        grad_B3 = grad_y
        grad_W3 = np.matmul(grad_y, out2.T)

        # out * (1 - out) is the derivative of the sigmoid at that layer.
        grad_out2 = np.matmul(W3.T, grad_y)
        grad_net2 = np.multiply(grad_out2, np.multiply(out2, (1 - out2)))
        grad_B2 = grad_net2
        grad_W2 = np.matmul(grad_net2, out1.T)

        grad_out1 = np.matmul(W2.T, grad_net2)
        grad_net1 = np.multiply(grad_out1, np.multiply(out1, (1 - out1)))
        grad_B1 = grad_net1
        grad_W1 = np.matmul(grad_net1, x.T)

        # update

        W1 = W1 - learning_rate * grad_W1
        W2 = W2 - learning_rate * grad_W2
        W3 = W3 - learning_rate * grad_W3

        B1 = B1 - learning_rate * grad_B1
        B2 = B2 - learning_rate * grad_B2
        B3 = B3 - learning_rate * grad_B3

    # Cross-entropy (base-10 log) of the outputs recorded during this pass.
    # Clip before taking the log: an output that underflowed to exactly 0 would
    # give log10(0) = -inf, and 0 * -inf = nan for a non-target class would turn
    # the whole sum into nan. Outputs at or above the clip floor are unaffected.
    smallest = np.finfo(float).tiny
    loss = -np.sum(np.multiply(Y_train, np.log10(np.clip(Y_hat, smallest, None))))
    print('i: ' + str(i) + ' loss: ' + str(loss) + ' acc: ' + str(test()) + ' learning_rate: ' + str(learning_rate))

    # Adaptive step size: shrink after a worse pass, grow after a better one.
    # A non-finite loss counts as worse; otherwise `nan > pre_loss` is False and
    # the learning rate would keep growing after the training has diverged.
    if (not np.isfinite(loss)) or loss > pre_loss:
        learning_rate *= 0.9
    else:
        learning_rate *= 1.05

    pre_loss = loss

print('train completed!')