In [0]:
# Modules
from sklearn.datasets import fetch_openml
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

In [0]:
# Load MNIST Data
mnist = fetch_openml('mnist_784')
X, y = mnist["data"], mnist["target"]

In [0]:
# Data - PreProcessing
# Feature Scaling of X(Independant) data
X = X / 255.0

# One Hot Encoding of Y(dependant) data
total = y.shape[0]
y = y.reshape(1, total)
Y_new = np.eye(10)[y.astype('int32')]
Y = Y_new.T.reshape(10, total)

In [0]:
# Data splitted into train and test datasets
ntd = 60000
X_train, X_test = X[:ntd].T, X[ntd:].T
Y_train, Y_test = Y[:,:ntd], Y[:,ntd:]

In [0]:
# Hyper Parameters
n_x = 784
n_hn = 64
lr = 4
batch_size = 128
beta = 0.7
batches = ntd // batch_size
epochs = 10

In [0]:
# Weights & Bias Initialization  (He Initialization)
init =  { 
           "W1": np.random.randn(n_hn, n_x) * np.sqrt(1. / n_x),
           "b1": np.zeros((n_hn, 1)) * np.sqrt(1. / n_x),
           "W2": np.random.randn(10, n_hn) * np.sqrt(1. / n_hn),
           "b2": np.zeros((10, 1)) * np.sqrt(1. / n_hn) 
        }

In [0]:
# Optimizer init (Momentum)
V_dW1 = np.zeros(init["W1"].shape)
V_db1 = np.zeros(init["b1"].shape)
V_dW2 = np.zeros(init["W2"].shape)
V_db2 = np.zeros(init["b2"].shape)

In [0]:
# Activation func , Loss func , Forward pass , Backward pass
def sigmoid(z):
    return 1. / (1. + np.exp(-z))

def compute_loss(Y, Y_hat):
    l = np.sum(Y * (np.log(Y_hat)))
    return -(1./Y.shape[1]) * l

def feed_forward(X, init):
    Z1 = np.matmul(init["W1"], X) + init["b1"]
    A1 = sigmoid(Z1)
    Z2 = np.matmul(init["W2"], A1) + init["b2"]
    A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0)
    dic = {"Z1":Z1, "A1":A1, "Z2":Z2, "A2":A2}
    return dic

def back_propagate(X, Y, init, dic):
    dZ2 = dic["A2"] - Y
    dW2 = (1./batch_size) * np.matmul(dZ2, dic["A1"].T)
    db2 = (1./batch_size) * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.matmul(init["W2"].T, dZ2)
    dZ1 = dA1 * sigmoid(dic["Z1"]) * (1 - sigmoid(dic["Z1"]))
    dW1 = (1./batch_size) * np.matmul(dZ1, X.T)
    db1 = (1./batch_size) * np.sum(dZ1, axis=1, keepdims=True)
    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads

In [36]:
# train
for i in range(epochs):

    new_index = np.random.permutation(ntd)
    X_train, Y_train = X_train[:, new_index], Y_train[:, new_index]
    for j in range(batches):

        begin = j * batch_size
        end = begin + batch_size
        X = X_train[:, begin:end]
        Y = Y_train[:, begin:end]
        dic = feed_forward(X, init)
        grads = back_propagate(X, Y, init, dic)

        V_dW1 = (beta * V_dW1 + (1. - beta) * grads["dW1"])
        V_db1 = (beta * V_db1 + (1. - beta) * grads["db1"])
        V_dW2 = (beta * V_dW2 + (1. - beta) * grads["dW2"])
        V_db2 = (beta * V_db2 + (1. - beta) * grads["db2"])

        init["W1"] -= lr * V_dW1
        init["b1"] -= lr * V_db1
        init["W2"] -= lr * V_dW2
        init["b2"] -= lr * V_db2

    dic = feed_forward(X_train, init)
    train_cost = compute_loss(Y_train, dic["A2"])
    dic = feed_forward(X_test, init)
    test_cost = compute_loss(Y_test, dic["A2"])
    print("Epoch {}: training cost = {}, test cost = {}".format(i+1 ,train_cost, test_cost))

print("Report :")

dic = feed_forward(X_test, init)
predictions = np.argmax(dic["A2"], axis=0)
labels = np.argmax(Y_test, axis=0)

print(classification_report(predictions, labels))

Epoch 1: training cost = 0.14695942976786477, test cost = 0.15316682827608508
Epoch 2: training cost = 0.09810110254813233, test cost = 0.11409289504490983
Epoch 3: training cost = 0.07479689385473891, test cost = 0.10193870409883977
Epoch 4: training cost = 0.06255561093864297, test cost = 0.09909269296085231
Epoch 5: training cost = 0.048807986600367, test cost = 0.08378839566323108
Epoch 6: training cost = 0.04349833769877289, test cost = 0.08887109101472838
Epoch 7: training cost = 0.04004782368960049, test cost = 0.08627839113437202
Epoch 8: training cost = 0.03008310106709013, test cost = 0.0844433034352861
Epoch 9: training cost = 0.03186298477064734, test cost = 0.08852843931284714
Epoch 10: training cost = 0.023971687075653766, test cost = 0.08781837792855123
Report :
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       990
           1       0.99      0.99      0.99      1130
           2       0.96      0.98      0.97      