In [None]:
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Read the MNIST test and train CSV files as float arrays; the first line of
# each file is skipped and fields are split on commas.
test_Set = np.loadtxt(fname='mnist_test.csv', delimiter=',', skiprows=1)
train_Set = np.loadtxt(fname='mnist_train.csv', delimiter=',', skiprows=1)

In [None]:
# Split each data set into pixel features and digit labels, keeping at most
# the first 10000 rows. Column 0 of every row is the label; the remaining
# columns are the features, transposed so that each column of X is one example.
X_train = train_Set[:10000, 1:].T
X_test = test_Set[:10000, 1:].T

# Labels are loaded as floats, so round them to the nearest integer class id.
Y_train = np.rint(train_Set[:10000, 0]).astype(int)
Y_test = np.rint(test_Set[:10000, 0]).astype(int)

print("X_train.shape  = ", X_train.shape)
print("Y_train.shape  = ", Y_train.shape)
print("X_test.shape   = ", X_test.shape)
print("Y_test.shape   = ", Y_test.shape)

In [None]:
def one_hot(Y_in, num_classes=None):
    """Convert a 1-D array of integer class labels into a one-hot matrix.

    Parameters
    ----------
    Y_in : array-like of shape (m,)
        Class labels. Values are cast to ``int`` before being used as row
        indices.
    num_classes : int, optional
        Number of rows (classes) in the output. When omitted it is inferred
        as ``max(Y_in) + 1``. Pass it explicitly whenever the labels at hand
        might not contain the highest class, otherwise the matrix comes out
        with too few rows.

    Returns
    -------
    numpy.ndarray of shape (num_classes, m)
        Float matrix holding a single 1 per column, in the row given by that
        column's label.

    Raises
    ------
    ValueError
        If ``Y_in`` is empty and ``num_classes`` is not given, or if any label
        lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(Y_in).astype(int)

    if num_classes is None:
        if labels.size == 0:
            raise ValueError("num_classes must be given when Y_in is empty")
        num_classes = int(labels.max() + 1)
    num_classes = int(num_classes)

    # A negative label would silently wrap around to a row counted from the
    # end, so reject out-of-range labels up front.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes}); "
            f"got min={labels.min()}, max={labels.max()}"
        )

    Y_new = np.zeros((num_classes, labels.size))
    # Fancy indexing: for column j, set row labels[j] to 1.
    Y_new[labels, np.arange(labels.size)] = 1
    return Y_new
# One-hot encode the integer training labels. The ndim guard keeps this cell
# safe to re-run: once Y_train is already a 2-D one-hot matrix, feeding it
# back into one_hot would fail instead of encoding anything useful.
if Y_train.ndim == 1:
    Y_train = one_hot(Y_train)

def tanh(Z):
    """Element-wise hyperbolic tangent of Z (thin wrapper around np.tanh).

    Used by forward_prop as the activation of both hidden layers.
    """
    return np.tanh(Z)

#Softmax
def softmax(Z):
    """Column-wise softmax.

    Parameters
    ----------
    Z : numpy.ndarray of shape (num_classes, m)
        Scores, one column per example. The array is NOT modified.

    Returns
    -------
    numpy.ndarray of shape (num_classes, m)
        Non-negative values in which every column sums to 1.
    """
    # Subtracting each column's maximum does not change the result but keeps
    # np.exp from overflowing. The shift is computed into a new array (rather
    # than with ``Z -= ...``) so the caller's scores are left intact.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

def compute_cost(Y, A3, eps=1e-12):
    """Mean cross-entropy between one-hot targets and predicted probabilities.

    Parameters
    ----------
    Y : numpy.ndarray of shape (num_classes, m)
        One-hot targets, one column per example.
    A3 : numpy.ndarray of shape (num_classes, m)
        Predicted class probabilities (the softmax output).
    eps : float, optional
        Lower bound applied to ``A3`` before taking the logarithm.

    Returns
    -------
    float
        ``-(1/m) * sum(Y * log(A3))`` with ``m = Y.shape[1]``.
    """
    m = Y.shape[1]
    # A probability that underflows to exactly 0 gives log(0) = -inf, and
    # 0 * -inf is NaN, which would poison the whole sum. Flooring at eps keeps
    # the cost finite; a confidently wrong prediction still costs -log(eps).
    log_probs = np.log(np.maximum(A3, eps))
    return -(1 / m) * np.sum(Y * log_probs)

In [None]:
def init_params(num_inputs, num_hidden1, num_hidden2, num_classes):
    """Create the initial weights and biases of the three-layer network.

    Every weight matrix has shape (fan_out, fan_in) and is drawn from a
    standard normal scaled by sqrt(2 / fan_in); every bias is a zero column
    vector of shape (fan_out, 1).

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)``.
    """
    def scaled_normal(fan_out, fan_in):
        # Random weights whose spread shrinks as the layer's input grows.
        return np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in)

    # The weight matrices are drawn in layer order (1, 2, 3) so that a seeded
    # global RNG always yields the same parameters.
    W1 = scaled_normal(num_hidden1, num_inputs)
    W2 = scaled_normal(num_hidden2, num_hidden1)
    W3 = scaled_normal(num_classes, num_hidden2)

    b1 = np.zeros((num_hidden1, 1))
    b2 = np.zeros((num_hidden2, 1))
    b3 = np.zeros((num_classes, 1))

    return W1, b1, W2, b2, W3, b3

def forward_prop(W1, b1, W2, b2, W3, b3, X):
    """Run one forward pass through the network.

    Two tanh hidden layers are followed by a softmax output layer. ``X``
    holds one example per column.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2, Z3, A3)``: for each layer, the array handed to the
        activation function followed by that activation's output.
    """
    def affine(weights, inputs, bias):
        # Linear part of a layer: weights . inputs + bias (bias broadcasts
        # across the example columns).
        return np.dot(weights, inputs) + bias

    hidden1_pre = affine(W1, X, b1)
    hidden1_act = tanh(hidden1_pre)

    hidden2_pre = affine(W2, hidden1_act, b2)
    hidden2_act = tanh(hidden2_pre)

    output_pre = affine(W3, hidden2_act, b3)
    output_probs = softmax(output_pre)

    return hidden1_pre, hidden1_act, hidden2_pre, hidden2_act, output_pre, output_probs

def backprop(X, Y, A3, A2, A1, W3, W2):
    """Compute the parameter gradients for one batch.

    Parameters
    ----------
    X : inputs, one example per column (``m = X.shape[1]`` examples).
    Y : one-hot targets with the same shape as ``A3``.
    A3, A2, A1 : activations of layers 3, 2 and 1 from the forward pass.
    W3, W2 : weight matrices of layers 3 and 2, used to propagate the error
        backwards.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2, dW3, db3)``, each averaged over the batch.
    """
    batch_scale = 1 / X.shape[1]

    def param_grads(delta, layer_input):
        # Batch-averaged gradients of one layer's weights and bias.
        grad_W = batch_scale * np.dot(delta, layer_input.T)
        grad_b = batch_scale * np.sum(delta, axis=1, keepdims=True)
        return grad_W, grad_b

    # Output layer: the error term is simply prediction minus target.
    delta3 = A3 - Y
    dW3, db3 = param_grads(delta3, A2)

    # Hidden layers: push the error back through the weights, then multiply
    # by the tanh derivative (1 - A**2).
    delta2 = np.dot(W3.T, delta3) * (1 - A2**2)
    dW2, db2 = param_grads(delta2, A1)

    delta1 = np.dot(W2.T, delta2) * (1 - A1**2)
    dW1, db1 = param_grads(delta1, X)

    return dW1, db1, dW2, db2, dW3, db3

def update_params(W, b, dW, db, learning_rate):
    """Apply one gradient-descent step to all three layers.

    Parameters
    ----------
    W, b : dict
        Weights and biases keyed by layer number 1, 2, 3.
    dW, db : dict
        Matching gradients, keyed the same way.
    learning_rate : float
        Step size multiplied into every gradient.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)`` after the step. When the parameters are
        NumPy arrays the subtraction happens in place, so the arrays stored
        in ``W`` and ``b`` are updated as well.
    """
    layer_ids = (1, 2, 3)

    # Look everything up before touching any parameter.
    weights = [W[k] for k in layer_ids]
    biases = [b[k] for k in layer_ids]
    weight_grads = [dW[k] for k in layer_ids]
    bias_grads = [db[k] for k in layer_ids]

    # Step each layer, output layer first.
    for idx in reversed(range(len(layer_ids))):
        weights[idx] -= learning_rate * weight_grads[idx]
        biases[idx] -= learning_rate * bias_grads[idx]

    return weights[0], biases[0], weights[1], biases[1], weights[2], biases[2]

In [None]:
# Network architecture: 784 input features per example, two hidden layers of
# 10 units each, and 10 output classes.
num_inputs = 784
num_hidden1 = 10
num_hidden2 = 10
num_classes = 10
# Number of training examples. Y_train is one-hot with one column per example,
# so the count is shape[1]; Y_train.size would count classes * examples.
m = Y_train.shape[1]

learning_rate = 0.1
num_iters = 1000

W1, b1, W2, b2, W3, b3 = init_params(num_inputs, num_hidden1, num_hidden2, num_classes)

# Full-batch gradient descent: every iteration uses all training examples.
for i in range(num_iters):
    Z1, A1, Z2, A2, Z3, A3 = forward_prop(W1, b1, W2, b2, W3, b3, X_train)

    # Cost of the current parameters (measured before this iteration's update).
    cost = compute_cost(Y_train, A3)

    # Backpropagation
    dW1, db1, dW2, db2, dW3, db3 = backprop(X_train, Y_train, A3, A2, A1, W3, W2)

    # update_params expects parameters and gradients keyed by layer number.
    W = {1: W1, 2: W2, 3: W3}
    b = {1: b1, 2: b2, 3: b3}
    dW = {1: dW1, 2: dW2, 3: dW3}
    db = {1: db1, 2: db2, 3: db3}

    W1, b1, W2, b2, W3, b3 = update_params(W, b, dW, db, learning_rate)

    if i % 10 == 0:
        print(f"Iteration {i}, Cost: {cost}")

# The activations left over from the training loop were computed before the
# last parameter update, so run the forward pass once more to evaluate the
# parameters the model actually ended up with.
Z1, A1, Z2, A2, Z3, A3 = forward_prop(W1, b1, W2, b2, W3, b3, X_train)

# Predicted and true class of every training example (index of the largest
# entry in each column).
Y_pred = np.argmax(A3, axis=0)
Y = np.argmax(Y_train, axis=0)

# Number of misclassified examples, counted in one vectorised comparison.
incorrect = int(np.count_nonzero(Y != Y_pred))

In [None]:
# View every 784-value feature column as a 28x28 image. Passing -1 lets NumPy
# infer the number of examples instead of hard-coding the training-set size.
image = X_train.reshape(28, 28, -1)
accuracy = ((Y_pred.size - incorrect) / Y_pred.size) * 100
print(f"Model Accuracy: {accuracy:.2f}\n")

# Show up to the first 25 examples next to the network's output for each.
for i in range(min(25, image.shape[2])):
    plt.imshow(image[:, :, i], cmap='binary')
    plt.axis('off')
    plt.show()
    print("Logits:", Z3[:, i], '\n')
    print("Softmax:", A3[:,i],'\n')
    print("Prediction:", Y_pred[i],'\n')