### Initializations and Data Preparation

In [39]:
# imports

import numpy as np
import pandas as pd

In [40]:
# Read the MNIST training CSV and split it into labels and pixel features.
# NOTE(review): read_csv consumes the first row as a header by default --
# confirm that 'mnist_train.csv' actually has a header row.

df = pd.read_csv('mnist_train.csv')

# column 0 is used as the label vector
y_train = df.iloc[:, 0].values

# every remaining column is a feature; dividing by 255 rescales them
# (presumably 8-bit pixel intensities, giving values in [0, 1])
x_train = df.iloc[:, 1:].values / 255

In [46]:
# hyperparameters

learning_rate = 0.01
num_epochs = 20

# Seed NumPy's global RNG so that the random initialization below (and any
# later np.random call, such as the per-epoch shuffling) is reproducible
# after a kernel restart.
random_seed = 42
np.random.seed(random_seed)

# network layout: 784 input units, 28 hidden units and 1 output unit
# (binary classifier: only decides whether the digit is a 2 or a 7)
h = 28
n = 784

# initialize weights and biases uniformly in [-1, 1)
# % Code attribution: Hongtao Hao's P1 example solution.
w1 = np.random.uniform(low=-1, high=1, size=(n, h))  # input  -> hidden weights
w2 = np.random.uniform(low=-1, high=1, size=(h, 1))  # hidden -> output weights
b1 = np.random.uniform(low=-1, high=1, size=(h, 1))  # hidden-layer biases
b2 = np.random.uniform(low=-1, high=1, size=(1, 1))  # output-layer bias

### Extract Training data of digits 2 and 7

In [42]:
# % Code attribution: Hongtao Hao's P1 example solution.

test_labels = [2, 7]

# row indices of the training samples whose label is one of test_labels
indices = np.where(np.isin(y_train, test_labels))[0]

# features of the selected samples
x = x_train[indices]

# binary targets: test_labels[0] -> 0 and test_labels[1] -> 1.
# Built with a single comparison rather than two sequential in-place
# assignments, which overwrite each other whenever test_labels[1] is 0
# (the first assignment creates zeros that the second one then turns into 1).
y = (y_train[indices] == test_labels[1]).astype(y_train.dtype)

### Utility Functions

In [43]:
# sigmoid activation

def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-x))) through ``np.logaddexp`` so that
    large negative inputs do not overflow ``np.exp`` (the naive form
    computes exp(-x) directly and emits an overflow RuntimeWarning there).

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Activation(s) in [0, 1], with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

In [44]:
# derivative of sigmoid function

def sigmoid_derivative(y):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    ``y`` is the already-activated value (the callers in this notebook pass
    layer activations), so the derivative is simply y * (1 - y).
    Works element-wise on scalars and numpy arrays.
    """
    complement = 1 - y
    return complement * y

In [47]:
# Training for num_epochs
# % Code attribution: Hongtao Hao's P1 example solution.

# number of training samples (does not change between epochs)
num_train = len(y)

for epoch in range(1, num_epochs + 1):
    # visit the samples in a fresh random order every epoch
    train_index = np.arange(num_train)
    np.random.shuffle(train_index)

    # stochastic gradient descent: one parameter update per sample
    for i in train_index:
        # current sample as an (n, 1) column vector
        x_i = x[i, :].reshape(-1, 1)

        # ---- forward pass ----
        # a1: hidden activations, shape (h, 1)
        a1 = sigmoid(w1.T @ x_i + b1)
        # a2: output activation, shape (1, 1)
        a2 = sigmoid(w2.T @ a1 + b2)

        # ---- backward pass for the cost C = 0.5 * (a2 - y)^2 ----
        # error term at the output unit, shape (1, 1); computed once and
        # shared by all four gradients
        delta2 = (a2 - y[i]) * sigmoid_derivative(a2)
        # error term at the hidden units, shape (h, 1); uses w2 *before*
        # it is updated below
        delta1 = delta2 * w2 * sigmoid_derivative(a1)

        # dCdw1 is an (h, n) matrix (transposed when applied to w1)
        dCdw1 = delta1 * x_i.T
        # dCdb1 is an (h, 1) matrix
        dCdb1 = delta1
        # dCdw2 is an (h, 1) matrix
        dCdw2 = delta2 * a1
        # dCdb2 is a (1, 1) matrix
        dCdb2 = delta2

        # update w1, b1, w2, b2
        w1 = w1 - learning_rate * dCdw1.T
        b1 = b1 - learning_rate * dCdb1
        w2 = w2 - learning_rate * dCdw2
        b2 = b2 - learning_rate * dCdb2

    # ---- end-of-epoch evaluation on the whole training set ----
    # hidden-layer output: (num_train, h)
    out_h = sigmoid(x @ w1 + b1.T)
    # output-layer output: (num_train, 1)
    out_o = sigmoid(out_h @ w2 + b2)

    # targets as a column so they broadcast element-wise against out_o
    targets = y.reshape(-1, 1)

    # summed squared-error cost over all samples
    loss = 0.5 * np.sum(np.square(targets - out_o))

    # a sample counts as correct when its thresholded output equals the target
    correct = np.count_nonzero((out_o > 0.5).astype(int) == targets)
    accuracy = correct / num_train

    # log results
    print(f'Epoch {epoch}: Loss = {loss:.4f}, Accuracy = {accuracy:.4f}')

Epoch 1: Loss = 245.2080, Accuracy = 0.9557
Epoch 2: Loss = 178.8658, Accuracy = 0.9652
Epoch 3: Loss = 153.0950, Accuracy = 0.9687
Epoch 4: Loss = 137.0134, Accuracy = 0.9721
Epoch 5: Loss = 125.4655, Accuracy = 0.9750
Epoch 6: Loss = 116.4436, Accuracy = 0.9760
Epoch 7: Loss = 109.4437, Accuracy = 0.9781
Epoch 8: Loss = 103.6423, Accuracy = 0.9786
Epoch 9: Loss = 98.8629, Accuracy = 0.9799
Epoch 10: Loss = 94.3473, Accuracy = 0.9804
Epoch 11: Loss = 90.7350, Accuracy = 0.9812
Epoch 12: Loss = 87.2630, Accuracy = 0.9824
Epoch 13: Loss = 84.6859, Accuracy = 0.9826
Epoch 14: Loss = 81.5992, Accuracy = 0.9833
Epoch 15: Loss = 79.8560, Accuracy = 0.9835
Epoch 16: Loss = 76.8191, Accuracy = 0.9845
Epoch 17: Loss = 74.9394, Accuracy = 0.9851
Epoch 18: Loss = 72.7076, Accuracy = 0.9855
Epoch 19: Loss = 70.9418, Accuracy = 0.9859
Epoch 20: Loss = 69.1192, Accuracy = 0.9858


### Questions

In [54]:
# question 5

# first-layer parameters: the rows of w1 with the hidden biases appended
# as one extra final row
params = np.vstack((w1, b1.T))

np.savetxt('q5.txt', params, fmt='%.4f', delimiter=',')

In [56]:
# question 6

# second-layer parameters: the w2 column with the output bias appended
# as the last entry
params = np.vstack((w2, b2))

# written out as a single comma-separated row
np.savetxt('q6.txt', params.reshape(1, params.size), fmt='%.4f', delimiter=',')

In [57]:
# question 7

# load test data from assignment and apply the same /255 rescaling that was
# used for the training features
test = np.loadtxt("test.txt", delimiter=",") / 255.0

# forward pass: hidden-layer activations first, then the output activation
hidden = sigmoid(test @ w1 + b1.T)
a = sigmoid(hidden @ w2 + b2)

# one comma-separated row holding the output activation of every test sample
np.savetxt('q7.txt', a.reshape(1, -1), fmt='%.2f', delimiter=',')

In [53]:
# question 8

# Threshold the output activations `a` (computed in the question 7 cell) at
# 0.5: activation >= 0.5 -> 1, otherwise 0. Done as one vectorized comparison
# instead of a Python-level loop over 1-element arrays; ravel() keeps preds
# one-dimensional.
preds = (a >= 0.5).astype(int).ravel()

np.savetxt('q8.txt', preds.reshape(1, -1), fmt='%d', delimiter=',')