In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
# Read the training CSV and convert the resulting frame to a plain NumPy matrix.
data = np.array(pd.read_csv("./data/mnist_train.csv"))

# Column 0 is treated as the class label; every remaining column is a feature.
labels_raw = data[:, 0]

# Scale the feature columns by 1/255 (presumably 8-bit pixel intensities,
# which would map them into [0, 1] -- confirm against the CSV).
images = data[:, 1:] / 255.0

# One-hot encode: indexing a 10x10 identity matrix with the labels selects,
# for each sample, the row that has a single 1 at the label's position.
labels = np.eye(10)[labels_raw]

In [None]:
# Layer widths of the fully connected network. `input_size` has to equal the
# number of feature columns fed to the first layer and `output_size` the
# width of the one-hot label vectors.
input_size = 784
hidden_size_1 = 128
hidden_size_2 = 128
output_size = 10

try:
    # Resume from a saved checkpoint when one exists. np.load on an .npz
    # archive returns a lazily-read NpzFile that keeps the file open, so all
    # arrays are pulled out inside the `with` block, which then closes it.
    with np.load("./model/weights.npz") as weights_and_biases:
        b1 = weights_and_biases['b1']
        w1 = weights_and_biases['w1']
        b2 = weights_and_biases['b2']
        w2 = weights_and_biases['w2']
        b3 = weights_and_biases['b3']
        w3 = weights_and_biases['w3']

except FileNotFoundError:
    # No checkpoint yet: start from zero biases and small Gaussian weights
    # (standard normal scaled by 0.01). Each parameter is drawn exactly once.
    b1 = np.zeros(hidden_size_1)
    w1 = np.random.randn(input_size, hidden_size_1) * 0.01
    b2 = np.zeros(hidden_size_2)
    w2 = np.random.randn(hidden_size_1, hidden_size_2) * 0.01
    b3 = np.zeros(output_size)
    w3 = np.random.randn(hidden_size_2, output_size) * 0.01

In [None]:
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``x``; a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook formula; for x < 0 the algebraically
    # equal form exp(x) / (1 + exp(x)) is used, with exp(x) == decay.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where turns scalar input into a 0-d array; unwrap it again.
    return result[()] if result.ndim == 0 else result

def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def softmax(x):
    """Softmax along the last axis of ``x``.

    The maximum along the last axis is subtracted before exponentiating, so
    the largest exponent is 0; the result is then normalised so that it sums
    to 1 along that axis.
    """
    row_max = np.max(x, axis=-1, keepdims=True)
    numerators = np.exp(x - row_max)
    denominators = np.sum(numerators, axis=-1, keepdims=True)
    return numerators / denominators

def d_sigmoid(x):
    """Derivative of the logistic sigmoid at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    # Evaluate the sigmoid once and reuse it rather than computing the
    # exponential twice per call.
    s = sigmoid(x)
    return s * (1 - s)

def d_relu(x):
    """Derivative of ReLU: integer 1 where ``x`` is strictly positive, 0 elsewhere."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

In [None]:
def forward_propagation(image_batch, w1, b1, w2, b2, w3, b3):
    """Run one forward pass through the three-layer network.

    Each layer is an affine map ``input . w + b``; the two hidden layers are
    followed by ReLU and the output layer by softmax.

    Returns:
        Tuple ``(z1, a1, z2, a2, z3, y_pred)`` holding the pre-activation and
        activation of each hidden layer, the output pre-activation and the
        softmax output, in that order.
    """
    # Hidden layer 1.
    pre_act_1 = np.dot(image_batch, w1) + b1
    hidden_1 = relu(pre_act_1)

    # Hidden layer 2.
    pre_act_2 = np.dot(hidden_1, w2) + b2
    hidden_2 = relu(pre_act_2)

    # Output layer.
    logits = np.dot(hidden_2, w3) + b3
    probabilities = softmax(logits)

    return pre_act_1, hidden_1, pre_act_2, hidden_2, logits, probabilities

In [None]:
def loss_calculation(y_true, y_pred, batch_size, epsilon=1e-10):
    """Cross-entropy between ``y_true`` and ``y_pred``, scaled by ``1 / batch_size``.

    Args:
        y_true: target values, multiplied element-wise with ``log(y_pred)``.
        y_pred: predicted values, same shape as ``y_true``.
        batch_size: divisor applied to the summed loss.
        epsilon: added to ``y_pred`` before the logarithm so that a predicted
            value of exactly 0 does not produce ``log(0)``.

    Returns:
        The summed cross-entropy multiplied by ``-1 / batch_size``.
    """
    scale = -1 / batch_size
    log_likelihood = np.sum(y_true * np.log(y_pred + epsilon))
    return scale * log_likelihood

In [None]:
def backward_propagation(y_true, y_pred, image_batch, z1, a1, z2, a2, z3, w1, w2, w3, b1, b2, b3):
    """Back-propagate the output error and return the parameter gradients.

    The output-layer error is ``y_pred - y_true``. It is pushed backwards
    through ``w3`` and ``w2``, gated at each hidden layer by the ReLU
    derivative of that layer's pre-activation. Gradients are summed over the
    batch axis, not averaged.

    ``z3``, ``w1``, ``b1``, ``b2`` and ``b3`` are accepted but not used.

    Returns:
        Tuple ``(dw3, db3, dw2, db2, dw1, db1)``.
    """
    # Output layer.
    delta_out = y_pred - y_true
    grad_w3 = np.dot(a2.T, delta_out)
    grad_b3 = np.sum(delta_out, axis=0)

    # Second hidden layer.
    delta_h2 = np.dot(delta_out, w3.T) * d_relu(z2)
    grad_w2 = np.dot(a1.T, delta_h2)
    grad_b2 = np.sum(delta_h2, axis=0)

    # First hidden layer.
    delta_h1 = np.dot(delta_h2, w2.T) * d_relu(z1)
    grad_w1 = np.dot(image_batch.T, delta_h1)
    grad_b1 = np.sum(delta_h1, axis=0)

    return grad_w3, grad_b3, grad_w2, grad_b2, grad_w1, grad_b1

In [None]:
def update_parameters(learning_rate, w1, w2, w3, b1, b2, b3, dw1, dw2, dw3, db1, db2, db3):
    """Apply one gradient-descent step to every weight and bias.

    Each parameter has ``gradient * learning_rate`` subtracted from it using
    ``-=``, so NumPy array arguments are modified in place as well as
    returned.

    Returns:
        Tuple ``(w1, w2, w3, b1, b2, b3)`` of the updated parameters.
    """
    # Scale every gradient first ...
    step_w1, step_b1 = dw1 * learning_rate, db1 * learning_rate
    step_w2, step_b2 = dw2 * learning_rate, db2 * learning_rate
    step_w3, step_b3 = dw3 * learning_rate, db3 * learning_rate

    # ... then move each parameter against its gradient.
    w1 -= step_w1
    b1 -= step_b1
    w2 -= step_w2
    b2 -= step_b2
    w3 -= step_w3
    b3 -= step_b3

    return w1, w2, w3, b1, b2, b3

In [12]:
# Training hyperparameters.
learning_rate = 0.001
epochs = 200
batch_size = 60

# np.savez does not create missing parent directories. On a first run, when
# no checkpoint has been written yet, the target folder may not exist, and
# the save would fail only after a whole epoch of training. Create it now.
os.makedirs("./model", exist_ok=True)

for epoch in range(epochs):
    # Sweep the training set once in contiguous mini-batches.
    for i in range(0, len(images), batch_size):
        batch_images = images[i:i+batch_size]
        batch_labels = labels[i:i+batch_size]

        z1, a1, z2, a2, z3, y_pred = forward_propagation(batch_images, w1, b1, w2, b2, w3, b3)

        # Normalise by the number of samples actually in this slice: the
        # final batch is shorter than `batch_size` whenever len(images) is
        # not an exact multiple of it.
        loss = loss_calculation(batch_labels, y_pred, len(batch_images))

        dw3, db3, dw2, db2, dw1, db1 = backward_propagation(batch_labels, y_pred, batch_images, z1, a1, z2, a2, z3, w1, w2, w3, b1, b2, b3)

        w1, w2, w3, b1, b2, b3 = update_parameters(learning_rate, w1, w2, w3, b1, b2, b3, dw1, dw2, dw3, db1, db2, db3)

    # Checkpoint after every epoch so an interrupted run can be resumed.
    weights_and_biases = {
        "b1": b1,
        "w1": w1,
        "b2": b2,
        "w2": w2,
        "b3": b3,
        "w3": w3
    }
    np.savez("./model/weights.npz", **weights_and_biases)

    # `loss` is the loss of the last mini-batch of the epoch, not an epoch average.
    print(f"Epoch: {epoch}, Loss: {loss}")

Epoch: 0, Loss: 0.002252530529977633
Epoch: 1, Loss: 0.0031937600052444138
Epoch: 2, Loss: 0.0026350256948323453
Epoch: 3, Loss: 0.002311421156375993
Epoch: 4, Loss: 0.002275652934013489
Epoch: 5, Loss: 0.0019711665530403725
Epoch: 6, Loss: 0.0017708402127275323
Epoch: 7, Loss: 0.0016830557597379448
Epoch: 8, Loss: 0.0015650214798410976
Epoch: 9, Loss: 0.0014392666133716421
Epoch: 10, Loss: 0.0013592623020034864
Epoch: 11, Loss: 0.0012861785663351249
Epoch: 12, Loss: 0.001207923540777294
Epoch: 13, Loss: 0.0011109420092897235
Epoch: 14, Loss: 0.0010695278162349048
Epoch: 15, Loss: 0.0010164678600786865
Epoch: 16, Loss: 0.0009612516830931794


KeyboardInterrupt: 