In [368]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [369]:
# Load the training CSV from a path relative to the notebook's working directory.
# The displayed frame further down shows a `label` column followed by pixel0..pixel783.
data = pd.read_csv("data/train.csv")

In [371]:
# Keep the frame in its loaded orientation (one sample per row, label in the
# first column): the split and slicing cells below index rows as samples.
# Re-assigning `data = data.T` here flipped the orientation on every execution
# of this cell, so the notebook only worked after an even number of runs and
# failed on a fresh Restart & Run All.
display(data)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Work on a plain NumPy array from here on.
data_array = np.array(data)

# The first 1000 rows are held out as the test split; every remaining row is
# used for training.
test, train = data_array[:1000, :], data_array[1000:, :]

In [373]:
# Column 0 of each split is the target vector (the `label` column).
train_y = train[:, 0]
test_y = test[:, 0]

# Columns 1..784 are the features; dividing by 255.0 rescales them
# (to [0, 1], assuming 8-bit pixel intensities -- TODO confirm).
train_x = train[:, 1:785] / 255.0
test_x = test[:, 1:785] / 255.0

In [374]:
def ReLU(x):
    """Return the element-wise maximum of ``x`` and 0 (rectified linear unit)."""
    rectified = np.maximum(0, x)
    return rectified

def leaky_relu(Z, alpha=0.02):
    """Leaky ReLU: pass positive entries through, scale the rest by ``alpha``.

    Args:
        Z: array of pre-activations.
        alpha: slope applied to entries that are not strictly positive.

    Returns:
        Array with ``Z`` where ``Z > 0`` and ``alpha * Z`` elsewhere.
    """
    is_positive = Z > 0
    scaled = alpha * Z
    return np.where(is_positive, Z, scaled)

def leaky_relu_deriv(Z, alpha=0.02):
    """Element-wise slope of the leaky ReLU evaluated at ``Z``.

    Args:
        Z: array of pre-activations.
        alpha: slope used for entries that are not strictly positive.

    Returns:
        Array holding 1 where ``Z > 0`` and ``alpha`` elsewhere (including at 0).
    """
    is_positive = Z > 0
    return np.where(is_positive, 1, alpha)

def softmax(x):
    """Column-wise softmax (normalises along axis 0).

    The per-column maximum is subtracted before exponentiating so that large
    inputs do not overflow; this shift does not change the result.
    """
    col_max = np.max(x, axis=0, keepdims=True)
    numerators = np.exp(x - col_max)
    denominators = np.sum(numerators, axis=0, keepdims=True)
    return numerators / denominators

In [375]:
# Seed the global NumPy RNG so that re-running this cell (or the whole notebook
# on a fresh kernel) starts training from the same initial weights.
np.random.seed(42)

# Layer 1: 784 inputs -> 10 hidden units. Weights are standard-normal draws
# scaled by sqrt(2 / fan_in) (He-style scaling); biases start at zero.
W1 = np.random.randn(10, 784) * np.sqrt(2. / 784)
b1 = np.zeros((10, 1))

# Layer 2: 10 hidden units -> 10 outputs, initialised the same way.
W2 = np.random.randn(10, 10) * np.sqrt(2. / 10)
b2 = np.zeros((10, 1))

In [376]:
def learn(W1,b1,W2,b2,lr,iterations):
    """Train the two-layer network with full-batch gradient descent.

    Uses the module-level ``train_x`` (one sample per row) and ``train_y``
    (integer class labels, used as row indices of the one-hot matrix) as the
    training set, and the module-level ``leaky_relu``, ``leaky_relu_deriv``
    and ``softmax`` helpers.

    Args:
        W1, b1: first-layer weights and bias.
        W2, b2: output-layer weights and bias.
        lr: learning rate (step size of each update).
        iterations: number of full-batch update steps to run.

    Returns:
        The tuple ``(W1, b1, W2, b2)``. The arrays are updated with ``-=``,
        so the objects passed in are modified in place as well.
    """

    m = train_x.shape[0]

    A0 = train_x.T

    # One-hot targets depend only on the labels, so build them once. Size the
    # matrix from the data (m samples, one row per output unit) instead of
    # hard-coding 10 x 41000, which broke for any other split size.
    n_classes = W2.shape[0]
    Y_onehot = np.zeros((n_classes, m))
    Y_onehot[train_y, np.arange(m)] = 1

    for it in range(iterations):
        # Forward pass
        Z1 = np.dot(W1, A0) + b1  # (hidden, m)
        A1 = leaky_relu(Z1)       # (hidden, m)

        # Share of hidden activations that are strictly positive.
        percent_active = np.mean(A1 > 0) * 100
        print(f"% of active neurons after ReLU: {percent_active:.2f}%")

        Z2 = np.dot(W2, A1) + b2  # (n_classes, m)
        A2 = softmax(Z2)          # (n_classes, m), each column sums to 1

        # Predictions (measured before this iteration's parameter update)
        Y_predicted = np.argmax(A2, axis=0)
        correct = np.sum(Y_predicted == train_y)
        accuracy = correct / m * 100

        # Backward pass
        dZ2 = A2 - Y_onehot                    # (n_classes, m)
        dW2 = (1/m) * np.dot(dZ2, A1.T)        # same shape as W2
        db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)

        dA1 = np.dot(W2.T, dZ2)                # (hidden, m)
        dZ1 = dA1 * leaky_relu_deriv(Z1)       # leaky ReLU derivative
        dW1 = (1/m) * np.dot(dZ1, A0.T)        # same shape as W1
        db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

        grad_norms = {
            "||dW1||": np.linalg.norm(dW1),
            "||db1||": np.linalg.norm(db1),
            "||dW2||": np.linalg.norm(dW2),
            "||db2||": np.linalg.norm(db2),
        }

        # Update parameters (in place)
        W1 -= lr * dW1
        b1 -= lr * db1
        W2 -= lr * dW2
        b2 -= lr * db2
        if it % 10 == 0:
            print(f"Iteration {it+1}, Accuracy: {accuracy:.2f}%")
            print("Gradient Norms:", grad_norms)

    return W1,b1,W2,b2

In [385]:
# Run 500 full-batch steps at learning rate 0.01. The current parameter values
# are passed in and reassigned, so re-running this cell continues training
# from where the previous run stopped.
W1, b1, W2, b2 = learn(W1, b1, W2, b2, lr=0.01, iterations=500)

% of active neurons after ReLU: 88.04%
Iteration 1, Accuracy: 89.93%
Gradient Norms: {'||dW1||': 0.04549619264153146, '||db1||': 0.006344908647496067, '||dW2||': 0.030343464399306273, '||db2||': 0.004669844209249725}
% of active neurons after ReLU: 88.04%
% of active neurons after ReLU: 88.04%
% of active neurons after ReLU: 88.04%
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.05%
Iteration 11, Accuracy: 89.93%
Gradient Norms: {'||dW1||': 0.04540194645174289, '||db1||': 0.006334605531897984, '||dW2||': 0.030262612253948044, '||db2||': 0.004660090860179256}
% of active neurons after ReLU: 88.05%
% of active neurons after ReLU: 88.06%
% of active neurons after ReLU: 88.06%
% of active neurons after ReLU: 88.06%
% of active neurons after ReLU: 88.06%
% of active neurons

In [386]:
# Forward pass over the test split using the current parameters.
m = test_x.shape[0]   # number of test samples
A0 = test_x.T         # samples as columns

# Hidden layer: affine transform followed by leaky ReLU
Z1 = W1.dot(A0) + b1
A1 = leaky_relu(Z1)

# Output layer: affine transform followed by softmax (one column per sample)
Z2 = W2.dot(A1) + b2
A2 = softmax(Z2)

# Predicted class = row index of the largest output in each column;
# accuracy is the share of matching labels, in percent.
Y_predicted = A2.argmax(axis=0)
correct = (Y_predicted == test_y).sum()
accuracy = correct / m * 100

In [387]:
# Test-set accuracy in percent, as computed by the evaluation cell above.
print(accuracy)

90.60000000000001


In [388]:
def test_prediction(index,showImage = False):
    """Print the true and predicted label of one test sample, optionally plotting it.

    Reads the module-level ``test_x``, ``test_y`` and ``Y_predicted``.

    Args:
        index: row index of the sample within the test split.
        showImage: when True, also draw the sample as a 28x28 image.
    """
    true_label = test_y[index]
    predicted_label = Y_predicted[index]
    print(f"The number is {true_label} and the model predicted {predicted_label}.\n\n")
    if showImage:
        # Build the image only when it is actually drawn; the features were
        # divided by 255 earlier, so multiply back before plotting.
        current_image = test_x[index,:].reshape((28,28)) * 255
        # Draw on a fresh figure and flush it right away so that several calls
        # in the same cell each render their own image, directly after their
        # printed line, instead of overwriting one shared set of axes.
        plt.figure()
        plt.imshow(current_image,interpolation="nearest")
        plt.show()

In [None]:
# Spot-check a handful of test samples; index 563 is a known wrong prediction.
for sample_index in (134, 1, 999, 560, 561, 562, 563, 564, 565):
    test_prediction(sample_index)

The number is 1 and the model predicted 1.


The number is 0 and the model predicted 0.


The number is 4 and the model predicted 4.


The number is 3 and the model predicted 3.


The number is 5 and the model predicted 5.


The number is 9 and the model predicted 9.


The number is 4 and the model predicted 9.


The number is 4 and the model predicted 4.


The number is 3 and the model predicted 3.


