In [380]:
import numpy as np
import pandas as pd

In [381]:
# Directory that holds the four CSV files. Kept in one place so the notebook
# can be pointed at another location by editing a single line.
DATA_DIR = "/home/mllab/Downloads"

# Column names assigned to the feature files (784 columns) and the label
# files (10 columns).
columns = [f"Feature{i}" for i in range(784)]
labels = [f"label{i}" for i in range(10)]

# Passing `names=` without `header=` makes pandas treat the files as
# header-less: the first line of each file is read as data, not column names.
X_train = pd.read_csv(f"{DATA_DIR}/train_X.csv", names = columns)
X_test = pd.read_csv(f"{DATA_DIR}/test_X.csv", names = columns)
# Labels are converted to plain ndarrays; features stay as DataFrames.
y_train = pd.read_csv(f"{DATA_DIR}/train_label.csv", names = labels).to_numpy()
y_test = pd.read_csv(f"{DATA_DIR}/test_label.csv", names = labels).to_numpy()

In [402]:
# Scale the features by 1/255 (assumes raw values are 0-255 intensities --
# TODO confirm against the data source).
#
# The cell rebinds X_train / X_test to their scaled versions, so running it a
# second time would divide by 255 again. The guard makes the cell safe to
# re-run: a frame whose largest value is already <= 1.0 is treated as scaled
# and left untouched. NOTE(review): this is a heuristic; raw data that never
# exceeds 1.0 would be skipped as well.
if X_train.max().max() > 1.0:
    X_train = X_train / 255.0
if X_test.max().max() > 1.0:
    X_test = X_test / 255.0

In [403]:
def parameter_initialize(layer_dims):
    """Create the weight and bias arrays for a fully connected network.

    Parameters
    ----------
    layer_dims : sequence of int
        Width of every layer, input layer first.

    Returns
    -------
    dict
        Keys 'W1', 'b1', 'W2', 'b2', ... where 'W<l>' has shape
        (layer_dims[l], layer_dims[l-1]) and 'b<l>' has shape
        (layer_dims[l], 1).

    Notes
    -----
    Re-seeds NumPy's global random generator with 1 on every call, so the
    returned weights are the same each time for a given `layer_dims`.
    """
    np.random.seed(1)
    parameters = {}
    # Pair each layer width with the width of the layer feeding into it.
    width_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(width_pairs, start=1):
        # Small random weights, zero biases.
        parameters[f'W{idx}'] = np.random.randn(fan_out, fan_in) * 0.01
        parameters[f'b{idx}'] = np.zeros((fan_out, 1))
    return parameters

In [404]:
def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and `Z`."""
    floor = 0
    return np.maximum(floor, Z)

In [405]:
def softmax(Z):
    """Softmax taken down axis 0, so every column of the result sums to 1.

    The per-column maximum is subtracted before exponentiating, which keeps
    `np.exp` from overflowing on large inputs without changing the result.
    """
    column_peak = np.max(Z, axis = 0, keepdims = True)
    exponentials = np.exp(Z - column_peak)
    column_total = exponentials.sum(axis = 0, keepdims = True)
    return exponentials / column_total

In [406]:
def forward_propagation(X, parameters):
    """Run the network forward: ReLU on every hidden layer, softmax on the last.

    Parameters
    ----------
    X : array-like with a `.T` attribute
        Input samples in rows; it is transposed before the first matrix
        product so that samples become columns.
    parameters : dict
        'W<l>' / 'b<l>' entries for layers 1..L, where L is half the number
        of entries.

    Returns
    -------
    tuple
        (AL, caches): AL is the softmax output of the last layer, and caches
        maps 'A<l>' and 'Z<l>' to the activation and pre-activation of every
        layer.
    """
    n_layers = len(parameters) // 2
    caches = {}

    def affine(idx, inputs):
        # Linear part of layer `idx`: W.dot(inputs) + b.
        return parameters[f'W{idx}'].dot(inputs) + parameters[f'b{idx}']

    activation = X.T
    # Hidden layers 1 .. L-1.
    for idx in range(1, n_layers):
        pre_activation = affine(idx, activation)
        activation = ReLU(pre_activation)
        caches[f'A{idx}'] = activation
        caches[f'Z{idx}'] = pre_activation

    # Output layer L.
    logits = affine(n_layers, activation)
    probabilities = softmax(logits)
    caches[f'A{n_layers}'] = probabilities
    caches[f'Z{n_layers}'] = logits

    return probabilities, caches

In [407]:
def compute_cost(AL, y):
    """Cross-entropy cost averaged over the rows of `y`.

    Parameters
    ----------
    AL : ndarray
        Predicted probabilities, laid out like `y.T`.
    y : ndarray
        Targets with one row per sample; `y.shape[0]` is used as the sample
        count.

    Returns
    -------
    The cost passed through `np.squeeze`.
    """
    sample_count = y.shape[0]
    # 1e-8 keeps the logarithm finite when a predicted probability is 0.
    log_probs = np.log(AL + 1e-8)
    weighted_total = np.sum(y.T * log_probs)
    return np.squeeze(-weighted_total / sample_count)

In [408]:
def ReLU_derivative(Z):
    """Mask of where ReLU has slope 1: True where `Z` is strictly positive."""
    positive_mask = Z > 0
    return positive_mask

In [409]:
def backward_propagation(X, y, caches, parameters):
    """Compute the gradients of the cost with respect to every weight and bias.

    Parameters
    ----------
    X : array-like, samples in rows
        The inputs that were fed to the forward pass.
    y : array-like, samples in rows
        Targets, laid out like the transpose of the network output.
    caches : dict
        'A<l>' / 'Z<l>' entries for layers 1..L, as produced by the forward
        pass.
    parameters : dict
        'W<l>' / 'b<l>' entries for layers 1..L.

    Returns
    -------
    dict
        'dW<l>' and 'db<l>' for every layer, each averaged over the
        `X.shape[0]` samples.

    Notes
    -----
    Networks with no hidden layer (L == 1) are supported: the output layer
    then takes the data itself as its input instead of looking up a
    non-existent 'A0' cache entry.
    """
    grads = {}
    L = len(parameters) // 2
    m = X.shape[0]
    # Convert once so DataFrame inputs are not re-converted on every product.
    inputs = np.asarray(X).T
    y = np.asarray(y).T

    def layer_input(l):
        # Layer 1 is fed by the data; deeper layers by the previous activation.
        return inputs if l == 1 else caches['A' + str(l - 1)]

    # Output layer: A_L - y is the gradient of the cross-entropy cost with
    # respect to the softmax pre-activation (valid when each row of the
    # original y sums to 1, e.g. one-hot labels).
    dZ = caches['A' + str(L)] - y

    for l in range(L, 0, -1):
        if l < L:
            # Push the error back through layer l+1's weights, then through
            # layer l's ReLU.
            dA = parameters['W' + str(l + 1)].T.dot(dZ)
            dZ = dA * ReLU_derivative(caches['Z' + str(l)])
        A_prev = layer_input(l)
        grads['dW' + str(l)] = dZ.dot(A_prev.T) / m
        grads['db' + str(l)] = np.sum(dZ, axis = 1, keepdims = True) / m

    return grads

In [410]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Each 'W<l>' / 'b<l>' entry of `parameters` is decreased in place by
    `learning_rate` times the matching 'dW<l>' / 'db<l>' entry of `grads`.
    The same (mutated) `parameters` dict is returned.
    """
    n_layers = len(parameters) // 2
    for idx in range(1, n_layers + 1):
        for kind in ('W', 'b'):
            parameters[f'{kind}{idx}'] -= learning_rate * grads[f'd{kind}{idx}']
    return parameters

In [411]:
def model(X, y, layer_dims, learning_rate = 0.01, num_epochs = 100):
    """Train the network with full-batch gradient descent.

    Parameters
    ----------
    X, y
        Training inputs and targets, passed unchanged to the forward and
        backward passes on every epoch.
    layer_dims : sequence of int
        Layer widths handed to `parameter_initialize`.
    learning_rate : float
        Step size used by `update_parameters`.
    num_epochs : int
        Number of passes over the whole of `X`.

    Returns
    -------
    dict
        The trained parameters.
    """
    report_every = 10
    weights = parameter_initialize(layer_dims)

    for step in range(num_epochs):
        probabilities, cache = forward_propagation(X, weights)

        # Cost is measured before this epoch's update is applied.
        loss = compute_cost(probabilities, y)

        gradients = backward_propagation(X, y, cache, weights)
        weights = update_parameters(weights, gradients, learning_rate)

        if step % report_every:
            continue
        print(f"Cost after epoch {step}: {loss:.4f}")

    return weights

In [412]:
def predict(X, parameters):
    """Return, for each sample in `X`, the index of the largest network output.

    Runs `forward_propagation` and takes the argmax down axis 0 of its
    output.
    """
    probabilities = forward_propagation(X, parameters)[0]
    return np.argmax(probabilities, axis = 0)

In [413]:
def accuracy(predictions, y):
    """Fraction of `predictions` that match the argmax of each row of `y`."""
    targets = np.argmax(y, axis = 1)
    hits = predictions == targets
    return np.mean(hits)

In [414]:
# Network layout: 784 inputs, one hidden layer of 128 units, 10 outputs.
layer_dims = [784, 128, 10]

# Train on the full training set.
parameters = model(
    X_train,
    y_train,
    layer_dims,
    learning_rate = 0.08,
    num_epochs = 1000,
)

# Predicted class indices for both splits.
train_preds = predict(X_train, parameters)
test_preds = predict(X_test, parameters)

# Report accuracy as a percentage with two decimals.
train_accuracy_pct = accuracy(train_preds, y_train) * 100
print(f"Train accuracy: {train_accuracy_pct:.2f}%")
test_accuracy_pct = accuracy(test_preds, y_test) * 100
print(f"Test accuracy: {test_accuracy_pct:.2f}%")

Cost after epoch 0: 2.3024
Cost after epoch 10: 2.2914
Cost after epoch 20: 2.2745
Cost after epoch 30: 2.2436
Cost after epoch 40: 2.1872
Cost after epoch 50: 2.0924
Cost after epoch 60: 1.9522
Cost after epoch 70: 1.7723
Cost after epoch 80: 1.5733
Cost after epoch 90: 1.3817
Cost after epoch 100: 1.2147
Cost after epoch 110: 1.0767
Cost after epoch 120: 0.9647
Cost after epoch 130: 0.8738
Cost after epoch 140: 0.7994
Cost after epoch 150: 0.7377
Cost after epoch 160: 0.6859
Cost after epoch 170: 0.6419
Cost after epoch 180: 0.6040
Cost after epoch 190: 0.5710
Cost after epoch 200: 0.5419
Cost after epoch 210: 0.5161
Cost after epoch 220: 0.4930
Cost after epoch 230: 0.4721
Cost after epoch 240: 0.4531
Cost after epoch 250: 0.4357
Cost after epoch 260: 0.4197
Cost after epoch 270: 0.4049
Cost after epoch 280: 0.3912
Cost after epoch 290: 0.3783
Cost after epoch 300: 0.3662
Cost after epoch 310: 0.3549
Cost after epoch 320: 0.3442
Cost after epoch 330: 0.3341
Cost after epoch 340: 0.3

In [441]:
# Classify one test row. The slice keeps it two-dimensional (a one-row frame),
# which is the layout predict() is given elsewhere in this notebook.
sample = X_test.iloc[186:187, :]
prediction = predict(sample, parameters)
print(prediction)

# Show the predicted class as a 10-slot indicator list.
arr = [0] * 10
arr[prediction[0]] += 1
print(arr)

[5]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
