In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
data_=pd.read_csv('train.csv')


data=np.array(data_)
m,n=data.shape
np.random.shuffle(data)

data_dev=data[0:1000].T
y_dev=data_dev[0]
x_dev=data_dev[1:n]

data_train=data[1000:m].T
y_train=data_train[0]
x_train=data_train[1:n]

In [56]:
x_dev.shape

(784, 1000)

In [48]:
print(x_train.shape)

(41000, 784)


In [67]:
def init_params():
    W1 = np.random.randn(784, 10) * 0.01
    b1 = np.zeros((1, 10))
    W2 = np.random.randn(10, 10) * 0.01
    b2 = np.zeros((1, 10))
    return W1, b1, W2, b2

def ReLU(z):
    return np.maximum(0, z)

def d_ReLU(z):
    return z > 0

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  # for numerical stability
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def forward_prop(W1, b1, W2, b2, X):
    z1 = np.dot(X, W1) + b1
    a1 = ReLU(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)
    return z1, a1, z2, a2

def one_hot(Y, num_classes):
    one_hot_Y = np.zeros((Y.size, num_classes))
    one_hot_Y[np.arange(Y.size), Y] = 1
    return one_hot_Y

def back_prop(Z1, A1, Z2, A2, W2, X, Y):
    m = Y.size
    one_hot_Y = one_hot(Y, A2.shape[1])
    dz2 = A2 - one_hot_Y
    dW2 = np.dot(A1.T, dz2) / m
    db2 = np.sum(dz2, axis=0, keepdims=True) / m
    
    dz1 = np.dot(dz2, W2.T) * d_ReLU(Z1)
    dW1 = np.dot(X.T, dz1) / m
    db1 = np.sum(dz1, axis=0, keepdims=True) / m
    
    return dW1, db1, dW2, db2

def update_params(W1, dW1, b1, db1, W2, dW2, b2, db2, alpha):
    W1 -= alpha * dW1
    b1 -= alpha * db1
    W2 -= alpha * dW2
    b2 -= alpha * db2
    return W1, b1, W2, b2

def train(X, Y, epochs, alpha):
    W1, b1, W2, b2 = init_params()
    for i in range(epochs):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, dW1, b1, db1, W2, dW2, b2, db2, alpha)
        
        if i % 100 == 0:
            predictions = np.argmax(A2, axis=1)
            accuracy = np.mean(predictions == Y)
            print(f"Iteration {i}: Accuracy = {accuracy * 100:.2f}%")
    return W1, b1, W2, b2

def predict(W1, b1, W2, b2, X):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    return np.argmax(A2, axis=1)



In [68]:
W1, b1, W2, b2 = train(x_train, y_train, epochs=1000, alpha=0.1)
predictions = predict(W1, b1, W2, b2, x_train)

Iteration 0: Accuracy = 11.06%
Iteration 100: Accuracy = 11.17%
Iteration 200: Accuracy = 11.17%
Iteration 300: Accuracy = 11.17%
Iteration 400: Accuracy = 11.17%
Iteration 500: Accuracy = 11.17%
Iteration 600: Accuracy = 11.17%
Iteration 700: Accuracy = 11.17%
Iteration 800: Accuracy = 11.17%
Iteration 900: Accuracy = 11.17%


In [71]:
yhat=predict(W1, b1, W2, b2, x_dev.T)
print(np.mean(yhat!=y_dev))

0.894
