In [3]:
import pandas as pd 
import numpy as np
import matplotlib as plt
from sklearn.model_selection import train_test_split

# Read the Fashion-MNIST train and test CSV files (paths are relative to the
# notebook's working directory). The generator yields them in the same order
# as the target names on the left.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("fashion-mnist_train.csv", "fashion-mnist_test.csv")
)

In [None]:
# Separate the 'label' column from the remaining columns and divide the
# remaining values by 255.
y = train_df['label'].values
X = train_df.drop(columns=['label']).values / 255.0

# Hold out 10% of the rows as a dev set; the fixed random_state keeps the
# split itself repeatable.
X_train, X_dev, y_train, y_dev = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Store one example per column, which is what W1.dot(X) in forward_prop needs.
X_train, X_dev = X_train.T, X_dev.T  # Shapes (784, 54000) and (784, 6000)

In [None]:
def ReLU(Z):
    """Rectified linear unit: the element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z):
    """Column-wise softmax of ``Z``.

    Each column's maximum is subtracted before exponentiating so that large
    inputs do not overflow; the subtraction cancels in the ratio, so the
    result is unchanged. Every column of the output sums to 1.
    """
    column_max = np.max(Z, axis=0, keepdims=True)
    numerators = np.exp(Z - column_max)
    denominators = np.sum(numerators, axis=0, keepdims=True)
    return numerators / denominators

def init_params(n_input=784, n_hidden=128, n_output=10):
    """Create the initial weights and biases for the two-layer network.

    Weights are drawn from NumPy's global random generator
    (``np.random.randn``) and scaled by ``sqrt(2 / fan_in)``; biases start at
    zero. Called with no arguments this produces the same shapes as before
    (784 -> 128 -> 10), and the draw order (W1, then W2) is unchanged.

    Args:
        n_input: Number of input features (rows of ``X``).
        n_hidden: Number of hidden units.
        n_output: Number of output classes.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_input)``,
        ``(n_hidden, 1)``, ``(n_output, n_hidden)`` and ``(n_output, 1)``.
    """
    W1 = np.random.randn(n_hidden, n_input) * np.sqrt(2.0 / n_input)
    b1 = np.zeros((n_hidden, 1))
    W2 = np.random.randn(n_output, n_hidden) * np.sqrt(2.0 / n_hidden)
    b2 = np.zeros((n_output, 1))
    return W1, b1, W2, b2

def ReLU_deriv(Z):
    """Derivative of ReLU: 1.0 where ``Z`` is strictly positive, 0.0 elsewhere."""
    positive_mask = Z > 0
    return positive_mask.astype(float)

def one_hot(Y, num_classes=None):
    """Encode integer class labels as a one-hot matrix, one column per label.

    Args:
        Y: 1-D array of non-negative integer class labels.
        num_classes: Number of rows (classes) in the result. When omitted it
            is inferred as ``Y.max() + 1``, which only gives the full class
            count if the largest class actually occurs in ``Y``. Pass it
            explicitly when encoding mini-batches so the result always has
            the same number of rows as the network's output.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` with a single 1 in
        each column, at the row given by that column's label.

    Raises:
        ValueError: If ``num_classes`` is omitted and ``Y`` is empty, or if a
            label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(Y)
    if num_classes is None:
        if labels.size == 0:
            raise ValueError("cannot infer num_classes from an empty label array")
        num_classes = int(labels.max()) + 1

    # Negative indices would silently wrap around to the last rows, so reject
    # anything outside [0, num_classes) up front.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes}); "
            f"got min={labels.min()}, max={labels.max()}"
        )

    encoded = np.zeros((num_classes, labels.size))
    encoded[labels, np.arange(labels.size)] = 1
    return encoded


def forward_prop(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    The hidden layer applies ReLU to ``W1.dot(X) + b1``; the output layer
    applies softmax to ``W2.dot(A1) + b2``.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation and the softmax output.
    """
    hidden_pre = W1.dot(X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = W2.dot(hidden_act) + b2
    output_probs = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_probs

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute batch-averaged gradients for both layers of the network.

    Args:
        Z1: Hidden-layer pre-activations from ``forward_prop``.
        A1: Hidden-layer activations from ``forward_prop``.
        Z2: Output-layer pre-activations (not used; kept so the signature
            mirrors the values returned by ``forward_prop``).
        A2: Softmax outputs, one column per example.
        W1: First-layer weights (not used; kept for the same reason).
        W2: Second-layer weights.
        X: Input batch with one example per column.
        Y: 1-D integer array holding the true class of each column of ``X``.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``, each divided by the batch size.
    """
    m = X.shape[1]

    # Output error term: A2 minus the one-hot targets. It is formed by
    # subtracting 1 at each example's true-class row so that the number of
    # classes always comes from A2. Encoding the targets from Y alone sizes
    # the matrix by Y.max() + 1, which produces a shape mismatch whenever a
    # mini-batch happens not to contain the highest class.
    dZ2 = A2.copy()
    dZ2[Y, np.arange(m)] -= 1.0

    dW2 = (1/m) * dZ2.dot(A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)

    # Push the error back through W2 and gate it by the ReLU derivative.
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = (1/m) * dZ1.dot(X.T)
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step of size ``alpha`` on every parameter.

    The subtraction uses ``-=``, so NumPy arrays passed in are modified in
    place and the returned objects are those same arrays.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` after the step.
    """
    stepped = []
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= alpha * grad
        stepped.append(param)
    return tuple(stepped)

def get_predictions(A2):
    """For each column of ``A2``, return the row index of its largest value."""
    predicted_classes = np.argmax(A2, axis=0)
    return predicted_classes

def get_accuracy(predictions, y_true):
    """Fraction of positions at which ``predictions`` equals ``y_true``."""
    is_correct = predictions == y_true
    return np.mean(is_correct)

In [None]:
def gradient_descent(X_train, y_train, X_dev, y_dev, alpha, iterations,
                     batch_size=128, log_every=50):
    """Train the two-layer network with mini-batch gradient descent.

    Each of the ``iterations`` passes reshuffles the training columns with
    ``np.random.permutation`` and then performs one parameter update per
    mini-batch, so one iteration is one full pass over ``X_train``.

    Args:
        X_train: Training inputs, one example per column.
        y_train: 1-D array of training labels aligned with those columns.
        X_dev: Dev-set inputs, one example per column.
        y_dev: 1-D array of dev-set labels.
        alpha: Learning rate passed to ``update_params``.
        iterations: Number of passes over the training set.
        batch_size: Number of examples per mini-batch (the last batch of a
            pass may be smaller). Defaults to the previously hard-coded 128.
        log_every: Print train/dev accuracy on every pass whose index is a
            multiple of this value. Defaults to the previously hard-coded 50.

    Returns:
        The trained parameters ``(W1, b1, W2, b2)``.

    Raises:
        ValueError: If ``batch_size`` or ``log_every`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    W1, b1, W2, b2 = init_params()
    m = X_train.shape[1]  # number of training examples

    for i in range(iterations):
        # Fresh shuffle each pass; inputs and labels are indexed with the
        # same permutation so they stay aligned.
        permutation = np.random.permutation(m)
        X_shuffled = X_train[:, permutation]
        y_shuffled = y_train[permutation]

        for start in range(0, m, batch_size):
            end = start + batch_size
            X_batch = X_shuffled[:, start:end]
            y_batch = y_shuffled[start:end]

            # Forward pass, gradients, then one update per mini-batch.
            Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X_batch)
            dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X_batch, y_batch)
            W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)

        # Periodic progress report on the full train and dev sets.
        if i % log_every == 0:
            train_preds = get_predictions(forward_prop(W1, b1, W2, b2, X_train)[3])
            dev_preds = get_predictions(forward_prop(W1, b1, W2, b2, X_dev)[3])
            print(f"Iter {i}: Train Acc={get_accuracy(train_preds, y_train):.3f},Dev Acc={get_accuracy(dev_preds, y_dev):.3f}")

    return W1, b1, W2, b2

In [None]:
# Seed NumPy's global random generator before training: init_params draws the
# initial weights with np.random.randn and gradient_descent shuffles with
# np.random.permutation, so without a seed every run trains a different model.
np.random.seed(42)

W1, b1, W2, b2 = gradient_descent(
    X_train, y_train,
    X_dev, y_dev,
    alpha=0.01,
    iterations=250
)

Iter 0: Train Acc=0.747,Dev Acc=0.754
Iter 50: Train Acc=0.880,Dev Acc=0.872
Iter 100: Train Acc=0.898,Dev Acc=0.882
Iter 150: Train Acc=0.907,Dev Acc=0.885
Iter 200: Train Acc=0.919,Dev Acc=0.886


In [None]:
def test_model(W1, b1, W2, b2, X_test, y_test):
    """Print the accuracy of the given parameters on a test set.

    ``X_test`` is transposed before the forward pass, so it should be passed
    with one example per row; ``y_test`` holds the matching labels.
    """
    features_by_column = X_test.T  # (784, m_test)
    output_probs = forward_prop(W1, b1, W2, b2, features_by_column)[3]
    accuracy = get_accuracy(get_predictions(output_probs), y_test)
    print(f"Final Test Accuracy: {accuracy:.3f}")

# Prepare the test split the same way as the training data: labels come from
# the 'label' column and the remaining columns are divided by 255.
y_test = test_df['label'].values
X_test = test_df.drop(columns=['label']).values / 255.0

# Rows stay one example each (784 values per row); test_model transposes them.
test_model(W1, b1, W2, b2, X_test.reshape(-1, 784), y_test)

NameError: name 'test_df' is not defined