# NumPy Neural Network on Fashion-MNIST

This notebook trains a small fully-connected neural network **from scratch in NumPy**
on Fashion-MNIST (inspired by Andrew Ng's *Neural Networks and Deep Learning* Course 1).

**How to use**
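1. Run the next cell to define the helper functions (`softmax`, `one_hot`) and the `DenseNet` model.
2. Run the last cell to load Fashion-MNIST (via `tensorflow.keras.datasets`), train for 3 epochs, and print the test accuracy after each epoch.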


In [None]:
import numpy as np

def softmax(z, axis=-1):
    """Return the softmax of ``z`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating so that
    large scores do not overflow ``np.exp``; this shift does not change the
    result mathematically.

    Args:
        z: Array-like of scores. For the 2-D ``(samples, classes)`` arrays
            used in this file the default ``axis=-1`` normalises each row.
        axis: Axis along which the outputs sum to 1. Defaults to the last
            axis, so 1-D score vectors are accepted as well.

    Returns:
        An array with the same shape as ``z`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    z = np.asarray(z)
    shifted = z - z.max(axis=axis, keepdims=True)
    e = np.exp(shifted)
    # After the shift the largest term is exp(0) == 1, so the denominator is
    # always >= 1 and needs no epsilon to guard against division by zero.
    return e / e.sum(axis=axis, keepdims=True)

def one_hot(y, k):
    """Encode integer labels ``y`` as a float32 one-hot matrix with ``k`` columns.

    Row ``i`` of the result is all zeros except for a 1.0 in column ``y[i]``.
    """
    n_samples = len(y)
    encoded = np.zeros((n_samples, k), dtype=np.float32)
    row_index = np.arange(n_samples)
    encoded[row_index, y] = 1.0
    return encoded

class DenseNet:
    """Fully-connected classifier: ReLU hidden layers and a softmax output layer.

    ``layers`` lists the width of every layer, input first and output last,
    e.g. ``[784, 128, 64, 10]``. Training is plain gradient descent on the
    cross-entropy loss plus an L2 penalty on the weights.
    """

    def __init__(self, layers, lr=0.2, reg=1e-4, seed=42):
        """Create float32 weights and zero biases for each pair of adjacent layers.

        Args:
            layers: Layer widths, input first and output last.
            lr: Step size applied by ``step``.
            reg: L2 penalty coefficient on the weights (biases are not penalised).
            seed: Seed for the NumPy generator that draws the initial weights.
        """
        self.lr = lr
        self.reg = reg
        self.W = []
        self.b = []
        rng = np.random.default_rng(seed)
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            # He initialisation: zero-mean normal with std = sqrt(2 / fan_in).
            std = np.sqrt(2.0 / n_in)
            weights = rng.normal(0.0, std, size=(n_in, n_out))
            self.W.append(weights.astype(np.float32))
            self.b.append(np.zeros(n_out, dtype=np.float32))

    def forward(self, X):
        """Run a forward pass, caching each layer's pre-activation and activation.

        The caches ``self.z`` and ``self.a`` are overwritten on every call;
        ``backward`` reads ``self.a``. ``self.a[0]`` is ``X`` cast to float32.

        Returns:
            The output of the module-level ``softmax`` applied to the last
            layer's pre-activation.
        """
        activations = [X.astype(np.float32)]
        pre_activations = []
        self.a = activations
        self.z = pre_activations
        last = len(self.W) - 1
        for idx, (weights, bias) in enumerate(zip(self.W, self.b)):
            z = activations[-1] @ weights + bias
            pre_activations.append(z)
            # Hidden layers use ReLU; only the final layer uses softmax.
            out = softmax(z) if idx == last else np.maximum(0.0, z)
            activations.append(out)
        return activations[-1]

    def loss_ce(self, Y_true, P):
        """Return mean cross-entropy of ``P`` against ``Y_true`` plus the L2 penalty.

        ``1e-9`` is added to ``P`` before the logarithm so a zero probability
        does not produce ``-inf``. The penalty is ``0.5 * reg * sum(W**2)``
        over all weight matrices.
        """
        per_sample = np.sum(Y_true * np.log(P + 1e-9), axis=1)
        data_term = -np.mean(per_sample)
        penalty = 0.5 * self.reg * sum((w * w).sum() for w in self.W)
        return data_term + penalty

    def backward(self, Y_true):
        """Backpropagate from the activations cached by the last ``forward`` call.

        Args:
            Y_true: Target matrix with one row per sample of the cached batch.

        Returns:
            ``(grads_W, grads_B)``: two lists holding, per layer, the gradient
            with respect to the weights (including the ``reg * W`` term) and
            with respect to the biases.
        """
        n_layers = len(self.W)
        grads_W = [None] * n_layers
        grads_B = [None] * len(self.b)
        batch_size = Y_true.shape[0]
        # Softmax + cross-entropy: the output-layer error is (P - Y), averaged over the batch.
        delta = (self.a[-1] - Y_true) / batch_size
        for idx in range(n_layers - 1, -1, -1):
            layer_in = self.a[idx]
            grads_W[idx] = (layer_in.T @ delta) + self.reg * self.W[idx]
            grads_B[idx] = delta.sum(axis=0)
            if idx == 0:
                break  # the input layer has no upstream error to compute
            # ReLU derivative: pass the error only where the activation was positive.
            delta = (delta @ self.W[idx].T) * (layer_in > 0)
        return grads_W, grads_B

    def step(self, gW, gB):
        """Apply one gradient-descent update in place: ``param -= lr * grad``."""
        for idx, (weights, bias) in enumerate(zip(self.W, self.b)):
            weights -= self.lr * gW[idx]
            bias -= self.lr * gB[idx]


In [None]:
# Load Fashion-MNIST, train for 3 epochs, and report test accuracy
import numpy as np, time
from tensorflow.keras.datasets import fashion_mnist

# data: flatten every image to 784 values, cast to float32, and divide by 255
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train.reshape(len(X_train), 784).astype("float32") / 255.0
X_test = X_test.reshape(len(X_test), 784).astype("float32") / 255.0

# model
def one_hot(y, k):
    """Return a float32 matrix with one row per label and a 1.0 in column ``y[i]``.

    Same behaviour as the ``one_hot`` helper defined in the model cell; it is
    re-declared in this cell.
    """
    count = len(y)
    targets = np.zeros((count, k), dtype=np.float32)
    rows = np.arange(count)
    targets[rows, y] = 1.0
    return targets

# One-hot targets for the 10 classes, and a 784 -> 128 -> 64 -> 10 network.
Y_train = one_hot(y_train, 10)
model = DenseNet([784, 128, 64, 10], lr=0.2, reg=1e-4)

def acc(m, X, y, bs=2048):
    """Return the fraction of rows in ``X`` whose predicted class equals ``y``.

    Predictions are the argmax over axis 1 of ``m.forward`` evaluated on
    slices of at most ``bs`` rows, so the whole set is never pushed through
    the model at once.
    """
    n_total = len(X)
    n_correct = 0
    for start in range(0, n_total, bs):
        stop = start + bs
        predicted = m.forward(X[start:stop]).argmax(axis=1)
        n_correct += int((predicted == y[start:stop]).sum())
    return n_correct / n_total

# Mini-batch gradient descent: 3 epochs, reshuffling the sample order each epoch.
batch = 256
n_train = len(X_train)
# Seeded so the shuffle order is reproducible across runs.
shuffle_rng = np.random.default_rng(0)
for ep in range(1, 4):
    # Shuffle through an index array rather than re-binding X_train / Y_train to
    # permuted copies: the arrays stay aligned with the integer labels in y_train,
    # and the full training set is not copied every epoch.
    perm = shuffle_rng.permutation(n_train)
    for i in range(0, n_train, batch):
        idx = perm[i:i+batch]
        model.forward(X_train[idx])  # caches the activations that backward() reads
        gW, gB = model.backward(Y_train[idx])
        model.step(gW, gB)
    print(f"Epoch {ep:02d} | test_acc={acc(model, X_test, y_test):.3f}")
