In [None]:
import numpy as np

# Example small training (for demo, not full CIFAR-10 yet)

# Load CIFAR-10 via Keras for simplicity (the first call downloads a ~170 MB archive)
from tensorflow.keras.datasets import cifar10

In [None]:
class Conv2D:
    """2-D convolution layer (cross-correlation) on NCHW arrays, with SGD built into backward.

    Attributes:
        W: filters, shape (out_channels, in_channels, kernel_size, kernel_size).
        b: biases, shape (out_channels, 1).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        """Store the hyper-parameters and initialise the weights.

        Args:
            in_channels: number of channels in the input.
            out_channels: number of filters (channels in the output).
            kernel_size: side length of the square kernel.
            stride: step between neighbouring windows.
            padding: zero padding added to each side of both spatial axes.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        # Xavier/Glorot uniform init. For a convolution both fans include the
        # receptive-field size (kernel_size ** 2), not only fan_in.
        receptive_field = kernel_size * kernel_size
        fan_in = in_channels * receptive_field
        fan_out = out_channels * receptive_field
        limit = np.sqrt(6 / (fan_in + fan_out))
        self.W = np.random.uniform(-limit, limit,
                                   (out_channels, in_channels, kernel_size, kernel_size))
        self.b = np.zeros((out_channels, 1))

    def forward(self, x):
        """Convolve a batch and cache the input for backward.

        Args:
            x: array of shape (N, in_channels, H, W).

        Returns:
            Array of shape (N, out_channels, H_out, W_out) where
            H_out = (H - k + 2p) // s + 1 (same for W_out). Also stored as ``self.out``.
        """
        self.x = x
        N, C, H, W = x.shape
        k, s, p = self.kernel_size, self.stride, self.padding

        H_out = (H - k + 2*p)//s + 1
        W_out = (W - k + 2*p)//s + 1
        self.out = np.zeros((N, self.out_channels, H_out, W_out))

        x_padded = np.pad(x, ((0,0),(0,0),(p,p),(p,p)), mode="constant")
        # (1, out_channels) so it broadcasts over the batch. Adding the bias as
        # a row avoids writing a size-1 array into a scalar slot.
        bias_row = self.b.reshape(1, -1)

        # Only the spatial positions are looped; batch and channels are
        # contracted in one tensordot per position.
        for i in range(H_out):
            for j in range(W_out):
                h_start, w_start = i*s, j*s
                patch = x_padded[:, :, h_start:h_start+k, w_start:w_start+k]  # (N, C, k, k)
                # Contract C, k, k of the patch with C, k, k of every filter -> (N, out_channels).
                self.out[:, :, i, j] = np.tensordot(
                    patch, self.W, axes=([1, 2, 3], [1, 2, 3])) + bias_row
        return self.out

    def backward(self, d_out, lr=0.001):
        """Back-propagate through the layer and apply one SGD step to W and b.

        Must be called after ``forward`` (uses the cached ``self.x``).

        Args:
            d_out: gradient w.r.t. the forward output, shape (N, out_channels, H_out, W_out).
            lr: learning rate for the in-place parameter update.

        Returns:
            Gradient w.r.t. the forward input, same shape as ``self.x``.
        """
        N, C, H, W = self.x.shape
        k, s, p = self.kernel_size, self.stride, self.padding
        H_out, W_out = d_out.shape[2], d_out.shape[3]

        dW = np.zeros_like(self.W)

        x_padded = np.pad(self.x, ((0,0),(0,0),(p,p),(p,p)), mode="constant")
        # Accumulate in padded coordinates; the border is cropped afterwards.
        dx_padded = np.zeros_like(x_padded)

        for i in range(H_out):
            for j in range(W_out):
                h_start, w_start = i*s, j*s
                patch = x_padded[:, :, h_start:h_start+k, w_start:w_start+k]  # (N, C, k, k)
                grad = d_out[:, :, i, j]                                       # (N, out_channels)

                # Sum over the batch: (out_channels, N) x (N, C, k, k) -> (out_channels, C, k, k).
                dW += np.tensordot(grad, patch, axes=([0], [0]))
                # Sum over filters: (N, out_channels) x (out_channels, C, k, k) -> (N, C, k, k).
                dx_padded[:, :, h_start:h_start+k, w_start:w_start+k] += np.tensordot(
                    grad, self.W, axes=([1], [0]))

        # The bias gradient does not depend on the window position.
        db = d_out.sum(axis=(0, 2, 3)).reshape(self.b.shape)

        dx = dx_padded[:, :, p:-p, p:-p] if p > 0 else dx_padded
        # update (after dx was computed with the pre-update weights)
        self.W -= lr * dW
        self.b -= lr * db
        return dx


In [None]:
class ReLU:
    """Element-wise rectified linear unit: max(0, x)."""

    def forward(self, x):
        """Clamp negative entries to zero; the input is cached for backward."""
        self.x = x
        rectified = np.maximum(0, x)
        return rectified

    def backward(self, d_out):
        """Pass the upstream gradient only where the cached input was strictly positive."""
        was_active = self.x > 0
        return d_out * was_active


In [None]:
class MaxPool2D:
    """2-D max pooling on NCHW arrays.

    The window is ``size`` x ``size`` and moves by ``stride``. When
    ``size == stride`` the windows tile the input, and the output is
    H // stride by W // stride.
    """

    def __init__(self, size=2, stride=2):
        """Store the window side length (``size``) and the step between windows (``stride``)."""
        self.size = size
        self.stride = stride

    def forward(self, x):
        """Pool a batch and remember which element won each window.

        Args:
            x: array of shape (N, C, H, W).

        Returns:
            Array of shape (N, C, H_out, W_out) with
            H_out = (H - size) // stride + 1 (0 if the window does not fit).
            Also stored as ``self.out``.
        """
        self.x = x
        N, C, H, W = x.shape
        k, s = self.size, self.stride
        # Window extent comes from `size`, step from `stride`.
        H_out = max((H - k)//s + 1, 0)
        W_out = max((W - k)//s + 1, 0)
        self.out = np.zeros((N, C, H_out, W_out))
        # Boolean mask (same shape as x) of every element selected by at least one window.
        self.argmax = np.zeros_like(x, dtype=bool)
        # Flat index (0 .. k*k-1) of the winning element inside each window.
        # Stored per output position so overlapping windows stay independent.
        self._winner = np.zeros((N, C, H_out, W_out), dtype=np.intp)

        slot = np.arange(k * k)
        for i in range(H_out):
            for j in range(W_out):
                h_start, w_start = i*s, j*s
                window = x[:, :, h_start:h_start+k, w_start:w_start+k].reshape(N, C, k * k)
                # argmax picks the FIRST maximum, so exactly one element per window wins.
                winner = window.argmax(axis=2)
                self._winner[:, :, i, j] = winner
                self.out[:, :, i, j] = window.max(axis=2)
                one_hot = slot == winner[..., None]                     # (N, C, k*k)
                self.argmax[:, :, h_start:h_start+k, w_start:w_start+k] |= one_hot.reshape(N, C, k, k)
        return self.out

    def backward(self, d_out):
        """Route each upstream gradient to the element that won its window.

        Must be called after ``forward``.

        Args:
            d_out: gradient w.r.t. the forward output, shape (N, C, H_out, W_out).

        Returns:
            Gradient w.r.t. the forward input, same shape as ``self.x``.
            Tied maxima are not double-counted: only the first one receives gradient.
        """
        dx = np.zeros_like(self.x)
        N, C, H_out, W_out = d_out.shape
        k, s = self.size, self.stride

        slot = np.arange(k * k)
        for i in range(H_out):
            for j in range(W_out):
                h_start, w_start = i*s, j*s
                one_hot = slot == self._winner[:, :, i, j][..., None]   # (N, C, k*k)
                routed = d_out[:, :, i, j][..., None] * one_hot
                # += so that overlapping windows accumulate into shared elements.
                dx[:, :, h_start:h_start+k, w_start:w_start+k] += routed.reshape(N, C, k, k)
        return dx


In [None]:
class FullyConnected:
    """Dense layer ``y = flatten(x) @ W + b`` with SGD built into backward.

    Attributes:
        W: weights, shape (in_dim, out_dim).
        b: biases, shape (1, out_dim).
    """

    def __init__(self, in_dim, out_dim):
        """Initialise W with Xavier/Glorot uniform values and b with zeros."""
        limit = np.sqrt(6 / (in_dim + out_dim))
        self.W = np.random.uniform(-limit, limit, (in_dim, out_dim))
        self.b = np.zeros((1, out_dim))

    def forward(self, x):
        """Flatten every sample to a row vector and apply the affine map.

        Args:
            x: array of shape (N, ...); trailing axes are flattened to in_dim.

        Returns:
            Array of shape (N, out_dim).
        """
        # Remember the un-flattened shape so that backward can hand the
        # gradient back in the layout the previous layer produced.
        self.in_shape = x.shape
        self.x = x.reshape(x.shape[0], -1)
        return self.x @ self.W + self.b

    def backward(self, d_out, lr=0.001):
        """Back-propagate and apply one SGD step to W and b.

        Must be called after ``forward``.

        Args:
            d_out: gradient w.r.t. the forward output, shape (N, out_dim).
            lr: learning rate for the in-place parameter update.

        Returns:
            Gradient w.r.t. the forward input, reshaped to the shape that was
            passed to ``forward`` (e.g. (N, C, H, W) when fed by a pooling layer).
        """
        dW = self.x.T @ d_out
        db = np.sum(d_out, axis=0, keepdims=True)
        # Uses the pre-update weights.
        dx = d_out @ self.W.T
        self.W -= lr * dW
        self.b -= lr * db
        return dx.reshape(self.in_shape)


In [None]:
def softmax(x):
    """Row-wise softmax of a 2-D array of logits (one sample per row)."""
    # Subtracting each row's maximum keeps exp() from overflowing without
    # changing the result.
    row_peak = np.max(x, axis=1, keepdims=True)
    weights = np.exp(x - row_peak)
    row_total = np.sum(weights, axis=1, keepdims=True)
    return weights / row_total

def cross_entropy(pred, y):
    """Mean negative log-likelihood of the true classes.

    ``pred`` holds one row of class probabilities per sample and ``y`` the
    integer class index of each sample.
    """
    batch_size = pred.shape[0]
    sample_rows = np.arange(batch_size)
    true_class_prob = pred[sample_rows, y]
    # The small epsilon guards against log(0).
    per_sample_nll = -np.log(true_class_prob + 1e-9)
    return np.sum(per_sample_nll) / batch_size

def softmax_backward(pred, y):
    """Gradient of the mean cross-entropy loss w.r.t. the logits.

    Computes ``(pred - one_hot(y)) / batch_size`` without modifying ``pred``.
    """
    batch_size = len(y)
    sample_rows = np.arange(batch_size)
    grad = pred.copy()
    # Subtracting 1 at the true class is the same as subtracting one_hot(y).
    grad[sample_rows, y] -= 1
    return grad / batch_size


In [None]:
class SimpleCNN:
    """Small CNN: [conv3x3 -> ReLU -> 2x2 max-pool] x 2 followed by one dense layer.

    With the defaults the network expects CIFAR-10 shaped input (N, 3, 32, 32)
    and produces 10 logits per sample.
    """

    def __init__(self, in_channels=3, num_classes=10, image_size=32):
        """Build the layers.

        Args:
            in_channels: channels of the input images.
            num_classes: number of logits produced by the final dense layer.
            image_size: side length of the (square) input images.
        """
        self.conv1 = Conv2D(in_channels, 8, kernel_size=3, padding=1)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2D(2,2)

        self.conv2 = Conv2D(8, 16, kernel_size=3, padding=1)
        self.relu2 = ReLU()
        self.pool2 = MaxPool2D(2,2)

        # The padded 3x3 convolutions keep the spatial size; each 2x2 pool
        # halves it (floor), e.g. 32 -> 16 -> 8 for CIFAR-10.
        pooled_size = image_size // 2 // 2
        self.fc = FullyConnected(16 * pooled_size * pooled_size, num_classes)

    def forward(self, x):
        """Run the network on a batch of NCHW images and return the raw logits."""
        out = self.conv1.forward(x)
        out = self.relu1.forward(out)
        out = self.pool1.forward(out)

        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.pool2.forward(out)

        out = self.fc.forward(out)
        return out

    def backward(self, d_out, lr=0.001):
        """Back-propagate through all layers in reverse order.

        The conv and dense layers update their own parameters with learning
        rate ``lr`` as the gradient passes through them.

        Args:
            d_out: gradient of the loss w.r.t. the logits.
            lr: learning rate forwarded to the trainable layers.

        Returns:
            Gradient of the loss w.r.t. the network input, as returned by
            ``conv1.backward`` (consistent with the layers' own ``backward``).
        """
        d_out = self.fc.backward(d_out, lr)

        d_out = self.pool2.backward(d_out)
        d_out = self.relu2.backward(d_out)
        d_out = self.conv2.backward(d_out, lr)

        d_out = self.pool1.backward(d_out)
        d_out = self.relu1.backward(d_out)
        d_out = self.conv1.backward(d_out, lr)
        return d_out


In [None]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Labels come back with an extra axis; flatten them to 1-D for fancy indexing.
y_train, y_test = y_train.flatten(), y_test.flatten()

# Normalize (assumes 8-bit pixel values, so the result lies in [0, 1])
x_train = x_train.astype(np.float32) / 255.0
x_test  = x_test.astype(np.float32) / 255.0

# NCHW format (channels moved from the last axis to axis 1)
x_train = np.transpose(x_train, (0,3,1,2))
x_test  = np.transpose(x_test, (0,3,1,2))

SEED = 0
EPOCHS = 2
BATCH = 32
LR = 0.001
N_TRAIN = None  # set to e.g. 1_000 for a quick demo; None uses the full training set

# Seed before building the model so weight init and shuffling are reproducible.
np.random.seed(SEED)
model = SimpleCNN()

n_train = len(x_train) if N_TRAIN is None else min(N_TRAIN, len(x_train))

for epoch in range(EPOCHS):
    # Shuffle by index only: the arrays themselves are never copied or
    # rebound, so re-running this cell starts from the same data.
    order = np.random.permutation(len(x_train))[:n_train]
    loss_sum, n_seen = 0.0, 0

    for i in range(0, n_train, BATCH):
        batch_idx = order[i:i+BATCH]
        xb, yb = x_train[batch_idx], y_train[batch_idx]

        # forward
        logits = model.forward(xb)
        probs = softmax(logits)
        loss = cross_entropy(probs, yb)
        # Weight by batch size so a short final batch does not skew the mean.
        loss_sum += loss * len(batch_idx)
        n_seen += len(batch_idx)

        # backward (the layers apply the SGD update themselves)
        d_out = softmax_backward(probs, yb)
        model.backward(d_out, lr=LR)

    # quick eval
    test_logits = model.forward(x_test[:500])  # subset for speed
    test_preds = np.argmax(softmax(test_logits), axis=1)
    acc = np.mean(test_preds == y_test[:500])
    # Report the mean training loss over the epoch, not just the last mini-batch.
    epoch_loss = loss_sum / n_seen if n_seen else float("nan")
    print(f"Epoch {epoch+1}, Loss={epoch_loss:.4f}, Test Acc={acc:.3f}")
