# In [38]:
import numpy as np

def one_hot_encode(Y, num_classes=10):
    """Convert integer class labels into a one-hot matrix.

    Args:
        Y: Integer class labels. Any array-like is accepted and flattened
            first, so lists, row vectors and column vectors all produce one
            column per label.
        num_classes: Number of classes, i.e. rows of the result.

    Returns:
        Float array of shape (num_classes, n_labels); column ``j`` holds a
        single 1 in row ``Y[j]`` and zeros elsewhere.

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``.
    """
    # Flatten before indexing: a 2-D label array would broadcast against the
    # row indices and set the wrong cells.
    labels = np.asarray(Y).reshape(-1)
    # Negative labels would otherwise wrap around and mark the last classes.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise IndexError(
            f"labels must lie in [0, {num_classes}); "
            f"got range [{labels.min()}, {labels.max()}]"
        )
    one_hot = np.zeros((labels.size, num_classes))
    one_hot[np.arange(labels.size), labels] = 1
    return one_hot.T  # shape: (num_classes, samples)

# Activation functions
def relu(Z):
    """Apply the rectified linear unit element-wise: ``max(0, z)``."""
    rectified = np.maximum(0, Z)
    return rectified
def relu_backward(dA, Z):
    """Backpropagate a gradient through ReLU.

    Returns a copy of ``dA`` with zeros wherever ``Z <= 0``; ``dA`` itself is
    left untouched.
    """
    grad = dA.copy()
    inactive = Z <= 0
    grad[inactive] = 0
    return grad

def softmax(Z):
    """Compute a softmax along axis 0 (one distribution per column).

    The per-column maximum is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.
    """
    col_max = np.max(Z, axis=0, keepdims=True)
    exps = np.exp(Z - col_max)
    totals = np.sum(exps, axis=0, keepdims=True)
    return exps / totals

# Layers
def conv_single_step(slice_X, W, b):
    """Apply one filter to one input window.

    Multiplies ``slice_X`` and ``W`` element-wise, sums every product and
    adds the bias ``b``.
    """
    weighted = slice_X * W
    total = np.sum(weighted)
    return total + b

def conv_forward(X, W, b, stride=1):
    """Run an unpadded 2-D convolution over a batch of inputs.

    Args:
        X: Input batch of shape (m, H_in, W_in, C_in).
        W: Filters of shape (f, f, C_in, n_filters).
        b: One bias per filter; any shape holding exactly n_filters values.
        stride: Step between neighbouring windows on both spatial axes.

    Returns:
        Float array of shape (m, H_out, W_out, n_filters), where
        ``H_out = (H_in - f) // stride + 1`` and likewise for ``W_out``.

    Raises:
        ValueError: If the channel counts of ``X`` and ``W`` differ, or ``b``
            does not hold one value per filter.
    """
    m, H_in, W_in, C_in = X.shape
    f, _, C_filter, n_filters = W.shape
    # Reject mismatched channels explicitly; a size-1 channel axis would
    # otherwise broadcast silently and yield meaningless activations.
    if C_filter != C_in:
        raise ValueError(
            f"input has {C_in} channels but filters expect {C_filter}"
        )
    bias = np.asarray(b).reshape(-1)
    if bias.size != n_filters:
        raise ValueError(
            f"expected {n_filters} bias values, got {bias.size}"
        )

    H_out = (H_in - f) // stride + 1
    W_out = (W_in - f) // stride + 1
    Z = np.zeros((m, H_out, W_out, n_filters))
    for h in range(H_out):
        vert_start = h * stride
        vert_end = vert_start + f
        for w in range(W_out):
            horiz_start = w * stride
            horiz_end = horiz_start + f
            # All samples at this window position: (m, f, f, C_in).
            patch = X[:, vert_start:vert_end, horiz_start:horiz_end, :]
            # Contract the window axes against every filter at once,
            # giving an (m, n_filters) slab of outputs.
            Z[:, h, w, :] = np.tensordot(patch, W, axes=([1, 2, 3], [0, 1, 2])) + bias
    return Z

def maxpool_forward(X, size=2, stride=2):
    """Max-pool each channel of a batch of feature maps.

    ``X`` has shape (m, H_in, W_in, C). The result has shape
    (m, H_out, W_out, C) with ``H_out = (H_in - size) // stride + 1`` (and
    likewise for ``W_out``); each entry is the maximum of one
    ``size`` x ``size`` window of the matching sample and channel.
    """
    n_samples, in_h, in_w, n_channels = X.shape
    out_h = (in_h - size) // stride + 1
    out_w = (in_w - size) // stride + 1
    pooled = np.zeros((n_samples, out_h, out_w, n_channels))
    for row in range(out_h):
        top = row * stride
        for col in range(out_w):
            left = col * stride
            # Window for every sample and channel at once.
            window = X[:, top:top + size, left:left + size, :]
            pooled[:, row, col, :] = np.max(window, axis=(1, 2))
    return pooled

def flatten(X):
    """Collapse every axis after the first and transpose the result.

    An input whose first axis indexes samples becomes a 2-D array of shape
    (features, samples).
    """
    n_samples = X.shape[0]
    per_sample = X.reshape(n_samples, -1)
    return per_sample.T  # (features, samples)

def dense_forward(X, W, b):
    """Compute the affine map ``W.dot(X) + b`` of a fully connected layer."""
    projection = W.dot(X)
    return projection + b

# Loss and metrics
def compute_loss(Y_hat, Y):
    """Return the cross-entropy between ``Y`` and ``Y_hat``.

    The summed ``-Y * log(Y_hat)`` is divided by ``Y.shape[1]``. A constant
    1e-8 is added inside the logarithm so a zero probability does not
    produce ``-inf``.
    """
    n_samples = Y.shape[1]
    log_probs = np.log(Y_hat + 1e-8)
    total = np.sum(Y * log_probs)
    return -total / n_samples

def predict(Y_hat):
    """Return, for each column of ``Y_hat``, the row index of its largest entry."""
    winners = np.argmax(Y_hat, axis=0)
    return winners

def accuracy(preds, labels):
    """Return the fraction of predictions that match their labels.

    Args:
        preds: Predicted class indices (array-like).
        labels: True class indices (array-like).

    Returns:
        Mean of the element-wise equality between ``preds`` and ``labels``.
    """
    # Convert first: two Python lists compared with ``==`` give a single bool
    # rather than an element-wise result.
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    if preds.size == labels.size:
        # Same number of entries but possibly different layouts, e.g. (m,)
        # against (m, 1). Flatten so the comparison pairs entries one to one
        # instead of broadcasting to an (m, m) grid.
        preds = preds.reshape(-1)
        labels = labels.reshape(-1)
    # Differing sizes fall through to ordinary NumPy broadcasting.
    return np.mean(preds == labels)

# Backprop
def dense_backward(dZ, A_prev, W):
    """Backpropagate through a fully connected layer.

    With ``m = A_prev.shape[1]``, returns a tuple
    ``(dA_prev, dW, db)`` where ``dA_prev = W.T.dot(dZ)``,
    ``dW = (1/m) * dZ.dot(A_prev.T)`` and ``db`` is ``(1/m)`` times the sum
    of ``dZ`` along axis 1, kept as a column.
    """
    n_samples = A_prev.shape[1]
    inv_m = 1 / n_samples
    grad_W = inv_m * dZ.dot(A_prev.T)
    grad_b = inv_m * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = W.T.dot(dZ)
    return grad_A_prev, grad_W, grad_b

def maxpool_backward(dA, A_prev, size=2, stride=2):
    """Route pooled gradients back to the inputs that produced each maximum.

    ``A_prev`` is the (m, H_prev, W_prev, C) input of the pooling layer and
    ``dA`` the gradient of its output. Every position in a window equal to
    that window's maximum receives the window's gradient, so tied maxima all
    receive it. Contributions from overlapping windows are added up.

    Returns an array shaped and typed like ``A_prev``.
    """
    n_samples, _, _, n_channels = A_prev.shape
    grad_in = np.zeros_like(A_prev)
    out_h, out_w = dA.shape[1], dA.shape[2]
    for row in range(out_h):
        top = row * stride
        bottom = top + size
        for col in range(out_w):
            left = col * stride
            right = left + size
            # Process every sample and channel of this window together.
            window = A_prev[:, top:bottom, left:right, :]
            peak = np.max(window, axis=(1, 2), keepdims=True)
            is_peak = window == peak
            # Keep singleton spatial axes so the gradient broadcasts over
            # the window.
            upstream = dA[:n_samples, row:row + 1, col:col + 1, :n_channels]
            grad_in[:, top:bottom, left:right, :] += upstream * is_peak
    return grad_in

def conv_backward(dZ, A_prev, W, stride=1):
    """Backpropagate through an unpadded 2-D convolution.

    Args:
        dZ: Gradient of the layer output, shape (m, H_out, W_out, n_filters).
        A_prev: Layer input from the forward pass, shape
            (m, H_prev, W_prev, C_in).
        W: Filters, shape (f, f, C_in, n_filters).
        stride: Stride used in the forward pass.

    Returns:
        Tuple ``(dA_prev, dW, db)``: gradients with respect to the input
        (shape of ``A_prev``), the filters (shape of ``W``) and the biases
        (shape (n_filters,)).

    NOTE(review): dW and db are summed over the batch, whereas
    ``dense_backward`` scales by 1/m. Confirm the caller expects this.
    """
    m, H_prev, W_prev, C_in = A_prev.shape
    f, _, _, n_filters = W.shape
    H_out, W_out = dZ.shape[1], dZ.shape[2]

    # An integer input (e.g. raw image bytes) cannot accumulate float
    # gradients in place, so promote the buffer in that case only.
    if np.issubdtype(A_prev.dtype, np.floating):
        grad_dtype = A_prev.dtype
    else:
        grad_dtype = np.result_type(A_prev.dtype, W.dtype, dZ.dtype)
    dA_prev = np.zeros(A_prev.shape, dtype=grad_dtype)
    dW = np.zeros_like(W)
    # Each bias gradient is the sum of its filter's output gradients.
    db = np.sum(dZ, axis=(0, 1, 2), dtype=np.float64)

    for h in range(H_out):
        vert_start = h * stride
        vert_end = vert_start + f
        for w in range(W_out):
            horiz_start = w * stride
            horiz_end = horiz_start + f
            window = A_prev[:, vert_start:vert_end, horiz_start:horiz_end, :]
            upstream = dZ[:, h, w, :]  # (m, n_filters)
            # Sum over samples of window * gradient -> (f, f, C_in, n_filters).
            dW += np.tensordot(window, upstream, axes=([0], [0]))
            # Sum over filters of W * gradient -> (m, f, f, C_in).
            dA_prev[:, vert_start:vert_end, horiz_start:horiz_end, :] += np.tensordot(
                upstream, W, axes=([1], [3])
            )
    return dA_prev, dW, db

# Parameter update
def update_params(params, grads, lr):
    """Take one gradient-descent step on every parameter.

    Pairs ``params`` with ``grads`` in order and subtracts ``lr * grad`` from
    each parameter using ``-=``, which modifies NumPy arrays in place.
    Nothing is returned.
    """
    for param, grad in zip(params, grads):
        step = lr * grad
        param -= step
# Initialize parameters
def initialize_params(filter_shape=(3, 3, 3, 8), dense_shape=(10, 1800), scale=0.1):
    """Create initial parameters for one conv layer and one dense layer.

    Weights are drawn from ``np.random.randn`` (conv filters first, then the
    dense matrix) and multiplied by ``scale``; biases start at zero. Call
    ``np.random.seed`` beforehand for reproducible values.

    Args:
        filter_shape: Shape of the conv filters, (f, f, C_in, n_filters).
        dense_shape: Shape of the dense weights, (n_outputs, n_inputs).
            NOTE(review): the default 1800 presumably equals 15 * 15 * 8,
            the flattened pooled feature map. Confirm against the training
            code.
        scale: Multiplier applied to the standard-normal draws.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with ``b1`` of shape (n_filters,) and
        ``b2`` of shape (n_outputs, 1).
    """
    W1 = np.random.randn(*filter_shape) * scale
    b1 = np.zeros(filter_shape[-1])
    W2 = np.random.randn(*dense_shape) * scale
    b2 = np.zeros((dense_shape[0], 1))
    return W1, b1, W2, b2