In [1]:
import numpy as np
from sklearn.datasets import fetch_openml  

In [2]:
import sys
# NOTE(review): hard-coded absolute path to one machine's project checkout. The
# `core` package imported below is resolved through this entry, so the notebook
# only runs where this exact directory exists. Consider a path relative to the
# notebook or installing the package instead.
sys.path.append(r"C:\Users\kanin\Desktop\CV\Neural network")
# Project-local neural-network framework (layers, optimizers, losses, metrics).
import core.nn as nn
import core.optim as optim
from core.losses import MSE, CrossEntropy
from core.utils import accuracy

In [3]:
def dataset(loader_fn, train_num, test_num, seed=None):
    """Build a class-balanced, shuffled train/test split.

    For every distinct label, the samples of that class are shuffled; the first
    ``train_num`` go to the training set and the next ``test_num`` go to the
    test set, so the two sets never share a sample. Both sets are shuffled once
    more at the end so classes are interleaved.

    Parameters
    ----------
    loader_fn : tuple
        Despite the name, this is an already-loaded ``(data_x, data_y)`` pair,
        not a callable. ``data_x`` holds one sample per row along axis 0 and
        ``data_y`` is the 1-D array of labels aligned with it.
    train_num : int
        Number of training samples to take *per class*.
    test_num : int
        Number of test samples to take *per class*.
    seed : int or None, optional
        When ``None`` (default) the global ``np.random`` state is used, exactly
        as before, so ``np.random.seed(...)`` still controls the result. When
        given, a private ``np.random.default_rng(seed)`` generator is used and
        the global state is left untouched.

    Returns
    -------
    X_train, y_train, X_test, y_test : np.ndarray
        A class with fewer than ``train_num + test_num`` samples contributes
        only as many samples as it has (training is filled first).

    Raises
    ------
    ValueError
        If ``train_num`` or ``test_num`` is negative, or if ``data_x`` and
        ``data_y`` have different lengths.
    """
    data_x, data_y = loader_fn
    data_x = np.asarray(data_x)
    data_y = np.asarray(data_y)

    # Negative counts would turn the slices below into "all but the last k",
    # silently producing a meaningless split.
    if train_num < 0 or test_num < 0:
        raise ValueError("train_num and test_num must be non-negative")
    if len(data_x) != len(data_y):
        raise ValueError(
            f"data_x and data_y must have the same length, "
            f"got {len(data_x)} and {len(data_y)}"
        )

    # Both the legacy module and a Generator expose `.permutation`.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Work on row indices only; the (potentially large) feature rows are
    # gathered once at the end instead of being copied per class.
    train_parts = []
    test_parts = []
    for cls in np.unique(data_y):
        cls_indices = rng.permutation(np.where(data_y == cls)[0])
        train_parts.append(cls_indices[:train_num])
        test_parts.append(cls_indices[train_num:train_num + test_num])

    no_rows = np.empty(0, dtype=np.intp)  # used when there are no classes at all
    train_idx = np.concatenate(train_parts) if train_parts else no_rows
    test_idx = np.concatenate(test_parts) if test_parts else no_rows

    # Final shuffles so each set is not ordered by class (train first, then test,
    # matching the original order of random draws).
    train_idx = train_idx[rng.permutation(len(train_idx))]
    test_idx = test_idx[rng.permutation(len(test_idx))]

    return data_x[train_idx], data_y[train_idx], data_x[test_idx], data_y[test_idx]

In [4]:
# Fetch the "mnist_784" dataset through scikit-learn's OpenML loader.
# The next cell reads data["data"].values and data["target"].values, so the
# returned object is expected to hold pandas containers under those keys.
data = fetch_openml("mnist_784")

In [5]:
# Preprocess MNIST and draw a small class-balanced subset.
# - Pixels are divided by 255.0 to scale them into [0, 1] (grayscale images originally 0-255).
# - Labels are cast with astype('int16') so they are integers rather than the raw target values.
# - train_num / test_num are *per-class* counts: dataset() takes that many samples from
#   every distinct label, so the totals are (number of classes) * train_num / test_num.

X_train, y_train, X_test, y_test = dataset(
    (
        np.asarray(data["data"].values) / 255.0,                 # Normalize input images
        np.asarray(data["target"].values.astype('int16'))        # Convert labels to integers
    ),
    train_num=30,   # training samples per class
    test_num=10     # test samples per class
)


In [6]:
# Turn each flat row into a single-channel 28x28 image: (N, 784) -> (N, 1, 28, 28).
# Conv2d.forward and MaxPool2d.forward unpack x.shape as (N, C, H, W).
X_train, X_test = X_train.reshape(-1, 1, 28, 28), X_test.reshape(-1, 1, 28, 28)

In [7]:
class MaxPool2d(nn.Module):
    """2-D max pooling over inputs of shape (N, C, H, W).

    Parameters
    ----------
    pool_size : int or (int, int)
        Pooling window as (height, width); an int means a square window.
    stride : int or (int, int)
        Step between windows as (vertical, horizontal); an int is used for both.

    Notes
    -----
    When several entries of a window tie for the maximum, every tied entry
    receives the full upstream gradient for that window in ``backward``.
    """

    def __init__(self, pool_size=(2,2), stride=(1,1)):
        super().__init__()
        # Ensure pool_size and stride are tuples (height, width)
        if isinstance(pool_size, int):
            pool_size = (pool_size, pool_size)
        if isinstance(stride, int):
            stride = (stride, stride)

        self.pool_size_y, self.pool_size_x = pool_size  # pooling window height and width
        self.stride_y, self.stride_x = stride          # stride in vertical and horizontal directions
        self.x = None     # input cached by forward(); backward() recomputes window maxima from it
        self.mask = None  # per-input-element count of windows in which it was a maximum

    def _window(self, i, j):
        """Return the (row, column) slices of the input covered by output cell (i, j)."""
        top = i * self.stride_y
        left = j * self.stride_x
        return (slice(top, top + self.pool_size_y),
                slice(left, left + self.pool_size_x))

    def forward(self, x):
        """Pool ``x`` of shape (N, C, H, W) and return an array of shape (N, C, H_out, W_out)."""
        self.x = x
        N, C, H, W = x.shape

        # Calculate output height and width
        H_out = (H - self.pool_size_y) // self.stride_y + 1
        W_out = (W - self.pool_size_x) // self.stride_x + 1

        out = np.zeros((N, C, H_out, W_out))
        # Kept for inspection only. With overlapping windows an element can be the
        # maximum of several windows, so this is a count, not a 0/1 mask, and it
        # cannot be sliced back into per-window masks.
        self.mask = np.zeros_like(x, dtype=int)  # shape (N, C, H, W)

        # All samples and channels are handled at once for each output position.
        for i in range(H_out):
            for j in range(W_out):
                rows, cols = self._window(i, j)
                region = x[:, :, rows, cols]
                region_max = region.max(axis=(2, 3), keepdims=True)  # (N, C, 1, 1)
                out[:, :, i, j] = region_max[:, :, 0, 0]
                self.mask[:, :, rows, cols] += (region == region_max).astype(int)

        return out  # (N, C, H_out, W_out)

    def backward(self, grad_output):
        """Route ``grad_output`` (N, C, H_out, W_out) back to the cached input's shape.

        Each window's max mask is recomputed from the cached input rather than
        sliced out of ``self.mask``: with overlapping windows (stride smaller
        than the pool size, which includes the default stride) a slice of the
        accumulated mask also contains maxima belonging to neighbouring windows
        and counts greater than one, which would send gradient to the wrong
        elements with the wrong scale.
        """
        x = self.x
        H_out, W_out = grad_output.shape[2], grad_output.shape[3]

        # Gradient w.r.t. the input, in the dtype of the incoming gradient.
        dx = np.zeros(x.shape, dtype=grad_output.dtype)

        for i in range(H_out):
            for j in range(W_out):
                rows, cols = self._window(i, j)
                region = x[:, :, rows, cols]
                is_max = region == region.max(axis=(2, 3), keepdims=True)
                # Overlapping windows contribute additively to shared elements.
                dx[:, :, rows, cols] += is_max * grad_output[:, :, i, j][:, :, None, None]

        return dx

In [8]:
class Conv2d(nn.Module):
    """Naive sliding-window 2-D convolution (cross-correlation) layer.

    Inputs are (N, C, H, W). The weight tensor has shape
    (out_channels, in_channels, KH, KW) and the bias has shape (out_channels,).
    ``stride`` and ``padding`` are single integers applied to both spatial axes.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        # An integer kernel size denotes a square kernel.
        kernel_h, kernel_w = (
            (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
        )
        self.KH, self.KW = kernel_h, kernel_w
        self.stride = stride
        self.padding = padding

        # Weights are drawn from a normal distribution scaled by sqrt(2 / fan_in);
        # the bias starts at zero.
        fan_in = in_channels * self.KH * self.KW
        initial_weights = np.sqrt(2 / fan_in) * np.random.randn(
            out_channels, in_channels, self.KH, self.KW
        )
        self.W = nn.Parameter(initial_weights)
        self.b = nn.Parameter(np.zeros(out_channels))

        # Input cached by forward() for use in backward().
        self.x = None

    def forward(self, x):
        """Convolve ``x`` (N, C, H, W) and return an array of shape (N, F, H_out, W_out)."""
        self.x = x
        batch, _, height, width = x.shape
        n_filters = self.W.data.shape[0]
        step, margin = self.stride, self.padding

        # Spatial size of the output.
        out_h = (height + 2 * margin - self.KH) // step + 1
        out_w = (width + 2 * margin - self.KW) // step + 1

        # Zero-pad only the two spatial axes.
        padded = np.pad(
            x, ((0, 0), (0, 0), (margin, margin), (margin, margin)), mode="constant"
        )
        result = np.zeros((batch, n_filters, out_h, out_w))

        # Visit every (sample, output row, output column) in row-major order.
        for n, i, j in np.ndindex(batch, out_h, out_w):
            top, left = i * step, j * step
            # Receptive field of shape (C, KH, KW).
            patch = padded[n, :, top:top + self.KH, left:left + self.KW]
            # One filter at a time: elementwise product, full sum, plus bias.
            for f in range(n_filters):
                result[n, f, i, j] = np.sum(patch * self.W.data[f]) + self.b.data[f]

        return result

    def backward(self, grad_output):
        """Accumulate weight/bias gradients and return the gradient w.r.t. the input.

        ``grad_output`` has shape (N, F, H_out, W_out). The computed gradients are
        added onto ``self.W.grad`` and ``self.b.grad``; the returned array has the
        shape of the input cached by ``forward``.
        """
        x = self.x
        batch, _, height, width = x.shape
        weights = self.W.data
        n_filters = weights.shape[0]
        step, margin = self.stride, self.padding
        out_h, out_w = grad_output.shape[2], grad_output.shape[3]

        # Padded copies so window indexing mirrors the forward pass.
        spatial_pad = ((0, 0), (0, 0), (margin, margin), (margin, margin))
        padded_x = np.pad(x, spatial_pad, mode="constant")
        padded_dx = np.pad(np.zeros_like(x), spatial_pad, mode="constant")

        grad_W = np.zeros_like(weights)
        grad_b = np.zeros_like(self.b.data)

        for n, i, j in np.ndindex(batch, out_h, out_w):
            rows = slice(i * step, i * step + self.KH)
            cols = slice(j * step, j * step + self.KW)
            patch = padded_x[n, :, rows, cols]

            for f in range(n_filters):
                upstream = grad_output[n, f, i, j]
                grad_b[f] += upstream
                grad_W[f] += upstream * patch
                # Each input element collects a contribution from every window covering it.
                padded_dx[n, :, rows, cols] += upstream * weights[f]

        # Add onto whatever gradient the Parameter objects already hold.
        self.W.grad += grad_W
        self.b.grad += grad_b

        # Strip the padding border to recover the input's spatial extent.
        return padded_dx[:, :, margin:height + margin, margin:width + margin]


In [9]:
class Flatten(nn.Module):
    """Collapse every axis after the first into a single feature axis."""

    def forward(self, x):
        """Return ``x`` reshaped to (x.shape[0], -1), remembering its original shape."""
        original_shape = x.shape
        self.input_shape = original_shape  # needed to undo the reshape in backward()
        leading = original_shape[0]
        return x.reshape(leading, -1)

    def backward(self, grad_output):
        """Reshape the upstream gradient back to the shape seen by ``forward``."""
        restored = grad_output.reshape(self.input_shape)
        return restored


In [None]:
# Baseline CNN without pooling: conv -> ReLU -> flatten -> two linear layers.
model_1 = nn.Sequential([
    Conv2d(in_channels=1, out_channels=4, kernel_size=3),  # 28x28 -> 26x26 (3x3 kernel, stride 1, no padding)
    nn.ReLU(),
    Flatten(),                                              # 4 * 26 * 26 = 2704
    nn.Linear(2704, 32),
    nn.ReLU(),
    nn.Linear(32, 10),  # one output per class label
])

In [None]:
# Same CNN as above, with a 2x2 / stride-2 max pool inserted before flattening.
model_2 = nn.Sequential([
    Conv2d(in_channels=1, out_channels=4, kernel_size=3),  # 28 -> 26
    nn.ReLU(),

    MaxPool2d(pool_size=(2,2), stride=(2,2)),       # 26 -> 13

    Flatten(),                                      # 4 * 13 * 13 = 676
    nn.Linear(676, 32),
    nn.ReLU(),
    nn.Linear(32, 10),
])


In [None]:
# Train the selected model with mini-batch SGD and report metrics after every epoch.
model = model_1

loss_fn = CrossEntropy()

# Hyperparameters
epochs = 15
batch_size = 32
initial_lr = 0.01

optimizer = optim.SGD(model.parameters(), lr=initial_lr)

for epoch in range(epochs):
    # Walk through the training set in consecutive slices; the final slice may be
    # smaller than batch_size. NOTE(review): the sample order is the same in every
    # epoch (no per-epoch reshuffle).
    for i in range(0, X_train.shape[0], batch_size):
        x_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        logits = model.forward(x_batch)
        loss = loss_fn.forward(logits, y_batch)
        grad_output = loss_fn.backward()
        model.backward(grad_output)
        optimizer.step()
        # Layers such as Conv2d add onto .grad in backward(), so gradients must be
        # cleared before the next batch.
        optimizer.zero_grad()

    # End-of-epoch evaluation on the full training and test sets (forward passes only).
    logits_train = model.forward(X_train)
    train_loss = loss_fn.forward(logits_train, y_train)
    train_acc = accuracy(logits_train, y_train)

    logits_test = model.forward(X_test)
    test_loss = loss_fn.forward(logits_test, y_test)
    test_acc = accuracy(logits_test, y_test)

    print(f"Epoch {epoch+1} Summary: "
          f"Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, "
          f"Test Acc={test_acc:.4f}, Test Loss={test_loss:.4f}")

Epoch 1 Summary: Train Acc=0.1967, Train Loss=4.2005, Test Acc=0.1900, Test Loss=4.3799
Epoch 2 Summary: Train Acc=0.2633, Train Loss=3.3215, Test Acc=0.2300, Test Loss=3.3059
Epoch 3 Summary: Train Acc=0.2867, Train Loss=2.8011, Test Acc=0.2800, Test Loss=2.7836
Epoch 4 Summary: Train Acc=0.3233, Train Loss=2.4706, Test Acc=0.2900, Test Loss=2.4521
Epoch 5 Summary: Train Acc=0.3800, Train Loss=2.2827, Test Acc=0.3300, Test Loss=2.2657
Epoch 6 Summary: Train Acc=0.4500, Train Loss=2.1713, Test Acc=0.4200, Test Loss=2.1569
Epoch 7 Summary: Train Acc=0.5000, Train Loss=2.0904, Test Acc=0.4900, Test Loss=2.0795
Epoch 8 Summary: Train Acc=0.5200, Train Loss=2.0166, Test Acc=0.5400, Test Loss=2.0092
Epoch 9 Summary: Train Acc=0.5567, Train Loss=1.9428, Test Acc=0.5600, Test Loss=1.9389
Epoch 10 Summary: Train Acc=0.5733, Train Loss=1.8672, Test Acc=0.5600, Test Loss=1.8670
Epoch 11 Summary: Train Acc=0.5733, Train Loss=1.7894, Test Acc=0.5600, Test Loss=1.7926
Epoch 12 Summary: Train Acc=0.