In [2]:
import numpy as np
import import_ipynb
from ANN import Layer_Dense, Activation_Softmax, Loss_CategoricalCrossentropy, Activation_Softmax_Loss_CategoricalCrossentropy

In [3]:
class Layer_Conv:
    """Single-channel 2D convolution layer (stride 1, no padding) trained with plain SGD.

    The layer takes one 2D image, applies ``n_filters`` square kernels and
    produces a ``(n_filters, out_h, out_w)`` feature volume. ``backward``
    updates the filters and biases in place; it does not compute a gradient
    with respect to the input.
    """

    def __init__(self, n_filters, kernel_size):
        """Create the filters and biases.

        Args:
            n_filters: Number of filters (output channels).
            kernel_size: Height and width of every (square) kernel.
        """
        self.n_filters = n_filters
        self.kernel_size = kernel_size

        # Filters are 3D: (n_filters, kernel_height, kernel_width).
        # Standard-normal draws are divided by kernel_size**2 (the number of
        # weights in one kernel) simply to keep the initial weights small.
        # This is a scaling heuristic, not He initialization (which would use
        # a standard deviation of sqrt(2 / fan_in)).
        self.filters = np.random.randn(n_filters, kernel_size, kernel_size) / (kernel_size**2)

        # One bias for each filter, starting at zero.
        self.biases = np.zeros(n_filters)

    def forward(self, input):
        """Slide every filter over a 2D image.

        Args:
            input: 2D array of shape ``(height, width)``.

        Returns:
            Array of shape ``(n_filters, height - kernel_size + 1,
            width - kernel_size + 1)``. The same array is kept on
            ``self.output``.

        Raises:
            ValueError: If ``input`` is not 2D (shape unpacking fails).
        """
        self.last_input = input  # cached so backward() can revisit every patch
        h, w = input.shape
        k = self.kernel_size

        # Output size for stride 1 and no padding: (input_size - kernel_size) + 1.
        output_h = h - k + 1
        output_w = w - k + 1

        # One output map per filter.
        self.output = np.zeros((self.n_filters, output_h, output_w))

        for i in range(output_h):
            for j in range(output_w):
                # Patch currently under the kernel window.
                im_region = input[i:i + k, j:j + k]

                # The (k, k) patch broadcasts against all (n_filters, k, k)
                # kernels; summing over the kernel axes yields one response
                # per filter for this position.
                self.output[:, i, j] = np.sum(im_region * self.filters, axis=(1, 2)) + self.biases

        return self.output

    def backward(self, d_l_d_out, learning_rate):
        """Apply one SGD step to the filters and biases.

        Args:
            d_l_d_out: Gradient of the loss with respect to this layer's
                output; must have the shape returned by the last ``forward``
                call, i.e. ``(n_filters, out_h, out_w)``.
            learning_rate: Step size of the SGD update.

        Returns:
            None. No gradient with respect to the input is computed.

        Raises:
            ValueError: If ``d_l_d_out`` does not match the forward output
                shape. Nothing is updated in that case.
        """
        d_l_d_out = np.asarray(d_l_d_out)

        h, w = self.last_input.shape
        k = self.kernel_size
        output_h = h - k + 1
        output_w = w - k + 1

        # Reject mismatched gradients up front: otherwise the filter update
        # (which indexes only the valid window positions) and the bias update
        # (which sums the whole array) could silently disagree.
        expected_shape = (self.n_filters, output_h, output_w)
        if d_l_d_out.shape != expected_shape:
            raise ValueError(
                f"d_l_d_out has shape {d_l_d_out.shape}, expected {expected_shape}"
            )

        d_l_d_filters = np.zeros(self.filters.shape)

        for i in range(output_h):
            for j in range(output_w):
                # Same patch the filters saw at (i, j) during the forward pass.
                im_region = self.last_input[i:i + k, j:j + k]

                # Each filter's gradient is the patch scaled by that filter's
                # output gradient at (i, j); the (n_filters, 1, 1) view
                # broadcasts over the (k, k) patch to handle all filters at once.
                d_l_d_filters += d_l_d_out[:, i, j, np.newaxis, np.newaxis] * im_region

        # Update weights (SGD). The bias of a filter is added at every output
        # position, so its gradient is the sum of that filter's output gradient.
        self.filters -= learning_rate * d_l_d_filters
        self.biases -= learning_rate * np.sum(d_l_d_out, axis=(1, 2))

        return None

In [4]:
class MaxPool2:
    """2x2 max pooling with stride 2 over a ``(num_filters, height, width)`` volume."""

    def forward(self, input):
        """Keep the largest value of every non-overlapping 2x2 window.

        Args:
            input: 3D array of shape ``(num_filters, height, width)``.

        Returns:
            Float array of shape ``(num_filters, height // 2, width // 2)``,
            also stored on ``self.output``. An odd trailing row or column is
            not covered by any window and is ignored.
        """
        self.last_input = input
        depth, rows, cols = input.shape
        out_rows, out_cols = rows // 2, cols // 2

        # Crop to an even size, then give each 2x2 window its own pair of
        # axes (2 and 4) so a single reduction handles every window at once.
        cropped = input[:, :out_rows * 2, :out_cols * 2]
        windows = cropped.reshape(depth, out_rows, 2, out_cols, 2)

        pooled = np.zeros((depth, out_rows, out_cols))
        pooled[...] = windows.max(axis=(2, 4))

        self.output = pooled
        return self.output

    def backward(self, d_l_d_out):
        """Route each output gradient back to the cell that won its window.

        Args:
            d_l_d_out: Gradient with respect to the pooled output, indexed as
                ``[filter, pooled_row, pooled_col]``.

        Returns:
            Array shaped like the last forward input; zero everywhere except
            at the (first) maximum of each 2x2 window.
        """
        source = self.last_input
        depth, rows, cols = source.shape
        grad_input = np.zeros(source.shape)
        channel_ids = np.arange(depth)

        for out_r in range(rows // 2):
            top = out_r * 2
            for out_c in range(cols // 2):
                left = out_c * 2
                window = source[:, top:top + 2, left:left + 2]

                # Flat position (0..3, row-major) of the first maximum in each
                # filter's window, split back into row/column offsets.
                flat_winner = window.reshape(depth, 4).argmax(axis=1)
                d_row, d_col = np.divmod(flat_winner, 2)

                grad_input[channel_ids, top + d_row, left + d_col] = d_l_d_out[:depth, out_r, out_c]

        return grad_input

In [5]:
class Layer_Flatten:
    """Bridge between a multi-dimensional feature volume and a single-row batch."""

    def forward(self, input):
        """Copy ``input`` into one row of shape ``(1, input.size)``.

        The original shape is remembered on ``self.input_shape`` so that
        ``backward`` can restore it.
        """
        self.input_shape = input.shape
        row = input.flatten()          # always a fresh, row-major copy
        return row[np.newaxis, :]

    def backward(self, dvalues):
        """Reshape the incoming gradient to the shape seen by ``forward``."""
        original_shape = self.input_shape
        return dvalues.reshape(original_shape)

In [6]:
# End-to-end demo: one forward and one backward pass of
# Conv -> ReLU -> MaxPool -> Flatten -> Dense -> Softmax/cross-entropy
# on a single random "image".
conv = Layer_Conv(8, 3)                  # 8 filters, 3x3 size
pool = MaxPool2()                        # 2x2 Max Pooling
flatten = Layer_Flatten()
# After conv(26x26) and pool(13x13), we have 8 * 13 * 13 = 1352 inputs
dense = Layer_Dense(1352, 10)            # 10 output classes (digits 0-9)
loss_softmax = Activation_Softmax_Loss_CategoricalCrossentropy()

# FORWARD PASS
image = np.random.randn(28, 28)          # A "fake" 28x28 grayscale image
label = 7                                # The "true" digit is 7

# 1. Convolution
out = conv.forward(image)                # Shape: (8, 26, 26)
# 2. ReLU
# np.maximum returns a new array, so conv.output still holds the
# pre-activation values needed for the ReLU gradient in the backward pass.
out = np.maximum(0, out)                 # (Simple ReLU)
# 3. Pooling
out = pool.forward(out)                  # Shape: (8, 13, 13)
# 4. Flatten
out = flatten.forward(out)               # Shape: (1, 1352)
# 5. Dense + Softmax/Loss
# The dense layer exposes its result on `dense.output`; call forward for that
# side effect and read the attribute explicitly instead of relying on the
# truthiness of forward's return value.
dense.forward(out)
loss = loss_softmax.forward(dense.output, np.array([label]))

print(f"Initial Loss: {loss:.4f}")

# BACKWARD PASS 
# 1. Loss -> Dense
loss_softmax.backward(loss_softmax.output, np.array([label]))
dense.backward(loss_softmax.dinputs)

# 2. Dense -> Flatten
d_flatten = flatten.backward(dense.dinputs)

# 3. Flatten -> Pooling
d_pool = pool.backward(d_flatten)        # Gradient w.r.t. the ReLU output, (8, 26, 26)

# 4. Pooling -> ReLU
# ReLU passes gradient only where its input was positive. Without this mask a
# pooling window whose pre-activations are all <= 0 would still push gradient
# into the filters (MaxPool2 routes it to the first cell of such a window).
d_relu = d_pool * (conv.output > 0)

# 5. ReLU -> Conv
# Conv.backward takes the gradient of its own output and updates its filters.
conv.backward(d_relu, learning_rate=0.01)

print("Backward pass complete. Filters updated!")

Initial Loss: 2.3213
Backward pass complete. Filters updated!
