# Convolutional Neural Networks from First Principles

### Imports

In [26]:
# Minimal packages and libraries used
import numpy as np

## Convolutional Layer Class
This class is used to create and add convolutional layers to our network

In [27]:
# Convolutional Layer
class Conv2D:
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Initialize weights and biases
        self.weights = np.random.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.1
        self.bias = np.zeros((out_channels, 1))

    def forward(self, x):
        self.x = x
        batch_size, _, h, w = x.shape

        # Apply padding
        if self.padding > 0:
            x = np.pad(x, ((0,0), (0,0), (self.padding, self.padding),
                          (self.padding, self.padding)), mode='constant')

        # Calculate output dimensions
        out_h = (h + 2*self.padding - self.kernel_size) // self.stride + 1
        out_w = (w + 2*self.padding - self.kernel_size) // self.stride + 1

        # Initialize output
        output = np.zeros((batch_size, self.out_channels, out_h, out_w))

        # Perform convolution
        for b in range(batch_size):
            for c_out in range(self.out_channels):
                for i in range(out_h):
                    for j in range(out_w):
                        h_start = i * self.stride
                        h_end = h_start + self.kernel_size
                        w_start = j * self.stride
                        w_end = w_start + self.kernel_size

                        receptive_field = x[b, :, h_start:h_end, w_start:w_end]
                        output[b, c_out, i, j] = np.sum(receptive_field * self.weights[c_out]) + self.bias[c_out]

        return output

    def backward(self, grad, learning_rate=0.01):
        batch_size, _, grad_h, grad_w = grad.shape

        # Pad input for gradient calculation
        x_padded = self.x
        if self.padding > 0:
            x_padded = np.pad(self.x, ((0,0), (0,0), (self.padding, self.padding),
                                       (self.padding, self.padding)), mode='constant')

        # Initialize gradients
        d_weights = np.zeros_like(self.weights)
        d_bias = np.zeros_like(self.bias)
        d_x = np.zeros_like(x_padded)

        # Calculate gradients
        for b in range(batch_size):
            for c_out in range(self.out_channels):
                for i in range(grad_h):
                    for j in range(grad_w):
                        h_start = i * self.stride
                        h_end = h_start + self.kernel_size
                        w_start = j * self.stride
                        w_end = w_start + self.kernel_size

                        d_weights[c_out] += x_padded[b, :, h_start:h_end, w_start:w_end] * grad[b, c_out, i, j]
                        d_bias[c_out] += grad[b, c_out, i, j]
                        d_x[b, :, h_start:h_end, w_start:w_end] += self.weights[c_out] * grad[b, c_out, i, j]

        # Update weights
        self.weights -= learning_rate * d_weights / batch_size
        self.bias -= learning_rate * d_bias / batch_size

        # Remove padding from gradient if needed
        if self.padding > 0:
            d_x = d_x[:, :, self.padding:-self.padding, self.padding:-self.padding]

        return d_x

## MaxPooling Layer Class
This class is used to create and add MaxPooling Layers to our networks

In [28]:
# Max Pooling Layer
class MaxPool2D:
    def __init__(self, pool_size=2, stride=2):
        self.pool_size = pool_size
        self.stride = stride

    def forward(self, x):
        self.x = x
        batch_size, channels, h, w = x.shape

        out_h = (h - self.pool_size) // self.stride + 1
        out_w = (w - self.pool_size) // self.stride + 1

        output = np.zeros((batch_size, channels, out_h, out_w))
        self.max_indices = np.zeros_like(output, dtype=int)

        for b in range(batch_size):
            for c in range(channels):
                for i in range(out_h):
                    for j in range(out_w):
                        h_start = i * self.stride
                        h_end = h_start + self.pool_size
                        w_start = j * self.stride
                        w_end = w_start + self.pool_size

                        pool_region = x[b, c, h_start:h_end, w_start:w_end]
                        output[b, c, i, j] = np.max(pool_region)

        return output

    def backward(self, grad):
        d_x = np.zeros_like(self.x)
        batch_size, channels, grad_h, grad_w = grad.shape

        for b in range(batch_size):
            for c in range(channels):
                for i in range(grad_h):
                    for j in range(grad_w):
                        h_start = i * self.stride
                        h_end = h_start + self.pool_size
                        w_start = j * self.stride
                        w_end = w_start + self.pool_size

                        pool_region = self.x[b, c, h_start:h_end, w_start:w_end]
                        max_val = np.max(pool_region)
                        mask = (pool_region == max_val)
                        d_x[b, c, h_start:h_end, w_start:w_end] += grad[b, c, i, j] * mask

        return d_x

## Fully Connected (Dense) Layer Class
This class is used to create and add dense Layers to our networks

In [29]:
# Fully Connected Layer
class Dense:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.1
        self.bias = np.zeros((1, output_size))

    def forward(self, x):
        self.x = x
        return np.dot(x, self.weights) + self.bias

    def backward(self, grad, learning_rate=0.01):
        d_weights = np.dot(self.x.T, grad)
        d_bias = np.sum(grad, axis=0, keepdims=True)
        d_x = np.dot(grad, self.weights.T)

        self.weights -= learning_rate * d_weights / self.x.shape[0]
        self.bias -= learning_rate * d_bias / self.x.shape[0]

        return d_x


## Activation Classes
Create softmax, and relu activation functions

In [30]:
# Activation Functions
class ReLU:
    def forward(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, grad):
        return grad * (self.x > 0)

class Softmax:
    def forward(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.output = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        return self.output

    def backward(self, grad):
        return grad

In [31]:
# Flatten Layer
class Flatten:
    def forward(self, x):
        self.shape = x.shape
        return x.reshape(x.shape[0], -1)

    def backward(self, grad):
        return grad.reshape(self.shape)

In [32]:
# Loss Function
class CrossEntropyLoss:
    def forward(self, predictions, targets):
        self.predictions = predictions
        self.targets = targets
        batch_size = predictions.shape[0]

        # Clip predictions to prevent log(0)
        predictions = np.clip(predictions, 1e-10, 1 - 1e-10)
        loss = -np.sum(targets * np.log(predictions)) / batch_size
        return loss

    def backward(self):
        return (self.predictions - self.targets)

## CNN Class
Thhis class allows us to build, train a CNN and then allow us to predict from the trained CNN

In [33]:
class CNN:
    def __init__(self):
        self.conv1 = Conv2D(1, 8, 3, stride=1, padding=1)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2D(2, 2)

        self.conv2 = Conv2D(8, 16, 3, stride=1, padding=1)
        self.relu2 = ReLU()
        self.pool2 = MaxPool2D(2, 2)

        self.flatten = Flatten()
        self.fc1 = Dense(16 * 7 * 7, 128)  # Assuming 28x28 input
        self.relu3 = ReLU()
        self.fc2 = Dense(128, 10)
        self.softmax = Softmax()

        self.loss_fn = CrossEntropyLoss()

    def forward(self, x):
        x = self.conv1.forward(x)
        x = self.relu1.forward(x)
        x = self.pool1.forward(x)

        x = self.conv2.forward(x)
        x = self.relu2.forward(x)
        x = self.pool2.forward(x)

        x = self.flatten.forward(x)
        x = self.fc1.forward(x)
        x = self.relu3.forward(x)
        x = self.fc2.forward(x)
        x = self.softmax.forward(x)

        return x

    def backward(self, learning_rate=0.01):
        grad = self.loss_fn.backward()

        grad = self.softmax.backward(grad)
        grad = self.fc2.backward(grad, learning_rate)
        grad = self.relu3.backward(grad)
        grad = self.fc1.backward(grad, learning_rate)
        grad = self.flatten.backward(grad)

        grad = self.pool2.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.conv2.backward(grad, learning_rate)

        grad = self.pool1.backward(grad)
        grad = self.relu1.backward(grad)
        grad = self.conv1.backward(grad, learning_rate)

    def train(self, x, y, learning_rate=0.01):
        # Forward pass
        predictions = self.forward(x)

        # Calculate loss
        loss = self.loss_fn.forward(predictions, y)

        # Backward pass
        self.backward(learning_rate)

        return loss

    def predict(self, x):
        predictions = self.forward(x)
        return np.argmax(predictions, axis=1)

## Helper Functions

In [34]:
# Helper function to create synthetic patterns
def create_synthetic_data(n_samples=500):
    """Create simple patterns: vertical lines (0) vs horizontal lines (1)"""
    X = np.zeros((n_samples, 1, 28, 28))
    y = np.zeros((n_samples, 10))

    for i in range(n_samples):
        label = i % 2  # Alternate between 0 and 1

        if label == 0:  # Vertical line
            col = np.random.randint(10, 18)
            X[i, 0, 5:23, col:col+2] = 1.0
        else:  # Horizontal line
            row = np.random.randint(10, 18)
            X[i, 0, row:row+2, 5:23] = 1.0

        # Add small noise
        X[i] += np.random.randn(1, 28, 28) * 0.1
        y[i, label] = 1

    return X, y

In [35]:
# Example Usage
np.random.seed(42)

# Create synthetic data with actual patterns
print("Creating synthetic dataset (vertical vs horizontal lines)...")
X_train, y_train = create_synthetic_data(n_samples=500)
X_test, y_test = create_synthetic_data(n_samples=100)

# Initialize model
model = CNN()

# Training loop
epochs = 5
batch_size = 10

print("\nTraining CNN from scratch...")
for epoch in range(epochs):
    epoch_loss = 0
    correct = 0
    total = 0

    # Shuffle training data
    indices = np.random.permutation(len(X_train))
    X_train_shuffled = X_train[indices]
    y_train_shuffled = y_train[indices]

    for i in range(0, len(X_train), batch_size):
        batch_x = X_train_shuffled[i:i+batch_size]
        batch_y = y_train_shuffled[i:i+batch_size]

        loss = model.train(batch_x, batch_y, learning_rate=0.01)
        epoch_loss += loss

        # Calculate accuracy
        preds = model.predict(batch_x)
        correct += np.sum(preds == np.argmax(batch_y, axis=1))
        total += len(batch_x)

    accuracy = 100 * correct / total
    avg_loss = epoch_loss / (len(X_train) // batch_size)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Test on unseen data
print("\n--- Testing on new data ---")
test_predictions = model.predict(X_test)
test_labels = np.argmax(y_test, axis=1)
test_accuracy = 100 * np.sum(test_predictions == test_labels) / len(test_labels)
print(f"Test Accuracy: {test_accuracy:.2f}%")

# Show some predictions
print(f"\nFirst 10 predictions: {test_predictions[:10]}")
print(f"Actual labels:        {test_labels[:10]}")

Creating synthetic dataset (vertical vs horizontal lines)...

Training CNN from scratch...


  output[b, c_out, i, j] = np.sum(receptive_field * self.weights[c_out]) + self.bias[c_out]


Epoch 1/5, Loss: 1.5068, Accuracy: 62.20%
Epoch 2/5, Loss: 0.5727, Accuracy: 83.80%
Epoch 3/5, Loss: 0.2851, Accuracy: 98.20%
Epoch 4/5, Loss: 0.0928, Accuracy: 100.00%
Epoch 5/5, Loss: 0.0374, Accuracy: 100.00%

--- Testing on new data ---
Test Accuracy: 100.00%

First 10 predictions: [0 1 0 1 0 1 0 1 0 1]
Actual labels:        [0 1 0 1 0 1 0 1 0 1]
