# Introduction to Convolutional Neural Networks (CNNs)

## 1. Why CNNs?

Convolutional Neural Networks (CNNs) are widely used for complex tasks such as **image classification**.  
Images are often high-dimensional (e.g., 224×224 pixels with 3 color channels), resulting in 224×224×3 = 150,528 input features. Using a fully connected neural network on such inputs would require an **enormous number of parameters** (a single hidden layer of 1,000 units would already need about 150 million weights), making training inefficient and prone to overfitting.

**Key advantages of CNNs:**
- **Local receptive fields:** Each neuron processes only a small region of the input, capturing local patterns.
- **Shared weights (filters):** Reduces the number of parameters and improves generalization.
- **Hierarchical feature learning:** Lower layers capture simple patterns (edges, textures), higher layers capture complex patterns (shapes, objects).

---

## 2. The Convolution Operation

A convolution layer applies **filters (kernels)** to the input image to extract features.

**Step-by-step process:**
1. Place the filter on a specific region of the input image.
2. Perform element-wise multiplication between the filter values and the corresponding input values.
3. Sum all the products to get a single output value for that region.
4. Slide the filter across the entire image to produce the feature map.

**Example:**  
A **Sobel filter** detects edges in an image:
- Vertical Sobel filter → detects vertical edges
- Horizontal Sobel filter → detects horizontal edges

**Additional concepts:**
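- **Padding:** Adding zeros around the border of the image so that the output feature map keeps the same spatial size as the input (without padding, a 3×3 filter shrinks each dimension by 2).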
- **Stride:** The step size when sliding the filter over the input.

---



# Implementation
### Convolutional Layer 

In [None]:
# =============================
# CNN From Scratch - MNIST
# =============================

import numpy as np
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

# -----------------------------
# 1. Preprocessing
# -----------------------------
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Normalize to 0-1 by dividing by 255.0, then store the result as float32.
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)

# -----------------------------
# 2. Layers
# -----------------------------
class Conv3x:
    """3x3 convolution layer (no padding, stride 1) followed by ReLU.

    The layer works on a single 2-D image at a time and produces one
    feature map per filter.
    """

    def __init__(self, num_filters):
        """Create ``num_filters`` randomly initialised 3x3 filters.

        Args:
            num_filters: Number of filters, i.e. number of output channels.
        """
        self.num_filters = num_filters
        # Standard-normal draws scaled down by 9 (= 3*3 weights per filter).
        self.filters = np.random.randn(num_filters, 3, 3) / 9.0

    def iterate_regions(self, image):
        """Yield every 3x3 window of a 2-D ``image`` with its top-left index.

        Args:
            image: 2-D array of shape (h, w).

        Yields:
            Tuples ``(im_region, i, j)`` where ``im_region`` is
            ``image[i:i+3, j:j+3]``.
        """
        h, w = image.shape
        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:i+3, j:j+3]
                yield im_region, i, j

    def forward(self, input):
        """Convolve ``input`` with every filter and apply ReLU.

        Args:
            input: 2-D array of shape (h, w).

        Returns:
            Array of shape (h - 2, w - 2, num_filters) with non-negative
            entries.
        """
        h, w = input.shape
        self.last_input = input
        totals = np.zeros((h - 2, w - 2, self.num_filters))
        for im_region, i, j in self.iterate_regions(input):
            # Broadcast the window against all filters, then sum each 3x3.
            totals[i, j] = np.sum(im_region * self.filters, axis=(1, 2))
        # Remember which units were active: ReLU passes gradient only there.
        self.last_relu_mask = totals > 0
        return np.maximum(0, totals)  # ReLU

    def backprop(self, d_L_d_out, learning_rate):
        """Update the filters from the gradient of the loss w.r.t. the output.

        The ReLU derivative is applied using the activation mask cached by
        ``forward``: gradient flows only through units whose pre-activation
        was positive, regardless of the sign of the incoming gradient.

        Args:
            d_L_d_out: Gradient w.r.t. this layer's output, same shape as the
                array returned by ``forward``.
            learning_rate: Step size of the gradient-descent update.

        Returns:
            None. The gradient w.r.t. the input is not computed, so this layer
            must be the first layer of the network.
        """
        # Gradient w.r.t. the pre-activation totals (ReLU backward pass).
        d_L_d_totals = d_L_d_out * self.last_relu_mask

        d_L_d_filters = np.zeros(self.filters.shape)
        for im_region, i, j in self.iterate_regions(self.last_input):
            # (num_filters, 1, 1) * (3, 3) -> one scaled window per filter.
            d_L_d_filters += d_L_d_totals[i, j][:, None, None] * im_region

        # Update filters
        self.filters -= learning_rate * d_L_d_filters
        return None  # For simplicity, not propagating to input


class MaxPool2:
    """Non-overlapping 2x2 max pooling over an (h, w, channels) array."""

    def iterate_regions(self, image):
        """Yield each 2x2 window of ``image`` with its pooled-grid index.

        A trailing odd row or column is not visited because the grid size is
        computed with integer division by 2.

        Yields:
            Tuples ``(window, i, j)`` where ``window`` is
            ``image[2*i:2*i+2, 2*j:2*j+2]``.
        """
        rows, cols, _ = image.shape
        for i in range(rows // 2):
            top = i * 2
            for j in range(cols // 2):
                left = j * 2
                yield image[top:top + 2, left:left + 2], i, j

    def forward(self, input):
        """Return the per-channel maximum of every 2x2 window.

        Also records, in ``self.last_max``, a boolean mask over the input that
        marks every position equal to its window's maximum.
        """
        self.last_input = input
        rows, cols, depth = input.shape
        pooled = np.zeros((rows // 2, cols // 2, depth))
        self.last_max = np.zeros_like(input, dtype=bool)
        for window, i, j in self.iterate_regions(input):
            # Maximum of each channel across the window's two spatial axes.
            peaks = np.max(window, axis=(0, 1))
            pooled[i, j] = peaks
            # store max position for backprop
            self.last_max[i * 2:i * 2 + 2, j * 2:j * 2 + 2] = (window == peaks)
        return pooled

    def backprop(self, d_L_d_out):
        """Route each output gradient back to the recorded max positions.

        Args:
            d_L_d_out: Gradient w.r.t. the pooled output.

        Returns:
            Array shaped like the last input; positions that were not a window
            maximum receive zero.
        """
        grad_input = np.zeros_like(self.last_input)
        depth = self.last_input.shape[2]
        for _window, i, j in self.iterate_regions(self.last_input):
            row_span = slice(i * 2, i * 2 + 2)
            col_span = slice(j * 2, j * 2 + 2)
            for f in range(depth):
                upstream = d_L_d_out[i, j, f]
                grad_input[row_span, col_span, f] += (
                    upstream * self.last_max[row_span, col_span, f]
                )
        return grad_input


class Softmax:
    """Fully connected layer followed by a softmax activation."""

    def __init__(self, input_len, nodes):
        """Create the dense weights and biases.

        Args:
            input_len: Number of values in the flattened input.
            nodes: Number of output classes.
        """
        # Standard-normal draws scaled down by the number of inputs.
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.bias = np.zeros(nodes)

    def forward(self, input):
        """Flatten ``input``, apply the dense layer and return softmax probabilities.

        Args:
            input: Array of any shape whose total size is ``input_len``.

        Returns:
            1-D array of length ``nodes`` that sums to 1.
        """
        self.last_input_shape = input.shape
        input_flat = input.flatten()
        self.last_input = input_flat
        totals = np.dot(input_flat, self.weights) + self.bias
        self.last_totals = totals
        exp = np.exp(totals - np.max(totals))  # numerical stability
        return exp / np.sum(exp)

    def backprop(self, d_L_d_out, learning_rate):
        """Apply one gradient-descent step and return the gradient w.r.t. the input.

        Args:
            d_L_d_out: Gradient of the loss w.r.t. the pre-softmax totals
                (it is used directly as that gradient, with no softmax
                Jacobian applied here).
            learning_rate: Step size of the update.

        Returns:
            Gradient w.r.t. the layer input, reshaped to the shape of the
            array last passed to ``forward``.
        """
        d_L_d_totals = d_L_d_out

        # Must be computed BEFORE the weights are updated: the gradient for
        # the previous layer belongs to the weights used in the forward pass.
        d_L_d_input = np.dot(self.weights, d_L_d_totals)  # shape = (input_len,)

        d_L_d_weights = np.outer(self.last_input, d_L_d_totals)
        d_L_d_bias = d_L_d_totals
        self.weights -= learning_rate * d_L_d_weights
        self.bias -= learning_rate * d_L_d_bias

        return d_L_d_input.reshape(self.last_input_shape)



# -----------------------------
# 3. Helper functions
# -----------------------------
def cross_entropy_loss(pred, label):
    """Return the negative log of the probability predicted for ``label``.

    A small constant (1e-7) is added before the log so a zero probability
    does not produce an infinite loss.
    """
    epsilon = 1e-7
    prob_of_label = pred[label]
    return -np.log(prob_of_label + epsilon)

def accuracy(pred, label):
    """Return 1 when the highest-scoring class equals ``label``, otherwise 0."""
    predicted_class = np.argmax(pred)
    if predicted_class == label:
        return 1
    return 0

def softmax_loss_and_gradient(pred, label):
    """Return a copy of ``pred`` with 1 subtracted at index ``label``.

    Despite the name, only the gradient is returned (no loss value). The
    input array is left unmodified.
    """
    grad = pred.copy()
    grad[label] = grad[label] - 1
    return grad


# -----------------------------
# 4. Initialize network
# -----------------------------
# Layers are created in the same order as before so the random draws match.
conv = Conv3x(num_filters=8)
pool = MaxPool2()
# The dense layer takes 13*13*8 flattened inputs and scores the 10 MNIST classes.
softmax = Softmax(input_len=13 * 13 * 8, nodes=10)

# -----------------------------
# 5. Training loop (SGD; weights are updated after every sample,
#    batches only group the progress reports)
# -----------------------------
learning_rate = 0.005
batch_size = 32
num_epochs = 1  # For demo, increase later

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}")
    # Draw a fresh random ordering of the training set for this epoch.
    permutation = np.random.permutation(len(x_train))
    x_train_shuffled, y_train_shuffled = x_train[permutation], y_train[permutation]

    # Running totals since the last progress report.
    loss = num_correct = 0

    for i in range(0, len(x_train), batch_size):
        x_batch = x_train_shuffled[i:i+batch_size]
        y_batch = y_train_shuffled[i:i+batch_size]

        for im, label in zip(x_batch, y_batch):
            # Forward pass: conv -> pool -> softmax
            pred = softmax.forward(pool.forward(conv.forward(im)))

            # Loss & accuracy
            loss += cross_entropy_loss(pred, label)
            num_correct += accuracy(pred, label)

            # Backpropagation: softmax -> pool -> conv (each trainable layer
            # updates its own parameters inside backprop)
            gradient = softmax_loss_and_gradient(pred, label)
            conv.backprop(
                pool.backprop(softmax.backprop(gradient, learning_rate)),
                learning_rate,
            )

        # Report after every 100th batch, then reset the running totals.
        if (i // batch_size + 1) % 100 == 0:
            print(f"Step {i} - Loss: {loss/ (100*batch_size):.4f}, Accuracy: {num_correct/(100*batch_size):.4f}")
            loss = num_correct = 0

print("Training completed!")


Epoch 1


ValueError: shapes (10,) and (1352,10) not aligned: 10 (dim 0) != 1352 (dim 0)