# CNN Architecture Deep Dive

Understanding Convolutional Neural Networks for image classification.

## Topics
1. Convolution operation
2. Pooling layers
3. Activation functions
4. Building a CNN from scratch
5. Evaluation metrics

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global random generator so the random draws below are reproducible.
np.random.seed(42)

## 1. Convolution Operation

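A convolution slides a kernel across the input and, at each position, takes the dot product of the kernel with the patch of the input beneath it. As in most deep-learning libraries, the kernel is not flipped (strictly speaking, this is cross-correlation):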

$$(I * K)_{ij} = \sum_m \sum_n I_{i+m, j+n} \cdot K_{m,n}$$

In [None]:
def conv2d(image, kernel, stride=1, padding=0):
    """2D convolution from scratch.

    Slides ``kernel`` over ``image`` and, at every position, sums the
    element-wise products of the kernel and the patch beneath it. The
    kernel is not flipped, so this is the cross-correlation form that
    deep-learning libraries call "convolution".

    Args:
        image: (H, W) input (array-like).
        kernel: (kH, kW) filter (array-like).
        stride: positive integer step size, used for both axes.
        padding: non-negative number of zero rows/columns added on every
            side of ``image`` before the kernel is applied.

    Returns:
        Float array of shape ``(out_H, out_W)`` where
        ``out_H = (H + 2*padding - kH) // stride + 1`` and
        ``out_W = (W + 2*padding - kW) // stride + 1``.

    Raises:
        ValueError: if ``image`` or ``kernel`` is not 2-D, ``stride`` is
            not positive, ``padding`` is negative, or the kernel does not
            fit inside the (padded) image.
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)

    if image.ndim != 2 or kernel.ndim != 2:
        raise ValueError(
            f'image and kernel must be 2-D, got shapes '
            f'{image.shape} and {kernel.shape}'
        )
    if stride < 1:
        # Guard against a ZeroDivisionError (stride=0) or a nonsensical
        # output size (negative stride) further down.
        raise ValueError(f'stride must be a positive integer, got {stride}')
    if padding < 0:
        raise ValueError(f'padding must be non-negative, got {padding}')

    if padding > 0:
        image = np.pad(image, padding, mode='constant')

    H, W = image.shape
    kH, kW = kernel.shape

    if kH > H or kW > W:
        raise ValueError(
            f'kernel of shape {kernel.shape} does not fit inside the '
            f'padded image of shape {image.shape}'
        )

    out_H = (H - kH) // stride + 1
    out_W = (W - kW) // stride + 1

    output = np.zeros((out_H, out_W))

    for i in range(out_H):
        for j in range(out_W):
            # Top-left corner of the current window is (i*stride, j*stride).
            region = image[i*stride:i*stride+kH, j*stride:j*stride+kW]
            output[i, j] = np.sum(region * kernel)

    return output

# Example: Edge detection on a 3x3 block of ones centred in a 5x5 field of zeros
image = np.zeros((5, 5), dtype=float)
image[1:4, 1:4] = 1.0

# Sobel edge detector (horizontal-gradient kernel)
sobel_x = np.array([
    [-1, 0, 1],
    [-2, 0, 2],
    [-1, 0, 1],
])

edges = conv2d(image, kernel=sobel_x)
print('Edge detection result:')
print(edges)

## 2. Pooling Layers

Pooling layers reduce the spatial dimensions of a feature map while keeping its most important features:
- **Max Pooling**: Take the maximum of each region
- **Average Pooling**: Take the average of each region

In [None]:
def max_pool2d(x, pool_size=2):
    """Max pooling from scratch.

    Splits the (H, W) input ``x`` into non-overlapping
    ``pool_size`` x ``pool_size`` windows and keeps the largest value of
    each one. Trailing rows/columns that do not fill a complete window
    are dropped.

    Returns:
        Float array of shape ``(H // pool_size, W // pool_size)``.
    """
    n_rows, n_cols = x.shape
    pooled = np.zeros((n_rows // pool_size, n_cols // pool_size))

    for r, c in np.ndindex(*pooled.shape):
        top = r * pool_size
        left = c * pool_size
        window = x[top:top + pool_size, left:left + pool_size]
        pooled[r, c] = np.max(window)

    return pooled

# Example: pool a 4x4 feature map down to 2x2
feature_map = np.array(
    [1, 3, 2, 4,
     5, 6, 1, 2,
     7, 2, 3, 1,
     4, 5, 6, 8],
    dtype=float,
).reshape(4, 4)

pooled = max_pool2d(feature_map, pool_size=2)
print('Max pooling 4x4 -> 2x2:')
print(pooled)

## 3. Activation Functions

Introduce non-linearity so networks can learn complex patterns.

In [None]:
def relu(x):
    """Rectified linear unit: element-wise max(0, x).

    Negative entries become 0; non-negative entries pass through.
    """
    rectified = np.maximum(0, x)
    return rectified

def sigmoid(x):
    """Logistic sigmoid: 1 / (1 + e^-x), applied element-wise.

    The input is clipped to [-500, 500] before exponentiation.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def softmax(x, axis=None):
    """Softmax for classification.

    Args:
        x: array-like of logits.
        axis: axis along which the outputs are normalised. The default
            ``None`` normalises over every element of ``x``, which is
            the usual softmax for a single 1-D logit vector. Pass
            ``axis=-1`` for a batch of logit rows so each row sums to 1.

    Returns:
        Array with the same shape as ``x`` whose entries sum to 1 along
        ``axis`` (or over the whole array when ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result but keeps
    # exp() from overflowing on large logits.
    x_shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(x_shifted)
    # keepdims lets the normaliser broadcast back against exp_x.
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Visualization: ReLU, sigmoid and tanh side by side on [-5, 5]
x = np.linspace(-5, 5, 100)
plt.figure(figsize=(10, 3))
for position, (title, activation) in enumerate(
        [('ReLU', relu), ('Sigmoid', sigmoid), ('Tanh', np.tanh)], start=1):
    plt.subplot(1, 3, position)
    plt.plot(x, activation(x))
    plt.title(title)
plt.tight_layout()
plt.show()

## 4. Simple CNN Implementation

In [None]:
class SimpleCNN:
    """Minimal CNN for understanding.

    Pipeline: four 3x3 convolution filters -> ReLU -> 2x2 max pooling ->
    flatten -> one fully connected layer with 10 outputs -> softmax.
    """

    def __init__(self):
        # Four 3x3 filters, initialised with small random values.
        self.conv1_filters = 0.1 * np.random.randn(4, 3, 3)
        # Created on the first forward pass, once the length of the
        # flattened feature vector is known.
        self.fc_weights = None

    def forward(self, x):
        """Forward pass: Conv -> ReLU -> Pool -> FC.

        Args:
            x: 2-D input image.

        Returns:
            Softmax output of the 10 fully connected units.
        """
        # One feature map per filter; padding=1 with a 3x3 kernel keeps
        # the spatial size, giving a (4, H, W) stack.
        feature_maps = np.stack(
            [conv2d(x, kernel, padding=1) for kernel in self.conv1_filters]
        )

        activated = relu(feature_maps)

        # Pool every channel independently.
        pooled = np.stack([max_pool2d(channel, 2) for channel in activated])

        features = pooled.flatten()

        if self.fc_weights is None:
            n_features = features.shape[0]
            self.fc_weights = np.random.randn(n_features, 10) * 0.1

        return softmax(features @ self.fc_weights)

# Test: run one forward pass on a random 28x28 input.
# Statement order matters here: the filters, the dummy image and the
# lazily created FC weights all draw from NumPy's global random stream.
cnn = SimpleCNN()
dummy_image = np.random.randn(28, 28)
probs = cnn.forward(dummy_image)
print(f'Output probabilities: {probs}')
print(f'Predicted class: {np.argmax(probs)}')

## 5. Evaluation Metrics

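For binary classification tasks, the standard metrics are all derived from the four confusion-matrix counts (TP, TN, FP, FN):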

In [None]:
def compute_metrics(y_true, y_pred):
    """Compute binary classification metrics from 0/1 labels.

    Args:
        y_true: array-like of ground-truth labels (1 = positive,
            0 = negative).
        y_pred: array-like of predicted labels with the same shape as
            ``y_true``.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall',
        'specificity' and 'f1'. A metric whose denominator is zero
        (including every metric for empty input) is reported as 0.
        Positions where either label is neither 0 nor 1 are not counted.

    Raises:
        ValueError: if ``y_true`` and ``y_pred`` have different shapes.
    """
    # Plain Python lists compare as a single bool against an int, which
    # would silently zero every count; convert to arrays first.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f'y_true and y_pred must have the same shape, got '
            f'{y_true.shape} and {y_pred.shape}'
        )

    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    total = TP + TN + FP + FN
    # Guard every ratio so empty or one-sided input yields 0 rather than nan.
    accuracy = (TP + TN) / total if total > 0 else 0.0
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0  # Sensitivity
    specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0
    f1 = (
        2 * precision * recall / (precision + recall)
        if (precision + recall) > 0
        else 0.0
    )

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'specificity': specificity,
        'f1': f1
    }

# Example: ten ground-truth labels and the matching predictions
y_true = np.asarray([1, 1, 1, 0, 0, 0, 1, 0, 1, 0])
y_pred = np.asarray([1, 0, 1, 0, 0, 1, 1, 0, 1, 0])

# Print every metric to three decimal places.
metrics = compute_metrics(y_true=y_true, y_pred=y_pred)
for name, value in metrics.items():
    print(f'{name}: {value:.3f}')

## Summary

| Component | Purpose | Math |
|-----------|---------|------|
| Convolution | Feature extraction | $\sum I \cdot K$ |
| ReLU | Non-linearity | $\max(0, x)$ |
| Pooling | Downsampling | max/avg over region |
| FC Layer | Classification | $Wx + b$ |
| Softmax | Probabilities | $e^{z_i}/\sum e^{z_j}$ |