# Week 04_05: CNNs and RNNs

Implementing deep learning architectures from scratch.

## Learning Objectives
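1. Implement the forward pass of the core CNN layers (2D convolution and max pooling)
2. Implement the forward pass of a vanilla Recurrent Neural Network (RNN)
3. Understand LSTM cells by implementing a single LSTM time step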

In [None]:
import numpy as np
from typing import Tuple, List

## 1. Convolution Layer

In [None]:
class Conv2D:
    """
    2D convolutional layer (forward pass only), written with explicit loops.

    Input: (batch, channels, height, width)
    Filters: (out_channels, in_channels, kernel_h, kernel_w)
    Output: (batch, out_channels, h_out, w_out) with
        h_out = (height + 2 * padding - kernel_h) // stride + 1
        w_out = (width + 2 * padding - kernel_w) // stride + 1

    The kernel is applied without flipping, i.e. each output value is the
    element-wise product of a filter with its receptive field, summed, plus
    the per-filter bias.
    """
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0):
        """
        Create the layer with random square filters and zero biases.

        Args:
            in_channels: Number of channels expected in the input.
            out_channels: Number of filters (channels in the output).
            kernel_size: Side length of the square kernel.
            stride: Step between neighbouring receptive fields.
            padding: Number of zero rows/columns added on every spatial side.

        Raises:
            ValueError: If ``stride`` is smaller than 1.
        """
        # A stride of 0 would otherwise only surface later as a
        # ZeroDivisionError inside forward().
        if stride < 1:
            raise ValueError(f"stride must be a positive integer, got {stride}")

        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # He (Kaiming) initialization: std = sqrt(2 / fan_in), where
        # fan_in = in_channels * kernel_size * kernel_size.
        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        self.filters = np.random.randn(out_channels, in_channels, kernel_size, kernel_size) * scale
        self.bias = np.zeros(out_channels)

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        Apply the filters to a batch of inputs.

        Args:
            X: Array of shape (batch, in_channels, height, width).

        Returns:
            Array of shape (batch, out_channels, h_out, w_out).

        Raises:
            ValueError: If the channel count of ``X`` does not match the
                filters' input channel count.
        """
        batch, in_c, h, w = X.shape
        out_c, filter_c, k_h, k_w = self.filters.shape

        # Without this check a 1-channel input would silently broadcast
        # against multi-channel filters and produce wrong values.
        if in_c != filter_c:
            raise ValueError(
                f"Conv2D expected input with {filter_c} channels, got {in_c}"
            )

        # Calculate output dimensions
        h_out = (h + 2 * self.padding - k_h) // self.stride + 1
        w_out = (w + 2 * self.padding - k_w) // self.stride + 1

        # Zero-pad only the two spatial axes (np.pad defaults to constant zeros)
        pad = self.padding
        X_pad = np.pad(X, ((0, 0), (0, 0), (pad, pad), (pad, pad)))

        output = np.zeros((batch, out_c, h_out, w_out))

        # Naive implementation (slow but educational)
        for b in range(batch):
            for c in range(out_c):
                for i in range(h_out):
                    # Row bounds depend only on i, so compute them once per row
                    h_start = i * self.stride
                    h_end = h_start + k_h
                    for j in range(w_out):
                        w_start = j * self.stride
                        w_end = w_start + k_w

                        # All input channels of the current window: (in_c, k_h, k_w)
                        receptive_field = X_pad[b, :, h_start:h_end, w_start:w_end]
                        output[b, c, i, j] = np.sum(receptive_field * self.filters[c]) + self.bias[c]

        return output

In [None]:
# Test Convolution
# A 3x3 kernel with padding=1 and stride=1 keeps the 32x32 spatial size.
conv = Conv2D(3, 8, kernel_size=3, padding=1)
X = np.random.randn(2, 3, 32, 32)  # two 3-channel 32x32 images
out = conv.forward(X)
print(f"Conv Input: {X.shape}", f"Conv Output: {out.shape}", sep="\n")

## 2. Pooling Layer

In [None]:
class MaxPool2D:
    """
    Max pooling layer (forward pass only).

    Takes the maximum over square windows of a
    (batch, channels, height, width) array, independently per channel.
    """
    def __init__(self, kernel_size: int = 2, stride: int = 2):
        """Store the window side length and the step between windows."""
        self.stride = stride
        self.kernel_size = kernel_size

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        Pool every channel of every sample.

        Returns an array of shape (batch, channels, rows, cols) where
        rows = (height - kernel_size) // stride + 1 and likewise for cols.
        """
        n_samples, n_channels, height, width = X.shape
        k, s = self.kernel_size, self.stride
        rows = (height - k) // s + 1
        cols = (width - k) // s + 1

        pooled = np.zeros((n_samples, n_channels, rows, cols))

        for r in range(rows):
            top = r * s
            for q in range(cols):
                left = q * s
                # Same window for every sample and channel; reduce over the
                # two spatial axes only.
                window = X[:, :, top:top + k, left:left + k]
                pooled[:, :, r, q] = window.max(axis=(2, 3))
        return pooled

In [None]:
# Test MaxPool
# Pool the convolution output with non-overlapping 2x2 windows.
pool = MaxPool2D(2, 2)
out_pool = pool.forward(out)
print(f"Pool Output: {out_pool.shape}")

## 3. Recurrent Neural Network (RNN)

In [None]:
class RNNCell:
    """
    Vanilla RNN Cell (forward pass only).

    Inputs are stored as row vectors, so the weights multiply from the right:
    h_t = tanh(x_t @ W_xh + h_{t-1} @ W_hh + b_h)
    y_t = h_t @ W_hy + b_y
    """
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        """
        Create the cell with small random weights and zero biases.

        Args:
            input_size: Number of features in each input vector.
            hidden_size: Number of units in the hidden state.
            output_size: Number of features in each output vector.
        """
        self.hidden_size = hidden_size

        # Initialize weights
        self.W_xh = np.random.randn(input_size, hidden_size) * 0.01
        self.W_hh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.W_hy = np.random.randn(hidden_size, output_size) * 0.01

        self.b_h = np.zeros(hidden_size)
        self.b_y = np.zeros(output_size)

    def forward(self, inputs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Forward pass for a sequence, starting from a zero hidden state.

        Args:
            inputs: Array of shape (seq_len, batch, input_size).

        Returns:
            A tuple ``(outputs, hidden_states)`` with shapes
            (seq_len, batch, output_size) and (seq_len, batch, hidden_size).
            The shapes hold for an empty sequence (seq_len == 0) as well.
        """
        seq_len, batch, _ = inputs.shape
        output_size = self.W_hy.shape[1]
        h = np.zeros((batch, self.hidden_size))

        # Preallocate so the result keeps its 3-D shape even when seq_len is 0
        # (stacking an empty list would collapse to shape (0,)).
        hidden_states = np.zeros((seq_len, batch, self.hidden_size))
        outputs = np.zeros((seq_len, batch, output_size))

        for t in range(seq_len):
            # Update hidden state from the current input and the previous state
            h = np.tanh(inputs[t] @ self.W_xh + h @ self.W_hh + self.b_h)

            hidden_states[t] = h
            # Compute output from the new hidden state
            outputs[t] = h @ self.W_hy + self.b_y

        return outputs, hidden_states

In [None]:
# Test RNN
# Run a random batch of 32 sequences, 15 steps each, through the cell.
rnn = RNNCell(10, 20, 5)
X_seq = np.random.randn(15, 32, 10)  # (seq_len, batch, input_size)

outputs, hiddens = rnn.forward(X_seq)
# outputs is (seq_len, batch, output_size)
print(f"RNN Output Shape: {outputs.shape}", f"Hidden States Shape: {hiddens.shape}", sep="\n")

## 4. LSTM Cell (Exercise)

In [None]:
class LSTMCell:
    """
    Long Short-Term Memory Cell (single-step forward pass).

    Gates (inputs are row vectors, so z = [h_{t-1}, x_t] multiplies W from the left):
    - Forget: f_t = σ(z @ W_f + b_f)
    - Input: i_t = σ(z @ W_i + b_i)
    - Output: o_t = σ(z @ W_o + b_o)
    - Cell candidate: g_t = tanh(z @ W_g + b_g)

    Update:
    - c_t = f_t * c_{t-1} + i_t * g_t
    - h_t = o_t * tanh(c_t)

    The four weight matrices are stored side by side in ``self.W`` in the
    column order [forget | input | output | candidate]; ``self.b`` uses the
    same layout.
    """
    def __init__(self, input_size: int, hidden_size: int):
        """
        Create the cell with small random weights and zero biases.

        Args:
            input_size: Number of features in each input vector x_t.
            hidden_size: Number of units in the hidden and cell states.
        """
        self.hidden_size = hidden_size
        concat_size = input_size + hidden_size

        # Weights for all gates combined
        self.W = np.random.randn(concat_size, 4 * hidden_size) * 0.01
        self.b = np.zeros(4 * hidden_size)

    def sigmoid(self, x):
        """Logistic function; the input is clipped to [-500, 500] so np.exp cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def forward_step(self, x_t: np.ndarray, h_prev: np.ndarray, c_prev: np.ndarray):
        """
        Advance the cell by one time step.

        Args:
            x_t: Current input, shape (batch, input_size).
            h_prev: Previous hidden state, shape (batch, hidden_size).
            c_prev: Previous cell state, shape (batch, hidden_size).

        Returns:
            A tuple ``(h_t, c_t)`` holding the new hidden and cell states.

        Raises:
            ValueError: If ``h_prev`` is not a 2-D array whose second
                dimension equals ``hidden_size``.
        """
        # Swapped or mis-sized h_prev/x_t can still add up to the expected
        # concatenated width and would then silently yield meaningless gates.
        h_shape = np.shape(h_prev)
        if len(h_shape) != 2 or h_shape[1] != self.hidden_size:
            raise ValueError(
                f"h_prev must have shape (batch, {self.hidden_size}), got {h_shape}"
            )

        # Concatenate h and x (hidden state first, matching the row layout of W)
        concat = np.hstack((h_prev, x_t))

        # Compute all gates at once
        gates = concat @ self.W + self.b

        # Split gates: consecutive column blocks of width hidden_size
        n = self.hidden_size
        f_gate = self.sigmoid(gates[:, :n])
        i_gate = self.sigmoid(gates[:, n:2 * n])
        o_gate = self.sigmoid(gates[:, 2 * n:3 * n])
        cell_cand = np.tanh(gates[:, 3 * n:])

        # Update cell and hidden state
        c_t = f_gate * c_prev + i_gate * cell_cand
        h_t = o_gate * np.tanh(c_t)

        return h_t, c_t

In [None]:
# Test LSTM Step
# Run a single time step from zero hidden and cell states.
lstm = LSTMCell(10, 20)
x_t = np.random.randn(32, 10)  # (batch, input_size)
h_prev = np.zeros((32, 20))  # (batch, hidden_size)
c_prev = np.zeros_like(h_prev)

h_new, c_new = lstm.forward_step(x_t, h_prev, c_prev)
print(f"LSTM h_t shape: {h_new.shape}", f"LSTM c_t shape: {c_new.shape}", sep="\n")