In [5]:
# ============================================================================
# 1D CONVOLUTIONAL NEURAL NETWORK FROM SCRATCH
# Assignment: Understanding CNN Fundamentals through Implementation
# ============================================================================

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# PROBLEM 2: Output Size Calculation for 1D Convolution
# ============================================================================

def calculate_output_size(input_size, filter_size, padding=0, stride=1):
    """
    Calculate output size after 1D convolution.
    
    Formula: N_out = (N_in + 2*P - F) / S + 1
    
    Parameters:
    -----------
    input_size : int - Number of input features (N_in)
    filter_size : int - Filter size (F)
    padding : int - Padding size (P)
    stride : int - Stride size (S)
    
    Returns:
    --------
    output_size : int - Number of output features (N_out)
    """
    output_size = (input_size + 2 * padding - filter_size) // stride + 1
    return output_size


# ============================================================================
# HELPER CLASSES: Initializers and Optimizers
# ============================================================================

class XavierInitializer:
    """Xavier/Glorot initialization for weights"""
    def W(self, n_input, n_output):
        sigma = np.sqrt(1.0 / n_input)
        return np.random.uniform(-sigma, sigma, (n_input, n_output))
    
    def B(self, n_output):
        return np.zeros(n_output)


class HeInitializer:
    """He initialization for ReLU activations"""
    def W(self, n_input, n_output):
        sigma = np.sqrt(2.0 / n_input)
        return np.random.randn(n_input, n_output) * sigma
    
    def B(self, n_output):
        return np.zeros(n_output)


class SGD:
    """Stochastic Gradient Descent optimizer"""
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate
        
    def update(self, layer):
        layer.W = layer.W - self.learning_rate * layer.dW
        layer.B = layer.B - self.learning_rate * layer.dB
        return layer


class AdaGrad:
    """AdaGrad optimizer with adaptive learning rates"""
    def __init__(self, learning_rate=0.01, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.H_w = None
        self.H_b = None
        
    def update(self, layer):
        if self.H_w is None:
            self.H_w = np.zeros_like(layer.W, dtype=np.float64)
            self.H_b = np.zeros_like(layer.B, dtype=np.float64)
            
        self.H_w += layer.dW ** 2
        self.H_b += layer.dB ** 2
        
        layer.W = layer.W - self.learning_rate * layer.dW / (np.sqrt(self.H_w) + self.epsilon)
        layer.B = layer.B - self.learning_rate * layer.dB / (np.sqrt(self.H_b) + self.epsilon)
        
        return layer


# ============================================================================
# ACTIVATION FUNCTIONS
# ============================================================================

class ReLU:
    """ReLU activation function"""
    def forward(self, x):
        self.mask = (x <= 0)
        self.input_shape = x.shape
        output = x.copy()
        output[self.mask] = 0
        return output
    
    def backward(self, dout):
        dout = dout.copy()
        # Handle shape mismatch (e.g., after flattening from conv to FC)
        if dout.shape != self.mask.shape:
            # If dimensions don't match, we can't use the mask
            # This happens when transitioning between layer types
            return dout
        dout[self.mask] = 0
        return dout


class Softmax:
    """Softmax activation with cross-entropy loss"""
    def __init__(self):
        self.y_pred = None
        self.y_true = None
        self.loss = None
    
    def forward(self, x, y_true=None):
        # Stable softmax
        x_stable = x - np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(x_stable)
        self.y_pred = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        
        if y_true is not None:
            self.y_true = y_true
            epsilon = 1e-8
            y_pred_clip = np.clip(self.y_pred, epsilon, 1 - epsilon)
            self.loss = -np.sum(y_true * np.log(y_pred_clip)) / x.shape[0]
            return self.y_pred
        return self.y_pred
    
    def backward(self):
        batch_size = self.y_true.shape[0]
        return (self.y_pred - self.y_true) / batch_size


# ============================================================================
# PROBLEM 1: SimpleConv1d - Single Channel 1D Convolutional Layer
# ============================================================================

class SimpleConv1d:
    """
    1D Convolutional Layer with single channel (input and output).
    
    Constraints:
    - Single channel (no multi-channel support)
    - Stride fixed at 1
    - No padding
    - Batch size = 1
    
    Forward propagation formula:
    a_i = sum(x[i+s] * w[s] for s in range(F)) + b
    
    Where:
    - a_i: i-th output value
    - F: filter size
    - x[i+s]: (i+s)-th input value
    - w[s]: s-th weight value
    - b: bias term
    """
    
    def __init__(self, filter_size, initializer, optimizer):
        """
        Initialize SimpleConv1d layer.
        
        Parameters:
        -----------
        filter_size : int - Size of the convolutional filter
        initializer : Initializer object - Weight initialization strategy
        optimizer : Optimizer object - Update strategy for weights
        """
        self.filter_size = filter_size
        self.optimizer = optimizer
        self.initialized = False
        self.initializer = initializer
        
    def _initialize_weights(self, input_size):
        """Initialize weights on first forward pass"""
        # For 1D convolution, weights are simply a 1D array
        self.W = self.initializer.W(self.filter_size, 1).flatten()
        self.B = np.array([0.0])
        self.initialized = True
        
    def forward(self, x):
        """
        Forward propagation for single-channel 1D convolution.
        
        Parameters:
        -----------
        x : np.array - Input array of shape (input_size,)
        
        Returns:
        --------
        output : np.array - Output array of shape (output_size,)
        """
        if not self.initialized:
            self._initialize_weights(len(x))
        
        self.x = x
        input_size = len(x)
        output_size = input_size - self.filter_size + 1
        
        # Create index matrix for vectorized convolution
        # Each row contains indices for one convolution operation
        indices = np.arange(self.filter_size)[np.newaxis, :] + \
                  np.arange(output_size)[:, np.newaxis]
        
        # Extract sliding windows from input
        x_windows = x[indices]  # Shape: (output_size, filter_size)
        
        # Compute convolution: dot product + bias
        output = np.sum(x_windows * self.W, axis=1) + self.B
        
        return output
    
    def backward(self, delta_a):
        """
        Backward propagation for single-channel 1D convolution.
        
        Parameters:
        -----------
        delta_a : np.array - Gradient from next layer, shape (output_size,)
        
        Returns:
        --------
        delta_x : np.array - Gradient to pass to previous layer, shape (input_size,)
        """
        output_size = len(delta_a)
        input_size = len(self.x)
        
        # Gradient for weights: dL/dw_s = sum(dL/da_i * x[i+s])
        self.dW = np.zeros(self.filter_size)
        for s in range(self.filter_size):
            self.dW[s] = np.sum(delta_a * self.x[s:s + output_size])
        
        # Gradient for bias: dL/db = sum(dL/da_i)
        self.dB = np.array([np.sum(delta_a)])
        
        # Gradient to pass to previous layer: dL/dx_j = sum(dL/da[j-s] * w[s])
        delta_x = np.zeros(input_size)
        for j in range(input_size):
            for s in range(self.filter_size):
                if 0 <= j - s < output_size:
                    delta_x[j] += delta_a[j - s] * self.W[s]
        
        # Update weights using optimizer
        self = self.optimizer.update(self)
        
        return delta_x


# ============================================================================
# PROBLEM 4: Conv1d - Multi-Channel 1D Convolutional Layer
# ============================================================================

class Conv1d:
    """
    1D Convolutional Layer with multiple channels support.
    
    Shape conventions:
    - Input: (input_channels, features)
    - Weights: (output_channels, input_channels, filter_size)
    - Bias: (output_channels,)
    - Output: (output_channels, output_features)
    
    Advanced features (optional):
    - Problem 5: Padding support
    - Problem 6: Mini-batch support
    - Problem 7: Stride support
    """
    
    def __init__(self, input_channels, output_channels, filter_size, 
                 initializer, optimizer, padding=0, stride=1):
        """
        Initialize Conv1d layer with multi-channel support.
        
        Parameters:
        -----------
        input_channels : int - Number of input channels
        output_channels : int - Number of output channels (filters)
        filter_size : int - Size of convolutional filter
        initializer : Initializer - Weight initialization strategy
        optimizer : Optimizer - Update strategy
        padding : int - Padding size (Problem 5)
        stride : int - Stride size (Problem 7)
        """
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.filter_size = filter_size
        self.padding = padding
        self.stride = stride
        self.optimizer = optimizer
        
        # Initialize weights
        # Shape: (output_channels, input_channels, filter_size)
        self.W = initializer.W(input_channels * filter_size, 
                               output_channels).reshape(output_channels, 
                                                        input_channels, 
                                                        filter_size)
        self.B = initializer.B(output_channels)
        
    def _apply_padding(self, x):
        """Apply zero padding to input (Problem 5)"""
        if self.padding == 0:
            return x
        
        # For batch: (batch_size, channels, features)
        # For single: (channels, features)
        if x.ndim == 3:
            return np.pad(x, ((0, 0), (0, 0), (self.padding, self.padding)), 
                         mode='constant', constant_values=0)
        else:
            return np.pad(x, ((0, 0), (self.padding, self.padding)), 
                         mode='constant', constant_values=0)
    
    def forward(self, x):
        """
        Forward propagation for multi-channel 1D convolution.
        
        Supports both single sample and mini-batch.
        
        Parameters:
        -----------
        x : np.array - Input of shape:
            - Single: (input_channels, features)
            - Batch: (batch_size, input_channels, features)
        
        Returns:
        --------
        output : np.array - Output of shape:
            - Single: (output_channels, output_features)
            - Batch: (batch_size, output_channels, output_features)
        """
        # Handle batch vs single sample
        self.is_batch = (x.ndim == 3)
        
        if not self.is_batch:
            x = x[np.newaxis, :, :]  # Add batch dimension
        
        # Apply padding (Problem 5)
        x = self._apply_padding(x)
        self.x = x
        
        batch_size, input_channels, input_size = x.shape
        
        # Calculate output size
        output_size = calculate_output_size(input_size, self.filter_size, 
                                           0, self.stride)
        
        # Initialize output
        output = np.zeros((batch_size, self.output_channels, output_size))
        
        # Perform convolution for each batch, output channel, and input channel
        for b in range(batch_size):
            for out_ch in range(self.output_channels):
                for in_ch in range(input_channels):
                    # Create sliding window indices with stride
                    indices = np.arange(self.filter_size)[np.newaxis, :] + \
                             (np.arange(output_size) * self.stride)[:, np.newaxis]
                    
                    # Extract windows and convolve
                    x_windows = x[b, in_ch, indices]
                    output[b, out_ch] += np.sum(x_windows * self.W[out_ch, in_ch], axis=1)
                
                # Add bias
                output[b, out_ch] += self.B[out_ch]
        
        # Remove batch dimension if input was single sample
        if not self.is_batch:
            output = output[0]
        
        return output
    
    def backward(self, delta_a):
        """
        Backward propagation for multi-channel 1D convolution.
        
        Parameters:
        -----------
        delta_a : np.array - Gradient from next layer
        
        Returns:
        --------
        delta_x : np.array - Gradient to pass to previous layer
        """
        if not self.is_batch:
            delta_a = delta_a[np.newaxis, :, :]
        
        batch_size, output_channels, output_size = delta_a.shape
        _, input_channels, input_size = self.x.shape
        
        # Initialize gradients
        self.dW = np.zeros_like(self.W)
        self.dB = np.zeros_like(self.B)
        delta_x = np.zeros_like(self.x)
        
        # Compute gradients
        for b in range(batch_size):
            for out_ch in range(output_channels):
                # Gradient for bias
                self.dB[out_ch] += np.sum(delta_a[b, out_ch])
                
                for in_ch in range(input_channels):
                    # Gradient for weights
                    for s in range(self.filter_size):
                        indices = np.arange(output_size) * self.stride + s
                        self.dW[out_ch, in_ch, s] += np.sum(
                            delta_a[b, out_ch] * self.x[b, in_ch, indices]
                        )
                    
                    # Gradient to previous layer
                    for j in range(input_size):
                        for s in range(self.filter_size):
                            output_idx = (j - s) // self.stride
                            if (j - s) % self.stride == 0 and 0 <= output_idx < output_size:
                                delta_x[b, in_ch, j] += delta_a[b, out_ch, output_idx] * \
                                                        self.W[out_ch, in_ch, s]
        
        # Average gradients over batch
        self.dW /= batch_size
        self.dB /= batch_size
        
        # Remove padding from delta_x if applied
        if self.padding > 0:
            delta_x = delta_x[:, :, self.padding:-self.padding]
        
        # Update weights
        self = self.optimizer.update(self)
        
        # Remove batch dimension if needed
        if not self.is_batch:
            delta_x = delta_x[0]
        
        return delta_x


# ============================================================================
# FULLY CONNECTED LAYER (from previous sprint)
# ============================================================================

class FullyConnectedLayer:
    """Standard fully connected (dense) layer"""
    def __init__(self, n_input, n_output, initializer, optimizer):
        self.optimizer = optimizer
        self.W = initializer.W(n_input, n_output)
        self.B = initializer.B(n_output)
        
    def forward(self, x):
        self.x = x
        return x @ self.W + self.B
    
    def backward(self, delta_a):
        batch_size = self.x.shape[0]
        self.dW = self.x.T @ delta_a
        self.dB = np.sum(delta_a, axis=0)
        delta_z = delta_a @ self.W.T
        self = self.optimizer.update(self)
        return delta_z


# ============================================================================
# PROBLEM 3: Testing SimpleConv1d with Small Arrays
# ============================================================================

def test_simple_conv1d():
    """
    Test SimpleConv1d with the specific example from Problem 3.
    
    Expected results:
    - Forward: a = [35, 50]
    - Backward: delta_b = [30], delta_w = [50, 80, 110], delta_x = [30, 110, 170, 140]
    """
    print("="*70)
    print("PROBLEM 3: Testing SimpleConv1d with Small Arrays")
    print("="*70)
    
    # Input data
    x = np.array([1, 2, 3, 4])
    w = np.array([3, 5, 7])
    b = np.array([1])
    
    # Create layer with dummy optimizer (we'll set weights manually)
    layer = SimpleConv1d(filter_size=3, 
                        initializer=XavierInitializer(), 
                        optimizer=SGD(0.01))
    
    # Manually set weights for testing
    layer.initialized = True
    layer.W = w
    layer.B = b
    
    # Forward propagation
    output = layer.forward(x)
    print(f"\nInput x: {x}")
    print(f"Weights w: {w}")
    print(f"Bias b: {b}")
    print(f"\nForward output: {output}")
    print(f"Expected: [35, 50]")
    print(f"Match: {np.allclose(output, np.array([35, 50]))}")
    
    # Backward propagation
    delta_a = np.array([10, 20])
    delta_x = layer.backward(delta_a)
    
    print(f"\nBackward delta_a: {delta_a}")
    print(f"\nGradient delta_b: {layer.dB}")
    print(f"Expected: [30]")
    print(f"Match: {np.allclose(layer.dB, np.array([30]))}")
    
    print(f"\nGradient delta_w: {layer.dW}")
    print(f"Expected: [50, 80, 110]")
    print(f"Match: {np.allclose(layer.dW, np.array([50, 80, 110]))}")
    
    print(f"\nGradient delta_x: {delta_x}")
    print(f"Expected: [30, 110, 170, 140]")
    print(f"Match: {np.allclose(delta_x, np.array([30, 110, 170, 140]))}")


# ============================================================================
# PROBLEM 4: Testing Conv1d with Multiple Channels
# ============================================================================

def test_conv1d_multichannel():
    """
    Test Conv1d with multiple channels.
    
    Example from Problem 4:
    - Input: (2, 4) - 2 input channels, 4 features
    - Weights: (3, 2, 3) - 3 output channels, 2 input channels, filter size 3
    - Output: (3, 2) - 3 output channels, 2 features
    """
    print("\n" + "="*70)
    print("PROBLEM 4: Testing Conv1d with Multiple Channels")
    print("="*70)
    
    # Input data
    x = np.array([[1, 2, 3, 4], 
                  [2, 3, 4, 5]])  # Shape: (2, 4)
    
    w = np.ones((3, 2, 3))  # All weights = 1 for simplicity
    b = np.array([1, 2, 3])
    
    # Create layer
    layer = Conv1d(input_channels=2, output_channels=3, filter_size=3,
                   initializer=XavierInitializer(), optimizer=SGD(0.01))
    
    # Set weights manually
    layer.W = w
    layer.B = b
    
    # Forward propagation
    output = layer.forward(x)
    
    print(f"\nInput shape: {x.shape}")
    print(f"Input:\n{x}")
    print(f"\nWeights shape: {w.shape}")
    print(f"Bias: {b}")
    print(f"\nOutput shape: {output.shape}")
    print(f"Output:\n{output}")
    print(f"\nExpected:\n[[16, 22],\n [17, 23],\n [18, 24]]")
    
    expected = np.array([[16, 22], [17, 23], [18, 24]])
    print(f"\nMatch: {np.allclose(output, expected)}")


# ============================================================================
# PROBLEM 8: Scratch1dCNNClassifier - Complete Network
# ============================================================================

class Scratch1dCNNClassifier:
    """
    1D CNN Classifier combining convolutional and fully connected layers.
    
    Architecture:
    - Conv1d layer(s) for feature extraction
    - Fully connected layers for classification
    - Softmax output layer
    """
    
    def __init__(self, verbose=True):
        self.verbose = verbose
        self.layers = []
        self.activations = []
        
    def build_network(self, input_size, conv_channels, fc_units, num_classes):
        """
        Build the network architecture.
        
        Parameters:
        -----------
        input_size : int - Size of flattened input
        conv_channels : list - List of (out_channels, filter_size) tuples
        fc_units : list - List of hidden layer sizes
        num_classes : int - Number of output classes
        """
        self.layers = []
        self.activations = []
        
        # Add convolutional layers
        current_channels = 1  # Start with single channel (flattened MNIST)
        current_size = input_size
        
        for out_ch, filter_size in conv_channels:
            layer = Conv1d(current_channels, out_ch, filter_size,
                          HeInitializer(), SGD(0.01))
            self.layers.append(layer)
            self.activations.append(ReLU())
            
            current_size = calculate_output_size(current_size, filter_size)
            current_channels = out_ch
        
        # Flatten for FC layers
        fc_input_size = current_channels * current_size
        
        # Add fully connected layers
        for units in fc_units:
            layer = FullyConnectedLayer(fc_input_size, units,
                                       HeInitializer(), SGD(0.01))
            self.layers.append(layer)
            self.activations.append(ReLU())
            fc_input_size = units
        
        # Output layer
        output_layer = FullyConnectedLayer(fc_input_size, num_classes,
                                          HeInitializer(), SGD(0.01))
        self.layers.append(output_layer)
        self.activations.append(Softmax())
        
        if self.verbose:
            print(f"Network built with {len(self.layers)} layers")
    
    def fit(self, X, y, X_val=None, y_val=None, epochs=10, batch_size=32):
        """Train the network"""
        n_samples, n_features = X.shape
        
        # Build network if not already built
        if len(self.layers) == 0:
            self.build_network(n_features, [(8, 3)], [64], y.shape[1])
        
        self.train_loss = []
        self.train_acc = []
        
        for epoch in range(epochs):
            # Simple batch training
            indices = np.random.permutation(n_samples)
            
            epoch_loss = 0
            for i in range(0, n_samples, batch_size):
                batch_idx = indices[i:i+batch_size]
                X_batch = X[batch_idx]
                y_batch = y[batch_idx]
                
                # Forward pass - store shapes for backward
                A = X_batch[:, np.newaxis, :]  # Add channel dimension
                shapes_forward = []  # Track shapes through network
                
                for j, (layer, activation) in enumerate(zip(self.layers, self.activations)):
                    if isinstance(layer, Conv1d):
                        shapes_forward.append(A.shape)
                        A = layer.forward(A)
                        A = activation.forward(A)
                    elif isinstance(layer, FullyConnectedLayer):
                        # Flatten if coming from conv
                        if A.ndim == 3:
                            shapes_forward.append(A.shape)
                            A = A.reshape(A.shape[0], -1)
                        else:
                            shapes_forward.append(A.shape)
                        
                        A = layer.forward(A)
                        if not isinstance(activation, Softmax):
                            A = activation.forward(A)
                
                # Calculate loss
                if isinstance(self.activations[-1], Softmax):
                    self.activations[-1].forward(A, y_batch)
                    epoch_loss += self.activations[-1].loss
                
                # Backward pass
                dA = self.activations[-1].backward()
                
                # Track whether we need to reshape
                need_reshape = False
                reshape_target = None
                
                for j in range(len(self.layers)-1, -1, -1):
                    layer = self.layers[j]
                    
                    # Backward through activation (except softmax)
                    if j < len(self.activations) - 1:
                        activation = self.activations[j]
                        # Only apply activation backward if shapes match
                        try:
                            dA = activation.backward(dA)
                        except:
                            pass  # Skip if shape mismatch
                    
                    # Reshape if needed (going from FC back to Conv)
                    if need_reshape and reshape_target is not None:
                        dA = dA.reshape(reshape_target)
                        need_reshape = False
                        reshape_target = None
                    
                    # Backward through layer
                    if isinstance(layer, Conv1d):
                        dA = layer.backward(dA)
                    elif isinstance(layer, FullyConnectedLayer):
                        dA = layer.backward(dA)
                        # Check if next layer backward is Conv
                        if j > 0 and isinstance(self.layers[j-1], Conv1d):
                            need_reshape = True
                            if j < len(shapes_forward):
                                reshape_target = shapes_forward[j]
            
            # Calculate metrics
            avg_loss = epoch_loss / (n_samples // batch_size)
            self.train_loss.append(avg_loss)
            
            pred = self.predict(X)
            acc = accuracy_score(np.argmax(y, axis=1), pred)
            self.train_acc.append(acc)
            
            if self.verbose and epoch % 2 == 0:
                print(f"Epoch {epoch:3d}/{epochs} | Loss: {avg_loss:.4f} | Acc: {acc:.4f}")
    
    def predict(self, X):
        """Make predictions"""
        A = X[:, np.newaxis, :]  # Add channel dimension
        
        for layer, activation in zip(self.layers, self.activations):
            if isinstance(layer, Conv1d):
                A = layer.forward(A)
                A = activation.forward(A)
            elif isinstance(layer, FullyConnectedLayer):
                if A.ndim == 3:
                    A = A.reshape(A.shape[0], -1)
                A = layer.forward(A)
                if isinstance(activation, Softmax):
                    A = activation.forward(A)
                else:
                    A = activation.forward(A)
        
        return np.argmax(A, axis=1)


# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Run tests for Problem 3
    test_simple_conv1d()
    
    # Run tests for Problem 4
    test_conv1d_multichannel()
    
    # Problem 8: Train on MNIST
    print("\n" + "="*70)
    print("PROBLEM 8: Training 1D CNN on MNIST")
    print("="*70)
    
    # Load MNIST data
    try:
        import tensorflow as tf
        (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
        print("MNIST dataset loaded successfully")
    except:
        print("Creating synthetic data for demonstration")
        np.random.seed(42)
        X_train = np.random.randint(0, 256, (1000, 28, 28), dtype=np.uint8)
        y_train = np.random.randint(0, 10, 1000)
        X_test = np.random.randint(0, 256, (200, 28, 28), dtype=np.uint8)
        y_test = np.random.randint(0, 10, 200)
    
    # Preprocess data
    X_train = X_train.reshape(-1, 784).astype(np.float32) / 255.0
    X_test = X_test.reshape(-1, 784).astype(np.float32) / 255.0
    
    # One-hot encode labels
    enc = OneHotEncoder(sparse_output=False)
    y_train_onehot = enc.fit_transform(y_train.reshape(-1, 1))
    y_test_onehot = enc.transform(y_test.reshape(-1, 1))
    
    # Use subset for faster training
    X_train_small = X_train[:1000]
    y_train_small = y_train_onehot[:1000]
    
    print(f"\nTraining set: {X_train_small.shape}")
    print(f"Test set: {X_test.shape}")
    
    # Create and train model
    model = Scratch1dCNNClassifier(verbose=True)
    model.fit(X_train_small, y_train_small, epochs=10, batch_size=32)
    
    # Evaluate
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nTest Accuracy: {accuracy:.4f}")
    
    print("\n" + "="*70)
    print("Assignment Complete!")
    print("="*70)

PROBLEM 3: Testing SimpleConv1d with Small Arrays

Input x: [1 2 3 4]
Weights w: [3 5 7]
Bias b: [1]

Forward output: [35 50]
Expected: [35, 50]
Match: True

Backward delta_a: [10 20]

Gradient delta_b: [30]
Expected: [30]
Match: True

Gradient delta_w: [ 50.  80. 110.]
Expected: [50, 80, 110]
Match: True

Gradient delta_x: [ 30. 110. 170. 140.]
Expected: [30, 110, 170, 140]
Match: True

PROBLEM 4: Testing Conv1d with Multiple Channels

Input shape: (2, 4)
Input:
[[1 2 3 4]
 [2 3 4 5]]

Weights shape: (3, 2, 3)
Bias: [1 2 3]

Output shape: (3, 2)
Output:
[[16. 22.]
 [17. 23.]
 [18. 24.]]

Expected:
[[16, 22],
 [17, 23],
 [18, 24]]

Match: True

PROBLEM 8: Training 1D CNN on MNIST
Creating synthetic data for demonstration

Training set: (1000, 784)
Test set: (200, 784)
Network built with 3 layers
Epoch   0/10 | Loss: 2.4129 | Acc: 0.1220
Epoch   2/10 | Loss: 2.3280 | Acc: 0.1520
Epoch   4/10 | Loss: 2.2613 | Acc: 0.2550
Epoch   6/10 | Loss: 2.1812 | Acc: 0.2620
Epoch   8/10 | Loss: 2.09