# Two-Layer Custom ANN Implementation (2-4-1 Architecture)

**Assignment: Add One Hidden Layer**

**Model Architecture:**
- Input Layer: 2 neurons
- Hidden Layer: 4 neurons (ReLU activation)
- Output Layer: 1 neuron (Sigmoid activation)
- Loss: Binary Cross Entropy
- Optimizer: Manual gradient-descent weight update, using gradients computed by `.backward()`

**Network Structure: 2-4-1**

**Forward Pass:**
```
Z1 = X @ W1 + b1
A1 = torch.relu(Z1)
Z2 = A1 @ W2 + b2
Y_pred = torch.sigmoid(Z2)
```

In [None]:
# Import required libraries
import torch
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import os

# Set random seeds for reproducibility
# Both the PyTorch and the NumPy global generators are fixed to the same
# seed so that repeated runs start from the same random state.
torch.manual_seed(42)
np.random.seed(42)

# Report the installed PyTorch version and whether a CUDA device is visible.
print(f"PyTorch version: {torch.__version__}")
print(f"Device available: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

## 1. Load Dataset (Same as Q2)

In [None]:
# Load dataset from CSV (same as Q2)
def load_dataset(csv_path='binary_data.csv'):
    """Read the binary-classification dataset from a CSV file.

    Args:
        csv_path: Path of the CSV file. The file is expected to provide the
            columns ``f1``, ``f2`` and ``label``.

    Returns:
        A tuple ``(X, y, df)``: the ``f1``/``f2`` values as an array, the
        ``label`` values as an array, and the full DataFrame. When
        ``csv_path`` does not exist, a message is printed and
        ``(None, None, None)`` is returned instead.
    """
    # Guard clause: a missing file is reported to the user, not raised.
    if not os.path.exists(csv_path):
        print(f"CSV file {csv_path} not found. Please run Q2 first to generate the dataset.")
        return None, None, None

    print(f"Loading dataset from: {csv_path}")
    frame = pd.read_csv(csv_path)

    features = frame[['f1', 'f2']].values
    labels = frame['label'].values

    print(f"Loaded dataset shape: {frame.shape}")
    print(f"Label distribution: {frame['label'].value_counts().to_dict()}")
    return features, labels, frame

# Read the CSV once; every later cell checks `X is not None` before running.
X, y, df = load_dataset()

if X is None:
    # The CSV is produced by the single-layer script, so point the user there.
    print("Please run the single_layer_ann.py script first to generate the dataset.")
else:
    print("\nFirst 5 rows of the dataset:")
    print(df.head())

    # Scatter plot of the two features, coloured by class label.
    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.7)
    plt.title('Binary Classification Dataset (Same as Q2)')
    plt.xlabel('Feature 1 (f1)')
    plt.ylabel('Feature 2 (f2)')
    plt.colorbar(scatter)
    plt.grid(True, alpha=0.3)
    plt.show()

## 2. Data Preparation

In [None]:
# Prepare data for training (same as Q2)
def prepare_data(X, y, test_size=0.2, random_state=42):
    """Split, standardise and convert the dataset to PyTorch tensors.

    Args:
        X: Feature array passed to ``train_test_split``.
        y: Label array; also used as the stratification target.
        test_size: Fraction of the samples held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test, device)``. The feature tensors
        are standardised float tensors, the label tensors are float tensors
        reshaped to a single column, and all four live on ``device``
        (CUDA when available, otherwise CPU).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def as_features(array):
        # Float tensor on the target device, shape left untouched.
        return torch.FloatTensor(array).to(device)

    def as_targets(array):
        # Labels become a column vector so they line up with the network output.
        return torch.FloatTensor(array).reshape(-1, 1).to(device)

    # Stratified split keeps the class ratio the same in both partitions.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # The scaler is fitted on the training split only and reused for the test split.
    scaler = StandardScaler()
    train_X_scaled = scaler.fit_transform(train_X)
    test_X_scaled = scaler.transform(test_X)

    X_train_tensor = as_features(train_X_scaled)
    X_test_tensor = as_features(test_X_scaled)
    y_train_tensor = as_targets(train_y)
    y_test_tensor = as_targets(test_y)

    print(f"Training set: {X_train_tensor.shape[0]} samples")
    print(f"Test set: {X_test_tensor.shape[0]} samples")
    print(f"Device: {device}")

    return X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor, device

# Only run when the CSV was found (load_dataset returns None for X otherwise).
if X is not None:
    # Prepare the data
    # The resulting tensors and `device` are module-level names that the
    # initialisation, training and plotting cells read later on.
    X_train, X_test, y_train, y_test, device = prepare_data(X, y)
    
    # Echo the tensor shapes as a quick sanity check of the split.
    print(f"\nData shapes:")
    print(f"X_train: {X_train.shape}")
    print(f"X_test: {X_test.shape}")
    print(f"y_train: {y_train.shape}")
    print(f"y_test: {y_test.shape}")

## 3. Initialize Network Parameters (2-4-1 Architecture)

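Parameters are initialized as specified in the assignment: `W1 = randn(2, 4)`, `b1 = zeros(1, 4)`, `W2 = randn(4, 1)`, `b2 = zeros(1, 1)`, all created with `requires_grad=True`.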

In [None]:
# Initialize network parameters as specified in the assignment
# All four tensors are created with requires_grad=True so that
# loss.backward() fills their .grad fields for the manual update step.
if X is not None:
    print("Initializing 2-4-1 Neural Network parameters...")
    
    # Layer 1: Input (2) -> Hidden (4)
    # Weights are random draws from torch.randn; biases start at zero.
    # W1 is drawn before W2, so with the fixed seed the draw order
    # determines the initial values.
    W1 = torch.randn(2, 4, requires_grad=True, device=device)
    b1 = torch.zeros(1, 4, requires_grad=True, device=device)
    
    # Layer 2: Hidden (4) -> Output (1)
    W2 = torch.randn(4, 1, requires_grad=True, device=device)
    b2 = torch.zeros(1, 1, requires_grad=True, device=device)
    
    print(f"\nNetwork Architecture: 2-4-1")
    print(f"W1 shape: {W1.shape} (Input -> Hidden)")
    print(f"b1 shape: {b1.shape}")
    print(f"W2 shape: {W2.shape} (Hidden -> Output)")
    print(f"b2 shape: {b2.shape}")
    # Parameter count: W1 (2*4) + b1 (4) + W2 (4*1) + b2 (1)
    print(f"Total parameters: {2*4 + 4 + 4*1 + 1} = 17")
    
    # Display initial weights
    # detach().cpu() yields plain NumPy arrays regardless of the device in use.
    print(f"\nInitial weights:")
    print(f"W1:\n{W1.detach().cpu().numpy()}")
    print(f"b1: {b1.detach().cpu().numpy()}")
    print(f"W2:\n{W2.detach().cpu().numpy()}")
    print(f"b2: {b2.detach().cpu().numpy()}")

## 4. Forward Pass Implementation

Implement the forward pass as specified:
```
Z1 = X @ W1 + b1
A1 = torch.relu(Z1)
Z2 = A1 @ W2 + b2
Y_pred = torch.sigmoid(Z2)
```

In [None]:
def forward_pass(X, W1, b1, W2, b2):
    """
    Run a batch of inputs through the two-layer network.

    Computation:
        Z1 = X @ W1 + b1
        A1 = relu(Z1)
        Z2 = A1 @ W2 + b2
        Y_pred = sigmoid(Z2)

    Args:
        X: Input batch; its last dimension must match the first dimension of W1.
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.

    Returns:
        Tuple ``(Y_pred, A1, Z1)``: the sigmoid outputs, the hidden ReLU
        activations and the hidden pre-activations. The last two are
        returned for inspection and plotting.
    """
    # Hidden layer: affine map followed by ReLU
    hidden_pre = torch.matmul(X, W1) + b1
    hidden_act = hidden_pre.relu()

    # Output layer: affine map followed by sigmoid
    output_pre = torch.matmul(hidden_act, W2) + b2
    output_prob = output_pre.sigmoid()

    return output_prob, hidden_act, hidden_pre

def binary_cross_entropy_loss(y_pred, y_true):
    """Binary Cross Entropy Loss, averaged over all elements.

    Args:
        y_pred: Predicted probabilities (sigmoid outputs).
        y_true: Target labels (0 or 1), broadcastable against ``y_pred``.

    Returns:
        Scalar tensor holding the mean of
        ``-(y * log(p) + (1 - y) * log(1 - p))``.
    """
    # The clamp margin must be representable in the tensor's dtype. With a
    # fixed 1e-15, `1 - epsilon` rounds to exactly 1.0 in float32, so a
    # saturated prediction of 1.0 reaches log(0) = -inf and the loss turns
    # into inf (or NaN via 0 * -inf). Machine epsilon is the smallest margin
    # that survives; 1e-15 is kept as the floor for float64 inputs.
    dtype = y_pred.dtype if y_pred.is_floating_point() else torch.get_default_dtype()
    epsilon = max(1e-15, torch.finfo(dtype).eps)

    y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
    loss = -(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))
    return loss.mean()

if X is not None:
    # Smoke-test the untrained network on the first five training samples.
    print("Testing forward pass...")

    # No graph is recorded here, so the results can go straight to NumPy.
    with torch.no_grad():
        y_pred, A1, Z1 = forward_pass(X_train[:5], W1, b1, W2, b2)

    print(f"Sample predictions (first 5): {y_pred.cpu().numpy().flatten()}")
    print(f"Hidden layer activations shape: {A1.shape}")
    print(f"Sample hidden activations (first sample): {A1[0].cpu().numpy()}")

## 5. Training Loop with Manual Weight Updates

In [None]:
def train_step(X, y, W1, b1, W2, b2, learning_rate):
    """
    Perform one gradient-descent step and return the loss before the update.

    Gradients come from autograd (``.backward()``); the parameters are then
    modified in place by hand, without an optimizer object.

    Args:
        X: Input batch.
        y: Target labels for the batch.
        W1, b1, W2, b2: Network parameters; updated in place.
        learning_rate: Step size applied to every gradient.

    Returns:
        The batch loss as a Python float.
    """
    # Forward pass and loss (intermediate activations are not needed here)
    predictions = forward_pass(X, W1, b1, W2, b2)[0]
    batch_loss = binary_cross_entropy_loss(predictions, y)

    # Autograd fills .grad on each parameter
    batch_loss.backward()

    # The update itself must not be tracked by autograd
    with torch.no_grad():
        for param in (W1, b1, W2, b2):
            param.sub_(learning_rate * param.grad)
            # Reset so the next backward() does not accumulate onto this step
            param.grad.zero_()

    return batch_loss.item()

def calculate_accuracy(X, y, W1, b1, W2, b2):
    """Return the percentage of samples whose thresholded prediction equals the label.

    Args:
        X: Input batch.
        y: Target labels, compared element-wise with the thresholded outputs.
        W1, b1, W2, b2: Network parameters used for the forward pass.

    Returns:
        Accuracy in percent as a Python float.
    """
    with torch.no_grad():
        probabilities = forward_pass(X, W1, b1, W2, b2)[0]
        # Outputs of 0.5 or more are treated as class 1
        hard_labels = probabilities.ge(0.5).float()
        n_correct = hard_labels.eq(y).float().sum()
        percent = n_correct / y.shape[0] * 100
    return percent.item()

# Confirmation that this cell ran and the training helpers above are defined.
print("Training functions defined successfully!")

## 6. Training the Network

In [None]:
if X is not None:
    # Hyper-parameters: every epoch is one gradient step on the full training set
    epochs, learning_rate = 50, 0.1

    # Per-epoch history, read again by the reporting and plotting cells
    train_losses, train_accuracies, test_accuracies = [], [], []

    print(f"Training Two-Layer ANN (2-4-1) for {epochs} epochs...")
    print(f"Learning rate: {learning_rate}")
    print("=" * 60)

    for epoch in range(1, epochs + 1):
        # Update the parameters, then measure accuracy with the new weights
        loss = train_step(X_train, y_train, W1, b1, W2, b2, learning_rate)
        train_acc = calculate_accuracy(X_train, y_train, W1, b1, W2, b2)
        test_acc = calculate_accuracy(X_test, y_test, W1, b1, W2, b2)

        train_losses.append(loss)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        # Report the first epoch, every tenth epoch and the last epoch
        if epoch in (1, epochs) or epoch % 10 == 0:
            print(f"Epoch {epoch:2d}: Loss = {loss:.4f}, Train Acc = {train_acc:.1f}%, Test Acc = {test_acc:.1f}%")

    print("=" * 60)
    print("Training completed!")

    # Accuracies after the last epoch
    final_train_acc, final_test_acc = train_accuracies[-1], test_accuracies[-1]

    print(f"\nFinal Results:")
    print(f"Training Accuracy: {final_train_acc:.1f}%")
    print(f"Test Accuracy: {final_test_acc:.1f}%")

## 7. Results in Assignment Format

In [None]:
# Runs only when the dataset was loaded; relies on train_losses and
# test_accuracies filled in by the training cell.
if X is not None:
    # Display results in the assignment's sample output format
    print("=" * 50)
    print("SAMPLE OUTPUT FORMAT (as requested):")
    print("=" * 50)
    # train_losses is 0-indexed while epochs are counted from 1,
    # so index 0 is epoch 1 and index 29 is epoch 30.
    print(f"Epoch 1: Loss = {train_losses[0]:.2f}")
    # Guard against runs shorter than 30 epochs to avoid an IndexError.
    if len(train_losses) >= 30:
        print(f"Epoch 30: Loss = {train_losses[29]:.2f}")
    # Reported accuracy is the test accuracy after the final epoch.
    print(f"Accuracy: {test_accuracies[-1]:.1f}%")
    
    # Show final model parameters
    # detach().cpu() gives NumPy arrays independent of the training device.
    print(f"\n" + "=" * 50)
    print("FINAL MODEL PARAMETERS:")
    print("=" * 50)
    print(f"W1 (Input -> Hidden):\n{W1.detach().cpu().numpy()}")
    print(f"b1: {b1.detach().cpu().numpy()}")
    print(f"W2 (Hidden -> Output):\n{W2.detach().cpu().numpy()}")
    print(f"b2: {b2.detach().cpu().numpy()}")

## 8. Visualization and Analysis

In [None]:
# Four-panel summary figure: raw data, loss curve, accuracy curves and
# hidden-layer activations. Requires the training cell to have run.
if X is not None:
    # Create comprehensive visualization
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
    
    # Plot 1: Original dataset
    # Uses the unscaled X and y loaded from the CSV, coloured by label.
    scatter = ax1.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.7)
    ax1.set_xlabel('Feature 1 (f1)')
    ax1.set_ylabel('Feature 2 (f2)')
    ax1.set_title('Original Dataset')
    plt.colorbar(scatter, ax=ax1)
    
    # Plot 2: Training loss
    # The x-axis starts at 1 so it matches the epoch numbering of the training loop.
    ax2.plot(range(1, len(train_losses) + 1), train_losses, 'b-', linewidth=2)
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.set_title('Training Loss (2-4-1 Network)')
    ax2.grid(True, alpha=0.3)
    
    # Plot 3: Accuracy comparison
    ax3.plot(range(1, len(train_accuracies) + 1), train_accuracies, 'g-', label='Train Accuracy', linewidth=2)
    ax3.plot(range(1, len(test_accuracies) + 1), test_accuracies, 'r-', label='Test Accuracy', linewidth=2)
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('Accuracy (%)')
    ax3.set_title('Training vs Test Accuracy')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # Plot 4: Hidden layer activations for a sample
    # Forward pass with the trained weights on the first 10 training samples;
    # only the ReLU activations (second return value) are kept.
    with torch.no_grad():
        _, A1_sample, _ = forward_pass(X_train[:10], W1, b1, W2, b2)
        A1_np = A1_sample.detach().cpu().numpy()
    
    # Transposed so that rows are hidden neurons and columns are samples,
    # matching the axis labels below.
    im = ax4.imshow(A1_np.T, cmap='viridis', aspect='auto')
    ax4.set_xlabel('Sample Index')
    ax4.set_ylabel('Hidden Neuron')
    ax4.set_title('Hidden Layer Activations (First 10 samples)')
    plt.colorbar(im, ax=ax4)
    
    plt.tight_layout()
    plt.show()
    
    print("Training visualization completed!")

## 9. Architecture Comparison and Summary

In [None]:
if X is None:
    # Without the CSV none of the earlier cells produced results to summarise.
    print("Please run the single_layer_ann.py script first to generate the dataset.")
else:
    # Banner
    print("\n".join([
        "=" * 60,
        "ASSIGNMENT COMPLETED SUCCESSFULLY!",
        "=" * 60,
    ]))

    # Static description of the network
    print("\n".join([
        "\nTwo-Layer Network Architecture (2-4-1):",
        f"  Input Layer: 2 neurons",
        f"  Hidden Layer: 4 neurons (ReLU activation)",
        f"  Output Layer: 1 neuron (Sigmoid activation)",
        f"  Total parameters: {2*4 + 4 + 4*1 + 1} = 17",
    ]))

    # Values recorded by the training cell
    print("\nTraining Summary:")
    print(f"  Total epochs: {epochs}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Initial loss: {train_losses[0]:.4f}")
    print(f"  Final loss: {train_losses[-1]:.4f}")
    # Relative drop from the first to the last recorded loss, in percent
    print(f"  Loss reduction: {((train_losses[0] - train_losses[-1]) / train_losses[0] * 100):.1f}%")

    print("\nFinal Performance:")
    print(f"  Training accuracy: {final_train_acc:.1f}%")
    print(f"  Test accuracy: {final_test_acc:.1f}%")

    # Requirement checklist (fixed text)
    print("\n".join([
        "\n✓ All requirements fulfilled:",
        "  ✓ Used same dataset as Q2",
        "  ✓ Implemented 2-4-1 architecture as specified",
        "  ✓ Used specified initialization: W1=randn(2,4), b1=zeros(1,4), etc.",
        "  ✓ Implemented forward pass: Z1=X@W1+b1, A1=relu(Z1), Z2=A1@W2+b2, Y=sigmoid(Z2)",
        "  ✓ Used Binary Cross Entropy loss",
        "  ✓ Used .backward() for automatic differentiation",
        "  ✓ Manual weight updates with torch.no_grad()",
        "  ✓ Proper gradient zeroing with .grad.zero_()",
        "  ✓ Tracked loss and accuracy as requested",
    ]))