In [None]:
# mnist_neural_network.py
# Neural Network Demo for Introduction to Neural Networks Class
# Faculty of AI, UPH - Week 2 Session 2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# Pick the compute device: use the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Data preprocessing pipeline applied to every image: ToTensor followed by
# single-channel normalisation (0.1307 / 0.3081 are presumably the MNIST
# pixel mean and standard deviation - confirm against the dataset).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Load MNIST dataset
print("Loading MNIST dataset...")

# Training split: download into ./data if needed, iterate in shuffled
# mini-batches of 64. Built first because it is the only split that
# requests the download.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)

# Test split: no download requested here; iterated in fixed order in
# batches of 1000.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1000,
    shuffle=False,
)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# Let's look at a sample image
def show_sample_images():
    """Plot the first ten items of ``train_dataset`` in a 2x5 grid.

    Each panel shows the image in grayscale with its label as the title.
    Reads the module-level ``train_dataset`` and opens a matplotlib window.
    """
    fig, axes = plt.subplots(2, 5, figsize=(12, 6))
    # axes.flat walks the grid row by row, so panel k shows dataset item k.
    for index, panel in enumerate(axes.flat):
        image, label = train_dataset[index]
        # squeeze() drops the size-1 channel dimension so imshow gets a 2-D array.
        panel.imshow(image.squeeze(), cmap='gray')
        panel.set_title(f'Label: {label}')
        panel.axis('off')
    plt.suptitle('Sample MNIST Images')
    plt.tight_layout()
    plt.show()

# Neural Network Architecture
class MNISTNet(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10.

    Two ReLU hidden layers, each followed by dropout (p=0.2), and a linear
    output layer that produces one raw score per digit class.
    """

    def __init__(self):
        super().__init__()
        # 28 * 28 = 784 input pixels per image.
        self.fc1 = nn.Linear(28 * 28, 128)  # input  -> hidden 1
        self.fc2 = nn.Linear(128, 64)       # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(64, 10)        # hidden 2 -> 10 digit scores

        # One dropout module shared by both hidden layers (regularization).
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return unnormalised class scores, one row of 10 per flattened image."""
        # Collapse every image to a 784-long row; -1 lets the batch size vary.
        flat = x.view(-1, 784)

        hidden = self.dropout(F.relu(self.fc1(flat)))
        hidden = self.dropout(F.relu(self.fc2(hidden)))

        # No activation on the output: the scores are fed to CrossEntropyLoss.
        return self.fc3(hidden)

# Build the network and move its parameters to the selected device.
model = MNISTNet()
model = model.to(device)
print(f"Model architecture:\n{model}")

# Total number of scalar parameters (weights and biases) across all layers.
total_params = sum([p.numel() for p in model.parameters()])
print(f"Total parameters: {total_params:,}")

# Cross-entropy on the raw scores, optimised with Adam at lr = 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training function
def train_epoch(model, device, train_loader, optimizer, criterion, epoch):
    """Run one optimisation pass over ``train_loader`` and return its metrics.

    Args:
        model: Network to train; switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches; must support
            ``len()`` and expose ``.dataset`` (used in the progress line).
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable. Assumed to return the *mean* loss over the
            batch (the default ``reduction='mean'``), because each batch loss
            is weighted by the batch size when averaging.
        epoch: Epoch number, used only in the progress output.

    Returns:
        ``(avg_loss, accuracy)``: the per-sample average loss and the
        accuracy in percent, both over the samples actually processed.
        Both are NaN if the loader yields no samples.
    """
    model.train()
    loss_sum = 0.0  # sum of per-sample losses
    correct = 0
    seen = 0        # samples processed so far

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        batch_size = len(data)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(data)
        loss = criterion(output, target)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics. Weight the batch-mean loss by the batch size so a
        # shorter final batch does not count as much as a full one.
        loss_sum += loss.item() * batch_size
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        seen += batch_size

        # Print progress
        if batch_idx % 200 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data):5d}/{len(train_loader.dataset):5d} '
                  f'({100. * batch_idx / len(train_loader):3.0f}%)]\tLoss: {loss.item():.6f}')

    if seen == 0:
        # Nothing was processed: the metrics are undefined rather than zero.
        return float('nan'), float('nan')

    avg_loss = loss_sum / seen
    accuracy = 100. * correct / seen
    return avg_loss, accuracy

# Testing function
def test(model, device, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` without updating it.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device each batch is moved to before the forward pass.
        test_loader: Iterable of ``(data, target)`` batches.
        criterion: Loss callable. Assumed to return the *mean* loss over the
            batch (the default ``reduction='mean'``), because each batch loss
            is weighted by the batch size when averaging.

    Returns:
        ``(test_loss, accuracy)``: the per-sample average loss and the
        accuracy in percent over the evaluated samples. Both are NaN if the
        loader yields no samples. A summary line is also printed.
    """
    model.eval()
    loss_sum = 0.0  # sum of per-sample losses
    correct = 0
    total = 0       # samples evaluated

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            batch_size = len(data)
            output = model(data)
            # Weight the batch-mean loss by the batch size so unequal
            # batches still give a true per-sample average.
            loss_sum += criterion(output, target).item() * batch_size
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += batch_size

    if total == 0:
        # Nothing was evaluated: the metrics are undefined rather than zero.
        test_loss = accuracy = float('nan')
    else:
        test_loss = loss_sum / total
        accuracy = 100. * correct / total

    print(f'\nTest set: Average loss: {test_loss:.4f}, '
          f'Accuracy: {correct}/{total} ({accuracy:.2f}%)\n')

    return test_loss, accuracy


# The name matches pytest's default ``test*`` discovery pattern; mark it so
# it is not collected as a test case if this module is imported by a test file.
test.__test__ = False

# Training loop
def train_model(epochs=5):
    """Train and evaluate the module-level ``model`` for ``epochs`` epochs.

    Each epoch runs ``train_epoch`` and then ``test`` using the module-level
    model, device, loaders, optimizer and criterion, and prints a summary.

    Returns:
        Four lists with one entry per epoch, in this order: training losses,
        training accuracies, test losses, test accuracies.
    """
    # One list per metric, in the order the function returns them.
    history = ([], [], [], [])

    print("Starting training...")
    for epoch in range(1, epochs + 1):
        print(f"\n--- Epoch {epoch}/{epochs} ---")

        # Optimise on the training split, then measure on the held-out split.
        train_loss, train_acc = train_epoch(model, device, train_loader, optimizer, criterion, epoch)
        test_loss, test_acc = test(model, device, test_loader, criterion)

        for series, value in zip(history, (train_loss, train_acc, test_loss, test_acc)):
            series.append(value)

        print(f"Epoch {epoch} Summary:")
        print(f"  Train - Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%")
        print(f"  Test  - Loss: {test_loss:.4f}, Accuracy: {test_acc:.2f}%")

    return history

# Visualization functions
def plot_training_history(train_losses, train_accuracies, test_losses, test_accuracies):
    """Draw loss and accuracy curves side by side, one point per epoch.

    All four arguments are per-epoch sequences of equal length; training
    curves are drawn in blue and test curves in red.
    """
    epochs = range(1, len(train_losses) + 1)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # (axis, train series, test series, train legend, test legend, title, y label)
    panels = (
        (ax1, train_losses, test_losses,
         'Training Loss', 'Test Loss', 'Training and Test Loss', 'Loss'),
        (ax2, train_accuracies, test_accuracies,
         'Training Accuracy', 'Test Accuracy', 'Training and Test Accuracy', 'Accuracy (%)'),
    )
    for axis, train_series, test_series, train_label, test_label, title, y_label in panels:
        axis.plot(epochs, train_series, 'b-', label=train_label)
        axis.plot(epochs, test_series, 'r-', label=test_label)
        axis.set_title(title)
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid(True)

    plt.tight_layout()
    plt.show()

def visualize_predictions(model, test_loader, num_samples=10):
    """Show the model's predictions for the first images of one test batch.

    Args:
        model: Network to run; switched to evaluation mode.
        test_loader: Iterable of ``(images, labels)`` batches; only the first
            batch is used.
        num_samples: Number of images to display. Clamped to the range
            ``0 .. batch size``; the grid has 5 columns and as many rows as
            needed (2 rows for the default of 10).

    Each panel title gives the true label, the predicted label and the
    softmax probability of the predicted class; it is green when the
    prediction is correct and red otherwise. Uses the module-level ``device``.
    """
    model.eval()

    # Get a batch of test data
    images, labels = next(iter(test_loader))

    with torch.no_grad():
        images = images.to(device)
        outputs = model(images)
        predictions = outputs.argmax(dim=1)
        # Softmax once for the whole batch instead of once per plotted image.
        probabilities = torch.softmax(outputs, dim=1)

    # Never index past the batch, and size the grid from the request rather
    # than assuming exactly ten panels.
    num_samples = max(0, min(num_samples, len(images)))
    cols = 5
    rows = max(1, -(-num_samples // cols))  # ceiling division, at least one row

    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(rows, cols, figsize=(12, 3 * rows), squeeze=False)

    # Hide every axis up front so unused trailing panels stay blank.
    for panel in axes.flat:
        panel.axis('off')

    for i in range(num_samples):
        panel = axes[i // cols, i % cols]

        # Get image, true label, and prediction
        image = images[i].cpu().squeeze()
        true_label = labels[i].item()
        pred_label = predictions[i].item()
        confidence = probabilities[i, pred_label].item()

        # Plot
        panel.imshow(image, cmap='gray')
        color = 'green' if true_label == pred_label else 'red'
        panel.set_title(f'True: {true_label}, Pred: {pred_label}\nConf: {confidence:.2f}',
                        color=color)

    plt.suptitle('Model Predictions (Green=Correct, Red=Wrong)')
    plt.tight_layout()
    plt.show()

def analyze_model_weights():
    """Render each first-layer neuron's incoming weights as a 28x28 image.

    Reads ``fc1`` of the module-level ``model`` and draws its 128 weight rows
    in an 8x16 grid, using a diverging colour map fixed to the range [-1, 1].
    """
    # One row of 784 weights per hidden neuron, copied to a NumPy array on the CPU.
    first_layer_weights = model.fc1.weight.data.cpu().numpy()

    fig, axes = plt.subplots(8, 16, figsize=(16, 8))

    # axes.flat walks the 8x16 grid row by row, so panel i shows neuron i.
    for i, panel in enumerate(axes.flat):
        neuron_image = first_layer_weights[i].reshape(28, 28)
        panel.imshow(neuron_image, cmap='RdBu', vmin=-1, vmax=1)
        panel.axis('off')
        panel.set_title(f'N{i}', fontsize=8)

    plt.suptitle('First Layer Weights (What Each Neuron Looks For)')
    plt.tight_layout()
    plt.show()

# Main execution
if __name__ == "__main__":
    # The demo steps (sample images, training, history plot, predictions,
    # weight analysis, final summary) are run one at a time by the notebook
    # cells that follow, so nothing is executed here.
    # `pass` keeps the guard valid: an `if` whose body holds only comments
    # is an IndentationError and would stop this whole cell from running.
    pass

In [None]:
# Step 1: preview the first ten training images with their labels, as a
# quick check that the dataset loaded.
print("1. Showing sample images...")
show_sample_images()

In [None]:
# Step 2: train for 5 epochs. The four returned lists hold one value per
# epoch and are reused by the plotting and summary cells below.
print("\n2. Training the neural network...")
train_losses, train_accuracies, test_losses, test_accuracies = train_model(epochs=5)

In [None]:
# Step 3: plot the per-epoch loss and accuracy curves.
# Requires the training cell to have run (it defines the four lists).
print("\n3. Plotting training history...")
plot_training_history(train_losses, train_accuracies, test_losses, test_accuracies)

In [None]:
# Step 4: show predictions for the first images of the first test batch.
print("\n4. Visualizing predictions...")
visualize_predictions(model, test_loader)

In [None]:
# Step 5: draw the first layer's weights, one 28x28 image per hidden neuron.
print("\n5. Analyzing what the model learned...")
analyze_model_weights()

In [None]:
# Final summary: report the test accuracy of the last epoch.
# Requires the training cell to have run (it defines test_accuracies).
print("\n🎉 Demo completed successfully!")
print(f"Final test accuracy: {test_accuracies[-1]:.2f}%")