# AIML231 Assignment 3 | Part Three
> Shemaiah Rangitaawa `300601546`

# Part Three | Neural Networks

In [14]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == 'cuda':
    print('Device:', device)
    print('GPU:', torch.cuda.get_device_name(0))
    print('CUDA version:', torch.version.cuda)
    print('Total Memory (GB):', torch.cuda.get_device_properties(0).total_memory / 1e9) # Convert bytes to GB
    print('Number of GPUs:', torch.cuda.device_count())
else:
    print('CUDA is not available')

Device: cuda
GPU: NVIDIA GeForce RTX 2070
CUDA version: 12.1
Total Memory (GB): 8.589606912
Number of GPUs: 1


In [15]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

# Load and split the dataset
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.2, random_state=231)

# Normalize data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader objects
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## Network Definition

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLPNN(nn.Module):
    def __init__(self, activation_func=nn.ReLU()):
        super(MLPNN, self).__init__()
        self.fc1 = nn.Linear(64, 256)  # 64 inputs (8x8 images), 256 neurons in hidden layer
        self.fc2 = nn.Linear(256, 10)  # 10 output classes (digits 0-9)
        self.activation = activation_func

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.fc2(x)  # No activation here as it will be included in CrossEntropyLoss
        return x

## Training

In [50]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=15, print_stats=False):
    losses = []
    accuracies = []
    
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        running_corrects = 0
        total_samples = 0

        # Iterate over data
        for inputs, labels in train_loader:
            optimizer.zero_grad()  # Zero the parameter gradients

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            total_samples += inputs.size(0)

        epoch_loss = running_loss / total_samples
        epoch_acc = running_corrects.float() / total_samples

        # Append loss and accuracy to lists
        losses.append(epoch_loss)
        accuracies.append(epoch_acc)

        if print_stats:
            print(f'Epoch {epoch+1}: Loss = {epoch_loss:.3f}, Accuracy = {epoch_acc:.0%}')
    
    return losses, accuracies


In [51]:
# Initializing the model with ReLU activation
model = MLPNN(nn.ReLU()) # ReLU activation
criterion = nn.CrossEntropyLoss() # Cross-entropy loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # Using Adam optimizer
relu_loss, relu_acc = train_model(model, train_loader, criterion, optimizer, num_epochs=15, print_stats=True)

Epoch 1: Loss = 2.128, Accuracy = 48%
Epoch 2: Loss = 1.607, Accuracy = 88%
Epoch 3: Loss = 1.048, Accuracy = 89%
Epoch 4: Loss = 0.662, Accuracy = 91%
Epoch 5: Loss = 0.462, Accuracy = 92%
Epoch 6: Loss = 0.354, Accuracy = 94%
Epoch 7: Loss = 0.288, Accuracy = 94%
Epoch 8: Loss = 0.244, Accuracy = 95%
Epoch 9: Loss = 0.211, Accuracy = 96%
Epoch 10: Loss = 0.190, Accuracy = 96%
Epoch 11: Loss = 0.166, Accuracy = 97%
Epoch 12: Loss = 0.153, Accuracy = 97%
Epoch 13: Loss = 0.140, Accuracy = 98%
Epoch 14: Loss = 0.129, Accuracy = 98%
Epoch 15: Loss = 0.120, Accuracy = 98%


## Test set evaluation

In [52]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    total_accuracy = 0

    with torch.no_grad():  # No need to track gradients for evaluation
        for inputs, labels in test_loader:
            outputs = model(inputs)
            total_accuracy += accuracy(outputs, labels)
    
    test_accuracy = total_accuracy / len(test_loader)  # Average the accuracy across all batches
    print(f"Test Accuracy: {test_accuracy:.2%}")
    return test_accuracy

relu_test_acc = evaluate_model(model, test_loader)

Test Accuracy: 95.31%


## Five Example Predictions

In [53]:
# Get some predictions to display with images
test_predictions = []
model.eval()
with torch.no_grad():
    for i in range(5):
        img, label = test_dataset[i]
        output = model(img.unsqueeze(0))
        pred = output.argmax(dim=1).item()
        test_predictions.append((img, pred, label))

# Outputs of predictions
for i, (img, pred, label) in enumerate(test_predictions, 1):
    print(f"Test Image {i}: Predicted Label = {pred}, Actual Label = {label}")

Test Image 1: Predicted Label = 4, Actual Label = 4
Test Image 2: Predicted Label = 5, Actual Label = 5
Test Image 3: Predicted Label = 1, Actual Label = 1
Test Image 4: Predicted Label = 4, Actual Label = 4
Test Image 5: Predicted Label = 1, Actual Label = 1


## Evaluation Using Different Activation Functions

In [55]:
# Mapping of model configurations
activations = {
    "Tanh": nn.Tanh(),
    "Sigmoid": nn.Sigmoid(),
    "ReLU": nn.ReLU(),
    "Softmax": nn.Softmax(dim=1),
    "Hardshrink": nn.Hardshrink(),
}

# Color mapping for each activation function (for plotting later)
color_map = {
    "Tanh": "rgba(255, 99, 132, 0.6)",
    "Sigmoid": "rgba(54, 162, 235, 0.6)",
    "ReLU": "rgba(255, 206, 86, 0.6)",
    "Softmax": "rgba(153, 102, 255, 0.6)",
    "Hardshrink": "rgba(75, 192, 192, 0.6)",
}

# Dictionary to store results
results = {}

# Loop through each activation function, train and evaluate the model
for name, activation_func in activations.items():
    print(f"Training with {name} activation...")
    model = MLPNN(activation_func)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    losses, accuracies = train_model(model, train_loader, criterion, optimizer)
    
    # Store results
    results[name] = {
        "losses": losses,
        "accuracies": accuracies
    }

Training with Tanh activation...
Training with Sigmoid activation...
Training with ReLU activation...
Training with Softmax activation...
Training with Hardshrink activation...


In [56]:
import plotly.graph_objects as go

def plot_training_results(results, colors):
    """
    Plots the training results showing both loss and accuracy for each model configuration using Plotly.
    Each activation function is represented by a single color across both plots, with a single legend entry.

    Parameters:
        results (dict): A dictionary where each key is a model name and each value is another
                        dictionary with keys 'losses' and 'accuracies', which are lists of
                        values for each epoch.
    """
    if not results:
        print("No results to display.")
        return

    # Determine the number of epochs from the first entry in the results
    epochs = list(range(1, len(next(iter(results.values()))['losses']) + 1))

    # Create figures for loss and accuracy
    fig = go.Figure()

    # Adding Loss and Accuracy Traces
    for name, data in results.items():
        fig.add_trace(go.Scatter(
            x=epochs, y=data['losses'],
            mode='lines+markers',
            name=name,
            line=dict(color=colors[name]),
            legendgroup=name,  # Group by name for single legend entry
            showlegend=True
        ))
        fig.add_trace(go.Scatter(
            x=epochs, y=data['accuracies'],
            mode='lines+markers',
            name=name,
            line=dict(color=colors[name]),
            xaxis='x2',
            yaxis='y2',
            legendgroup=name,  # Group by name for single legend entry
            showlegend=False  # Hide legend for accuracy to avoid duplicate entries
        ))

    # Update layout for a subplot appearance with legend on the right
    fig.update_layout(
        title='Training Results: Loss and Accuracy',
        xaxis=dict(title='Epoch', domain=[0, 0.45]),
        yaxis=dict(title='Loss'),
        xaxis2=dict(title='Epoch', domain=[0.55, 1]),
        yaxis2=dict(title='Accuracy (%)', anchor='x2'),
        legend=dict(
            x=1.05,
            xanchor='left',
            y=0.5,
            yanchor='middle'
        ),
        height=600
    )

    fig.show()

plot_training_results(results, color_map)