In [None]:
from MNISTDataset import MNISTDataset
from CNN import CNNModel
import torch.nn as nn
import torch
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from torchvision import transforms 

In [None]:
# --- Reproducibility ---
# Seed PyTorch's global RNG so a fresh-kernel "Run All" repeats the same run
# (as far as the backend is deterministic).
SEED = 42
torch.manual_seed(SEED)

# --- Load MNIST datasets for training, validation, and testing ---
# NOTE(review): machine-specific absolute path; point DATA_DIR at the local
# folder that holds the four MNIST idx files before running elsewhere.
DATA_DIR = r"C:\School\Term 7\Introduction to Machine learning\Lab\Lab 2\Neural-network\archive"
full_train_dataset = MNISTDataset(rf"{DATA_DIR}\train-images.idx3-ubyte", rf"{DATA_DIR}\train-labels.idx1-ubyte")
test_dataset = MNISTDataset(rf"{DATA_DIR}\t10k-images.idx3-ubyte", rf"{DATA_DIR}\t10k-labels.idx1-ubyte")

# --- Split training data into train and validation sets ---
# A dedicated seeded generator keeps the partition identical on every run, so
# validation accuracies of different hyperparameters are comparable.
VAL_SIZE = 10000
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [len(full_train_dataset) - VAL_SIZE, VAL_SIZE],
    generator=split_generator,
)

# --- Define possible hyperparameters ---
learning_rates = [0.0001, 0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128, 256]
filter_config = [
    [32, 64, 128],
    [64, 128, 256],
    [128, 256, 512]]

# --- Create DataLoaders for train, validation, and test datasets ---
# Only the training loader shuffles; validation/test order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1000, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# --- Number of epochs used for every training run ---
epochs = 5

# --- Initialize model, loss function, and optimizer ---
# Baseline objects; each search cell below builds its own fresh model/optimizer.
mymodel = CNNModel()
criterion = nn.CrossEntropyLoss()
optimize = torch.optim.SGD(mymodel.parameters(), lr=0.01)

# --- Dictionaries to track the best configuration of each individual search ---
best_lr_config = {"learning_rate": None, "val_accuracy": 0}
best_bs_config = {"batch_size": None, "val_accuracy": 0}
best_filter_config = {"filter_config": None, "val_accuracy": 0}

# Combined best configuration that stores all best hyperparameters
combined_best_config = {"learning_rate": None, "batch_size": None, "filter_config": None, "val_accuracy": 0}

# --- Set device to GPU if available, otherwise use CPU ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mymodel.to(device)


In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, device):
    """Train `model` for `epochs` epochs, validating after each one.

    Every batch is reshaped to (-1, 1, 28, 28) and moved to `device` before
    the forward pass. Per-epoch loss is the sample-weighted mean of the batch
    losses; accuracy is the fraction of samples whose arg-max output equals
    the label. A three-line summary is printed after each epoch.

    Returns:
        (train_losses, val_losses, train_accuracies, val_accuracies), four
        lists holding one float per epoch.
    """
    model.to(device)
    history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

    def run_pass(loader, training):
        """Run one full pass over `loader`; update weights only when `training`."""
        running_loss, hits, seen = 0, 0, 0
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.view(-1, 1, 28, 28).to(device)
            batch_labels = batch_labels.to(device)
            if training:
                optimizer.zero_grad()
            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)
            if training:
                batch_loss.backward()
                optimizer.step()
            # Weight the batch loss by its size so the epoch mean is per-sample.
            running_loss += batch_loss.item() * batch_inputs.size(0)
            predicted = torch.max(logits, 1)[1]
            hits += (predicted == batch_labels).sum().item()
            seen += batch_labels.size(0)
        return running_loss / seen, hits / seen

    for epoch_idx in range(epochs):
        # Training pass
        model.train()
        epoch_train_loss, epoch_train_acc = run_pass(train_loader, training=True)
        history["train_loss"].append(epoch_train_loss)
        history["train_acc"].append(epoch_train_acc)

        # Validation pass (no gradients tracked, no weight updates)
        model.eval()
        with torch.no_grad():
            epoch_val_loss, epoch_val_acc = run_pass(val_loader, training=False)
        history["val_loss"].append(epoch_val_loss)
        history["val_acc"].append(epoch_val_acc)

        print(f"Epoch {epoch_idx+1}/{epochs}")
        print(f"  Train Loss: {epoch_train_loss:.4f}, Train Accuracy: {epoch_train_acc:.4f}")
        print(f"  Val Loss: {epoch_val_loss:.4f}, Val Accuracy: {epoch_val_acc:.4f}")

    return history["train_loss"], history["val_loss"], history["train_acc"], history["val_acc"]

# Training function definition
# Define lists to keep track of losses and accuracies
# Loop through all the training data
# Forward pass, then compute the loss
# Backpropagation
# Update the weights
# Count the correctly predicted samples
# Compute the training accuracy and loss
# Repeat on the validation data

In [None]:
def evaluate_model(model, test_loader, device, num_classes=10):
    """Report test accuracy and plot the confusion matrix of `model`.

    Args:
        model: network to evaluate; it is moved to `device` first.
        test_loader: iterable of (inputs, labels) batches; inputs are
            reshaped to (-1, 1, 28, 28) before the forward pass.
        device: torch device the evaluation runs on.
        num_classes: number of classes; fixes both the confusion-matrix
            shape and the heatmap tick labels (defaults to 10).

    Raises:
        ValueError: if `test_loader` yields no samples.
    """
    # Mirror train_model so evaluation also works on a model that has not
    # been moved to `device` yet.
    model.to(device)
    model.eval()
    correct, total = 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.view(-1, 1, 28, 28)
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy")

    accuracy = correct / total
    # Pin the label set: without it the matrix only contains classes that
    # actually occur, which can misalign with the fixed tick labels below.
    class_ids = list(range(num_classes))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    print(f"Test Accuracy: {accuracy:.4f}")

    # Plot confusion matrix
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_ids, yticklabels=class_ids)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

# finally using best configuration retrain the model
# then test on the test data
# plot all graphs and the confusion matrix

In [None]:
def plot_loss_accuracy(train_losses, val_losses, train_accuracies, val_accuracies):
    """Show per-epoch loss (left) and accuracy (right) curves side by side.

    Each argument is a sequence with one value per epoch; the x-axis is the
    1-based epoch number.
    """
    plt.figure(figsize=(12, 6))

    # (train series, train label, val series, val label, y-axis label, title)
    panels = (
        (train_losses, 'Train Loss', val_losses, 'Validation Loss',
         'Loss', 'Training and Validation Loss'),
        (train_accuracies, 'Train Accuracy', val_accuracies, 'Validation Accuracy',
         'Accuracy', 'Training and Validation Accuracy'),
    )

    for position, panel in enumerate(panels, start=1):
        train_series, train_label, val_series, val_label, y_label, heading = panel
        plt.subplot(1, 2, position)  # 1 row, 2 columns
        plt.plot(range(1, len(train_series) + 1), train_series, label=train_label)
        plt.plot(range(1, len(val_series) + 1), val_series, label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(heading)
        plt.legend()

    # Show the plots
    plt.tight_layout()
    plt.show()

In [None]:
# Learning-rate sweep: train a fresh CNNModel per learning rate using the
# current `train_loader`, plot its curves, and record the best validation
# accuracy seen across epochs.
for lr in learning_rates:
    mymodel = CNNModel()
    criterion = nn.CrossEntropyLoss()
    print(f"Training with learning rate: {lr}")
    optimizer = torch.optim.SGD(mymodel.parameters(), lr=lr)

    history = train_model(mymodel, train_loader, val_loader, criterion, optimizer, epochs, device)
    train_losses, val_losses, train_accuracies, val_accuracies = history
    plot_loss_accuracy(*history)

    max_val_accuracy = max(val_accuracies)

    # Best learning rate within this sweep
    if max_val_accuracy > best_lr_config["val_accuracy"]:
        best_lr_config["learning_rate"] = lr
        best_lr_config["val_accuracy"] = max_val_accuracy

    # Best run overall; batch size and filters are recorded as the fixed
    # values 64 and [32, 64, 128] for this sweep.
    if max_val_accuracy > combined_best_config["val_accuracy"]:
        combined_best_config["learning_rate"] = lr
        combined_best_config["batch_size"] = 64
        combined_best_config["filter_config"] = [32, 64, 128]
        combined_best_config["val_accuracy"] = max_val_accuracy

In [None]:
# Batch-size sweep: train a fresh CNNModel per batch size with SGD (lr=0.01),
# plot its curves, and record the best validation accuracy seen across epochs.
for bs in batch_sizes:
    mymodel = CNNModel()
    criterion = nn.CrossEntropyLoss()
    optimize = torch.optim.SGD(mymodel.parameters(), lr=0.01)
    print(f"Training with batch size: {bs}")
    # Use a sweep-local loader. Rebinding the shared `train_loader` here would
    # leave it at the last batch size tried, and later cells that reuse
    # `train_loader` would silently train with that size.
    bs_train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    train_losses, val_losses, train_accuracies, val_accuracies = train_model(mymodel, bs_train_loader, val_loader, criterion, optimize, epochs, device)
    plot_loss_accuracy(train_losses, val_losses, train_accuracies, val_accuracies)

    max_val_accuracy = max(val_accuracies)

    # Update the best batch size configuration
    if max_val_accuracy > best_bs_config["val_accuracy"]:
        best_bs_config.update({"batch_size": bs, "val_accuracy": max_val_accuracy})
    # Update the overall best run; learning rate and filters are the fixed
    # values used by this sweep.
    if max_val_accuracy > combined_best_config["val_accuracy"]:
        combined_best_config.update({"learning_rate": 0.01, "batch_size": bs, "filter_config": [32, 64, 128], "val_accuracy": max_val_accuracy})

In [None]:
# Filter-configuration sweep: train a fresh CNNModel per filter list with SGD,
# plot its curves, and record the best validation accuracy seen across epochs.
sweep_lr = 0.01
sweep_batch_size = 64
# Build the loader here so the batch size actually used always matches the
# value recorded below, whatever `train_loader` currently points to.
fs_train_loader = DataLoader(train_dataset, batch_size=sweep_batch_size, shuffle=True)

for fs in filter_config:
    print(f"Training with filter configuration: {fs}")
    mymodel = CNNModel(filters=fs)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(mymodel.parameters(), lr=sweep_lr)
    train_losses, val_losses, train_accuracies, val_accuracies = train_model(mymodel, fs_train_loader, val_loader, criterion, optimizer, epochs, device)
    plot_loss_accuracy(train_losses, val_losses, train_accuracies, val_accuracies)
    max_val_accuracy = max(val_accuracies)

    # Update the best filter configuration
    if max_val_accuracy > best_filter_config["val_accuracy"]:
        best_filter_config.update({"filter_config": fs, "val_accuracy": max_val_accuracy})
    # Update the overall best run. The key must be "filter_config", which is
    # the key the final retraining cell reads; any other key leaves that entry stale.
    if max_val_accuracy > combined_best_config["val_accuracy"]:
        combined_best_config.update({"learning_rate": sweep_lr, "batch_size": sweep_batch_size, "filter_config": fs, "val_accuracy": max_val_accuracy})

In [None]:

# Re-train and evaluate the final combined best model
# Fail early with a clear message if the search cells have not filled in the
# combined configuration.
missing_keys = [
    key for key in ("learning_rate", "batch_size", "filter_config")
    if combined_best_config.get(key) is None
]
if missing_keys:
    raise RuntimeError(
        f"combined_best_config has no value for {missing_keys}; "
        "run the hyperparameter search cells first."
    )

print("\nTraining and evaluating the best combined model on test set...")
train_loader = DataLoader(train_dataset, batch_size=combined_best_config["batch_size"], shuffle=True)
# Pass the filters by keyword, as the filter sweep does.
mymodel = CNNModel(filters=combined_best_config["filter_config"])
# Fresh loss object, so this cell does not depend on one left behind by an earlier cell.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(mymodel.parameters(), lr=combined_best_config["learning_rate"])
train_model(mymodel, train_loader, val_loader, criterion, optimizer, epochs, device)
evaluate_model(mymodel, test_loader, device)

# Print the best configurations
print("\nBest Configurations:")
print(f"  Best Learning Rate: {best_lr_config['learning_rate']} (Validation Accuracy: {best_lr_config['val_accuracy']:.4f})")
print(f"  Best Batch Size: {best_bs_config['batch_size']} (Validation Accuracy: {best_bs_config['val_accuracy']:.4f})")
print(f"  Best filter config:{best_filter_config['filter_config']} (Validation Accuracy: {best_filter_config['val_accuracy']:.4f})")
print(f"  Combined Best Configuration: {combined_best_config}")

# Batch size
#   The batch size determines how many training samples the model processes before each weight update during training.
#   Small batch size ---> more frequent weight updates and better generalization: small batches introduce more noise into the gradient estimate, which can help the model avoid overfitting.
#   Cons: slower training and noisy gradient updates.
#   Large batch size ---> stable gradient updates, since larger batches average over more samples and give more accurate gradient estimates, and more efficient hardware utilization, since modern hardware such as GPUs can process more data in parallel.
#   Cons: slower convergence and a risk of overfitting.
# Learning rate: controls how much the model's weights are adjusted at each update.
#   Small ---> more precise and stable convergence, but slow convergence and a risk of getting stuck in local minima.
#   Large ---> faster convergence, but poor generalization, and it can lead to overfitting.
# Number of filters: in CNNs, filters are responsible for detecting patterns in the input images; the number of filters determines the model's capacity to learn complex patterns.
#   Small ---> fewer weights, which reduces computational cost and memory requirements and allows faster training, but the model may fail to capture complex patterns in the data (especially for tasks like image classification, where high-level features are crucial) and may underfit.
#   Large ---> increased model capacity, so the model can learn more complex features and extract better features, but at increased computational cost and with a risk of overfitting.