# <center>  *Hybrid Optimization for Training Neural Networks: Comparing Genetic Algorithms and Simulated Annealing in PyTorch for image recognition* </center>

### Abstract
This is where my abstract will be

### Introduction
This will be the introduction for my coursework.

### Learning objective
This is where my learning objectives will be

### Content
This is where my content overview will be

---

## Libraries


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import numpy as np

import torch.nn as nn
import torch.nn.functional as F

#set the optimisation criteria and the optimiser from Source [1]
import torch.optim as optim

import time

import random


---

## Datasets

The dataset used for training and testing is CIFAR-10.

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. <br>
There are 50000 training images and 10000 test images.

## Preprocessing

In [None]:
# CIFAR10 data pipeline, taken from Source [1].

# ToTensor converts each image to a tensor; Normalize then applies
# (x - 0.5) / 0.5 to each of the three colour channels.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Number of images per mini-batch, shared by both loaders
batch_size = 4

# Training split: downloaded into ./data if missing, reshuffled on every pass
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Test split: same transform, served in a fixed order
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names, indexed by the integer label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

### End of source [1]

# source [1] : https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [None]:
# function to show image in the data set from Source [1]
# functions to show an image
def imshow(img):
    """Display a normalised, channel-first image tensor with matplotlib.

    Args:
        img: Image tensor whose values were normalised with mean 0.5 and
            std 0.5; it is mapped back with ``img / 2 + 0.5`` before display.
    """
    restored = img / 2 + 0.5  # undo the normalisation
    # matplotlib wants the channel axis last, so move axis 0 to the end.
    channel_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channel_last)
    plt.show()

### end of source [1]

# Function that shows images together with their labels
def show_images_labels(images, labels):
    """Display a grid of images and print the class name of each one.

    Args:
        images: Batch of image tensors; rendered as one grid through
            ``torchvision.utils.make_grid`` and ``imshow``.
        labels: Iterable of integer class indices (ints or 0-dim tensors)
            into the module-level ``classes`` tuple, one per image.
    """
    imshow(torchvision.utils.make_grid(images))
    # Iterate over the labels actually supplied rather than the global
    # batch_size, so a batch shorter than batch_size does not raise IndexError.
    print(' '.join(f'{classes[int(label)]:5s}' for label in labels))

# Fetch one mini-batch from the training loader and display it with its labels.
# trainloader was created with shuffle=True, so the batch differs between runs.
dataiter = iter(trainloader)
images, labels = next(dataiter)

show_images_labels(images, labels)

---

## Base Model Implementation

For our base neural network model we will be defining a CNN model ....


In [None]:
# Define the CNN model from Source [2] modified so it can be used for CIFAR10 dataset
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images, 10 outputs.

    Two 3x3 convolutions (each followed by ReLU and 2x2 max-pooling) feed a
    128-unit hidden layer and a 10-way output layer. ``forward`` returns
    log-probabilities (``log_softmax`` over dim 1).
    """

    def __init__(self):
        super().__init__()
        # 3x3 kernels with padding 1 keep the spatial size; only pooling shrinks it.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)
        # Two 2x2 poolings reduce 32x32 to 8x8 with 64 channels.
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2, stride=2)
        flattened = stage2.view(-1, 64 * 8 * 8)
        hidden = F.relu(self.fc1(flattened))
        scores = self.fc2(hidden)
        return F.log_softmax(scores, dim=1)
### End of source [2]

In [None]:
def train_nn(model, trainloader, criterion, optimizer, epochs=2, use_gpu=True, log_every=2000):
    """
    Train a PyTorch neural network model in place and record its loss history.

    The running loss is averaged over windows of ``log_every`` mini-batches.
    Any partial window left at the end of an epoch is averaged and recorded
    too, so the returned list is non-empty whenever at least one batch was
    processed (previously epochs shorter than 2000 batches logged nothing).

    Args:
        model: PyTorch model to train (updated in-place).
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer (Adam, SGD, etc.) built on ``model``'s parameters.
        epochs (int, optional): Number of training epochs. Default is 2.
        use_gpu (bool, optional): If True, uses CUDA when it is available.
        log_every (int, optional): Number of mini-batches per averaged loss
            entry. Default is 2000.

    Returns:
        train_losses: List of averaged training losses, in training order.
        training_time: Wall-clock seconds spent in the training loop.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    # Use GPU if available and selected
    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    model.to(device)

    model.train()  # Set model to training mode
    train_losses = []

    start_time = time.time()
    for _ in range(epochs):
        running_loss = 0.0
        batches_in_window = 0

        for inputs, labels in trainloader:
            # Move inputs and labels to the same device as the model
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients

            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss

            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()
            batches_in_window += 1

            # Record the average loss once a full window has been seen
            if batches_in_window == log_every:
                train_losses.append(running_loss / log_every)
                running_loss = 0.0
                batches_in_window = 0

        # Flush the trailing partial window so its batches are not dropped
        if batches_in_window:
            train_losses.append(running_loss / batches_in_window)
    training_time = time.time() - start_time

    print('Finished Training')
    return train_losses, training_time

In [None]:
def test_4_random_images(testloader, model, classes, use_gpu=True, num_images=4):
    """
    Visually spot-check the model on a few images taken from one test batch.

    The first batch yielded by ``testloader`` is used. Up to ``num_images``
    images are drawn from it at random (without replacement), shown in a
    grid, and their true and predicted class names are printed.

    Args:
        testloader: PyTorch DataLoader for test data.
        model: Trained PyTorch model (left in evaluation mode).
        classes: Sequence of class labels indexed by integer class id.
        use_gpu (bool, optional): Whether to use GPU if available.
        num_images (int, optional): Maximum number of images to sample.
            Default is 4. Fewer are used when the batch is smaller.

    Returns:
        accuracy (float): Percentage of the sampled images classified correctly.

    Raises:
        ValueError: If ``num_images`` is smaller than 1 or the batch is empty.
    """
    if num_images < 1:
        raise ValueError("num_images must be at least 1")

    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    # Take the first batch produced by the loader
    images, labels = next(iter(testloader))

    # Never ask for more images than the batch holds; random.sample would
    # raise ValueError for batches smaller than the requested sample.
    sample_size = min(num_images, len(images))
    if sample_size == 0:
        raise ValueError("testloader yielded an empty batch")

    indices = random.sample(range(len(images)), sample_size)
    selected_images = images[indices].to(device)
    selected_labels = labels[indices].to(device)

    # Get model predictions
    with torch.no_grad():
        outputs = model(selected_images)
        _, predicted = torch.max(outputs, 1)

    # Compute accuracy
    correct = (predicted == selected_labels).sum().item()
    accuracy = correct / sample_size * 100

    # Display images with labels (images go back to CPU for matplotlib)
    imshow(torchvision.utils.make_grid(selected_images.cpu()))
    print('GroundTruth: ', ' '.join(f'{classes[int(label)]:5s}' for label in selected_labels))
    print('Predicted:   ', ' '.join(f'{classes[int(guess)]:5s}' for guess in predicted))
    print(f'Accuracy on these {sample_size} images: {accuracy:.2f}%')

    return accuracy

In [None]:
def test_nn_per_class(testloader, model, classes, use_gpu=True):
    """
    Efficiently tests the model on all images in the test set and returns per-class accuracy.

    Args:
        testloader: PyTorch DataLoader for test data.
        model: Trained PyTorch model.
        classes: List of class labels.
        use_gpu (bool, optional): If True, uses GPU if available. Default is True.

    Returns:
        class_accuracies (dict): Dictionary containing accuracy for each class.
    """
    # Select device (GPU if available, otherwise CPU)
    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    # Prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    # Disable gradient computation for efficiency
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU/CPU
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)  # Get predicted class

            # Update per-class statistics
            for label, prediction in zip(labels, predictions):
                total_pred[classes[label.item()]] += 1  # Count total instances of each class
                if label == prediction:
                    correct_pred[classes[label.item()]] += 1  # Count correct predictions

    # Compute per-class accuracy
    class_accuracies = {
        classname: (100 * correct_pred[classname] / total_pred[classname]) if total_pred[classname] > 0 else 0.0
        for classname in classes
    }

    print("Finished per-class testing.")
    return class_accuracies


In [None]:
def test_nn(testloader, model, use_gpu=True):
    """
    Efficiently tests the model on all images in the test set and returns overall accuracy.

    Args:
        testloader: PyTorch DataLoader for test data.
        model: Trained PyTorch model.
        use_gpu (bool, optional): If True, uses GPU if available. Default is True.

    Returns:
        overall_accuracy (float): Accuracy of the model on the test set.
    """
    # Select device (GPU if available, otherwise CPU)
    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    correct = 0
    total = 0

    # Disable gradient computation for efficiency
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU/CPU
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)  # Get predicted class

            # Update total and correct predictions
            total += labels.size(0)
            correct += (predictions == labels).sum().item()

    # Compute overall accuracy
    overall_accuracy = 100 * correct / total

    print(f"Overall Model Accuracy: {overall_accuracy:.2f}%")
    return overall_accuracy


---

## Genetic Algorithm Implementation

<b>What is a Genetic Algorithm?</b>

In [None]:
class LeNet5(nn.Module):
    """LeNet-5 style network whose layer widths scale with the input channels.

    Args:
        num_classes: Number of output classes.
        grayscale: If True the network takes 1-channel input, otherwise
            3-channel input; every layer width is multiplied by that count.
    """

    def __init__(self, num_classes, grayscale=False):
        super().__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        in_channels = 1 if self.grayscale else 3

        # Convolutional feature extractor: conv -> tanh -> pool, twice
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6 * in_channels, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6 * in_channels, 16 * in_channels, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Fully connected head; its input size assumes a 5x5 feature map
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5 * in_channels, 120 * in_channels),
            nn.Tanh(),
            nn.Linear(120 * in_channels, 84 * in_channels),
            nn.Tanh(),
            nn.Linear(84 * in_channels, num_classes),
        )

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of images."""
        feature_maps = self.features(x)
        logits = self.classifier(feature_maps.flatten(start_dim=1))
        return logits, logits.softmax(dim=1)

In [None]:
def compute_fitness(model, optimizer, train_loader, test_loader, epochs):
    """
    Train ``model`` and return its accuracy on ``test_loader`` as its fitness.

    The loss is ``nn.NLLLoss``, so ``model`` must output log-probabilities
    (for example through ``log_softmax``).

    Args:
        model: PyTorch model to train in place and then evaluate.
        optimizer: Optimizer built on ``model``'s parameters. It is now used
            as given; pass None to fall back to Adam with default settings.
        train_loader: Iterable yielding ``(data, target)`` training batches.
        test_loader: Iterable yielding ``(data, target)`` evaluation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        accuracy (float): Fraction (0..1) of correctly classified samples,
            or 0.0 when ``test_loader`` yields no samples.
    """
    criterion = nn.NLLLoss()
    # Honour the caller's optimizer; previously it was silently replaced.
    if optimizer is None:
        optimizer = optim.Adam(model.parameters())

    model.train()
    for _ in range(epochs):
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    print('Finished Training')

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # Avoid ZeroDivisionError when there is nothing to evaluate on
    if total == 0:
        print('Accuracy of the network on the 0 test images: 0 %')
        return 0.0

    accuracy = correct / total
    # Report the real sample count rather than a hard-coded 10000
    print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')

    return accuracy

In [None]:
# Function to initialise the population
def initialise_population(population):
    """Create a population of freshly initialised ``CNN`` models.

    Args:
        population: Number of models to create. The parameter keeps its
            original name for compatibility, but it is a size: the notebook
            calls this function with ``population_size``. Previously the
            argument was ignored and the global ``population_size`` was read.

    Returns:
        list: ``population`` independent ``CNN`` instances.
    """
    return [CNN() for _ in range(int(population))]

In [None]:
# Function to perform the crossover operation
def crossover(parent1, parent2):
    """Recombine two parent models into two children.

    Each child starts as a deep copy of one parent, so it inherits every
    parameter and buffer. Then, for every parameter tensor, the second half
    along dimension 0 is swapped with the other parent's. For a 32-filter
    ``conv1`` this reproduces the original 16/16 split. Previously only
    ``conv1.weight`` was recombined and all other child parameters were
    fresh random initialisations, discarding the parents' training.

    Args:
        parent1: First parent model (not modified).
        parent2: Second parent model with the same architecture (not modified).

    Returns:
        tuple: ``(child1, child2)``, new models of the parents' types.
    """
    import copy  # local import: only this function needs it

    child1 = copy.deepcopy(parent1)
    child2 = copy.deepcopy(parent2)

    with torch.no_grad():
        parameter_sets = zip(child1.parameters(), child2.parameters(),
                             parent1.parameters(), parent2.parameters())
        for c1, c2, p1, p2 in parameter_sets:
            if p1.dim() == 0:
                continue  # scalars cannot be split; the child keeps its copy
            cut = p1.shape[0] // 2
            # child1 = first half of parent1 + second half of parent2
            c1[cut:] = p2[cut:].to(c1.device)
            # child2 = first half of parent2 + second half of parent1
            c2[cut:] = p1[cut:].to(c2.device)

    return child1, child2

In [None]:
# Function to perform the mutation operation
def mutate(model, mutation_rate, mutation_strength=0.1):
    """Add Gaussian noise to randomly chosen parameter tensors, in place.

    Each parameter tensor is mutated as a whole with probability
    ``mutation_rate``.

    Args:
        model: PyTorch model to mutate (modified in place).
        mutation_rate: Probability in [0, 1] that a given tensor is mutated.
        mutation_strength (float, optional): Scale applied to the standard
            normal noise. Default is 0.1, the previously hard-coded value.

    Returns:
        The same ``model`` object, for convenient chaining.
    """
    with torch.no_grad():
        for param in model.parameters():
            if torch.rand(1).item() < mutation_rate:
                # randn_like matches the parameter's device and dtype, so
                # models living on the GPU no longer hit a device mismatch.
                param.add_(torch.randn_like(param) * mutation_strength)
    return model

In [None]:
# Function that selects the top (best) half of a population
def select_best(population, fitness_scores):
    """Return the better-scoring half of ``population``, best first.

    Args:
        population: Sequence of individuals.
        fitness_scores: Sequence of scores aligned with ``population``
            (higher is better). It is not modified.

    Returns:
        list: ``len(population) // 2`` individuals ordered by descending
            fitness; ties keep their original relative order.
    """
    keep = len(population) // 2
    # Rank indices instead of repeatedly zeroing the maximum: this leaves the
    # caller's scores intact, copes with zero or negative scores, and works
    # for both lists and numpy arrays.
    ranked = sorted(range(len(population)),
                    key=lambda index: fitness_scores[index],
                    reverse=True)
    return [population[index] for index in ranked[:keep]]

In [None]:
# Initialize the population
population = initialise_population(population_size)

# Best individual seen across all generations (not only the last one)
best_accuracy = 0
best_model = None

# Start Genetic Algorithm process
for generation in range(generations):
    print(f'Generation {generation + 1}')
    generation_best = 0
    fitness_scores = np.zeros(len(population))  # One fitness score per individual

    # Start timer for the generation
    start_time = time.time()

    # **Fitness Evaluation** (individuals are trained and scored one after another)
    for i, model in enumerate(population):
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # Define optimizer for each model
        # compute_fitness takes the optimizer as its second positional
        # argument; omitting it raised a TypeError.
        fitness = compute_fitness(model, optimizer, trainloader, testloader, epochs)
        fitness_scores[i] = fitness
        generation_best = max(generation_best, fitness)

        if best_model is None or fitness > best_accuracy:
            best_accuracy = fitness
            best_model = model

    # End timer for the generation
    end_time = time.time()
    generation_time = end_time - start_time

    print(f'Best accuracy in generation {generation + 1} = {generation_best:.4f}')
    print(f'Time taken for generation {generation + 1}: {generation_time:.2f} seconds')
    print("\n")

    # **Parent Selection**: keep the best half (at least two when available)
    num_parents = max(2, len(population) // 2)
    top_indices = np.argsort(fitness_scores)[::-1][:num_parents]
    selected_parents = [population[i] for i in top_indices]

    next_generation = []

    # **Crossover & Mutation**
    # Parents are paired cyclically until the population is refilled. This
    # keeps the population size constant (it used to halve every generation)
    # and avoids the IndexError raised for an odd number of parents.
    pair_index = 0
    while selected_parents and len(next_generation) < population_size:
        parent1 = selected_parents[pair_index % len(selected_parents)]
        parent2 = selected_parents[(pair_index + 1) % len(selected_parents)]

        # Perform crossover
        child1, child2 = crossover(parent1, parent2)

        # Apply mutation
        mutate(child1, mutation_rate)
        mutate(child2, mutation_rate)

        next_generation.append(child1)
        next_generation.append(child2)
        pair_index += 1

    # **Population Update**: trim the surplus child when population_size is odd
    population = next_generation[:population_size]

# **Evaluate Final Best Model**
print("Best model's accuracy:")
final_accuracy = test_nn(testloader, best_model)


In [None]:
# # execute the genetic algorithm
# population = initialise_population(population_size)

# for generation in range(generations):
#     print(f'Generation {generation + 1}')
#     best_accuracy = 0
#     best_model = None
#     fitness_scores = []

#     # Start timer for the generation
#     start_time = time.time()

#     # Compute the fitness of the population
#     for model in population:
#         optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # Define optimizer for each model
#         fitness = compute_fitness(model, trainloader, testloader, epochs)
        
#         if fitness > best_accuracy:
#             best_accuracy = fitness
#             best_model = model
        
#         fitness_scores.append(fitness)

#     # End timer for the generation
#     end_time = time.time()
#     generation_time = end_time - start_time

#     print(f'Best accuracy in generation {generation + 1} = {best_accuracy}')
#     print(f'Time taken for generation {generation + 1}: {generation_time:.2f} seconds')
#     print("\n")

#     next_generation = []

#     # Select the top half of the population
#     selected_parents = select_best(population, fitness_scores)

#     # Crossover and mutate the parents
#     for i in range(0, len(selected_parents), 2):
#         parent1 = selected_parents[i]
#         parent2 = selected_parents[i + 1]
#         child1, child2 = crossover(parent1, parent2)
#         child1 = mutate(child1, mutation_rate)
#         child2 = mutate(child2, mutation_rate)
#         next_generation.append(child1)
#         next_generation.append(child2)

#     # Update the population with the next generation
#     population = next_generation

# # Print the best performing model
# print("Best model's accuracy:")
# print(test_all_images(testloader, best_model))


In [None]:
# Save the best model found by the genetic algorithm.
# Only the state_dict is written to 'best_model.pth', not the model object.
torch.save(best_model.state_dict(), 'best_model.pth')

---

## Simulated Annealing Implementation

In [None]:
# Simulated Annealing Hyperparameters
initial_temp = 10.0  # Initial temperature
final_temp = 0.1  # Stopping temperature
alpha = 0.95  # Cooling rate
max_iterations = 100  # Number of iterations per temperature step

# Fresh, untrained CNN to be optimised by simulated annealing.
# NOTE: this rebinds the global name `model`, which the genetic-algorithm
# cell also uses as its loop variable.
model = CNN()

In [None]:
# #function to compute loss
# def compute_loss(model, dataloader):
#     model.eval()
#     loss = 0
#     with torch.no_grad():
#         for data, target in dataloader:
#             output = model(data)
#             loss += criterion(output, target).item()
#     return loss

In [None]:
# import copy

# def simulated_annealing(model, dataloader, initial_temp, final_temp, alpha, max_iterations):
#     """
#     Simulated Annealing Algorithm for optimizing neural network weights.

#     Args:
#         model: PyTorch model to optimize.
#         dataloader: DataLoader for evaluating loss.
#         initial_temp: Initial temperature (controls acceptance of bad moves).
#         final_temp: Final temperature to stop SA.
#         alpha: Cooling rate (0 < alpha < 1).
#         max_iterations: Number of iterations per temperature step.

#     Returns:
#         best_model: The model with the lowest observed loss.
#     """
#     best_model = copy.deepcopy(model)
#     current_model = copy.deepcopy(model)
#     current_loss = compute_loss(current_model, dataloader)
#     best_loss = current_loss  # Store best loss for efficiency

#     temp = initial_temp
#     iteration = 0

#     while temp > final_temp:
#         for _ in range(max_iterations):
#             # Generate a new model with slight random weight changes
#             new_model = copy.deepcopy(current_model)
#             with torch.no_grad():
#                 for param in new_model.parameters():
#                     perturbation = torch.randn_like(param) * (temp / initial_temp)  # Scale perturbation
#                     param += perturbation  # Apply perturbation

#             # Compute new loss
#             new_loss = compute_loss(new_model, dataloader)

#             # Accept new model if loss is lower or with a probability P
#             delta = new_loss - current_loss
#             if delta < 0 or np.exp(-delta / temp) > np.random.rand():
#                 current_model = new_model
#                 current_loss = new_loss

#                 # Update best model if improved
#                 if new_loss < best_loss:
#                     best_model = copy.deepcopy(new_model)
#                     best_loss = new_loss

#         # Reduce temperature
#         temp *= alpha
#         iteration += 1
#         print(f"Iteration {iteration}: Temperature {temp:.4f}, Loss {current_loss:.4f}")

#     return best_model

# # Run Simulated Annealing on CNN
# best_trained_model = simulated_annealing(model, trainloader, initial_temp=10.0, final_temp=0.1, alpha=0.95, max_iterations=10)

# print(test_all_images(testloader, best_trained_model))

---

## Evaluation Function

In [None]:
# Helper that draws the accuracy, loss and per-class plots for evaluate_model.
def _plot_evaluation(train_accuracies, test_accuracies, train_losses, test_class_accuracies):
    """Plot accuracy per epoch, loss per epoch and accuracy per class."""
    epoch_axis = range(1, len(test_accuracies) + 1)

    # Training vs testing accuracy
    plt.figure(figsize=(12, 6))
    plt.plot(epoch_axis, train_accuracies, label='Train Accuracy')
    plt.plot(epoch_axis, test_accuracies, label='Test Accuracy')
    plt.title('Training vs Testing Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Loss curve: one value per epoch, so the x-axis is epochs, not batches
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', color='blue')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training Loss Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Per-class accuracy
    plt.figure(figsize=(10, 5))
    plt.bar(list(test_class_accuracies.keys()), list(test_class_accuracies.values()), color='blue')
    plt.xlabel("Classes")
    plt.ylabel("Accuracy (%)")
    plt.title("Model Accuracy Per Class")
    plt.xticks(rotation=45)  # Rotate class names for readability
    plt.ylim(0, 100)  # Set y-axis range to 0-100%
    plt.show()


# This function will be used to evaluate the network.
def evaluate_model(model, trainloader, testloader, classes, epochs=10, use_gpu=True, show_graph=True):
    """
    Trains and evaluates a PyTorch model while tracking losses and accuracies.

    The model is trained one epoch at a time with SGD (lr=0.01, momentum=0.9)
    and cross-entropy loss. After every epoch its accuracy is measured on
    both the training and the test loader.

    Args:
        model: PyTorch model to train and evaluate (updated in place).
        trainloader: DataLoader containing training data.
        testloader: DataLoader containing test data.
        classes: List of class labels.
        epochs (int, optional): Number of training epochs. Default is 10.
        use_gpu (bool, optional): If True, enables GPU acceleration. Default is True.
        show_graph (bool, optional): If True, displays accuracy/loss graphs
            and prints per-class accuracy. Default is True.

    Returns:
        results (dict): Dictionary containing:
            - 'train_losses': Last logged training loss of each epoch
              (NaN when train_nn logged nothing for that epoch).
            - 'train_accuracies': Accuracy on the training loader per epoch.
            - 'test_accuracies': Accuracy on the test loader per epoch.
            - 'test_class_accuracies': Dictionary of per-class accuracy.
            - 'final_test_accuracy': Test accuracy after the last epoch
              (None when ``epochs`` is 0).
            - 'total_time': Total time taken for training.
    """
    # Specify the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimiser = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    # Move model to GPU if available
    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    model.to(device)

    # Initialize tracking variables
    train_losses = []
    train_accuracies = []
    test_accuracies = []

    total_time = 0

    # Training and Evaluation Loop
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Train the model for one epoch
        train_loss, train_time = train_nn(model, trainloader, criterion, optimiser, epochs=1, use_gpu=use_gpu)
        total_time += train_time

        # train_nn may return an empty loss list; do not index into it blindly
        epoch_loss = train_loss[-1] if train_loss else float('nan')

        # Measure accuracy on both splits. test_nn takes no `classes`
        # argument; passing one raised a TypeError. Training accuracy used
        # to be a copy of the test accuracy.
        train_accuracy = test_nn(trainloader, model, use_gpu=use_gpu)
        test_accuracy = test_nn(testloader, model, use_gpu=use_gpu)

        # Store losses and accuracies
        train_losses.append(epoch_loss)
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        print(f'Epoch {epoch+1}: Train Loss: {epoch_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')

    # Final test accuracy after all epochs
    final_test_accuracy = test_accuracies[-1] if test_accuracies else None

    # Final test accuracy per class
    test_class_accuracies = test_nn_per_class(testloader, model, classes, use_gpu=use_gpu)

    if show_graph:
        _plot_evaluation(train_accuracies, test_accuracies, train_losses, test_class_accuracies)

        # Print per-class accuracy (iterate the dict's items; it is not callable)
        for classname, accuracy in test_class_accuracies.items():
            print(f'Accuracy for class {classname:5s}: {accuracy:.1f}%')

    # Return all results in a dictionary
    results = {
        "train_losses": train_losses,
        "train_accuracies": train_accuracies,
        "test_accuracies": test_accuracies,
        "test_class_accuracies": test_class_accuracies,
        "final_test_accuracy": final_test_accuracy,
        "total_time": total_time
    }

    return results

---
## Comparative Analysis and Results


### Base model results


In [None]:
# Define model, optimizer, and loss function
base = CNN()
optimizer = optim.Adam(base.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train on the GPU when one is available (train_nn falls back to the CPU).
# train_nn has no `stats` or `graph` options, so none are passed. It updates
# `base` in place and returns (loss history, elapsed seconds).
losses, training_time = train_nn(base, trainloader, criterion, optimizer, epochs=5, use_gpu=True)

# The original cell bound the trained network to `trained_model`; keep that name.
trained_model = base

### Genetic algorithm results

In [None]:
# Hyperparameters read by the genetic-algorithm cell earlier in the notebook;
# run this cell before that one.

# Set the number of generations
generations = 2

# Set the population size (derived here as two individuals per generation)
population_size = generations * 2

# Set the mutation rate (passed to mutate() as the per-tensor mutation probability)
mutation_rate = 0.1

# Set the number of epochs each individual is trained for when its fitness is computed
epochs = 5

### Simulated annealing results

---

## Hybrid Training Approach

---

## Ensemble Learning Strategy

---
## Conclusion



---
## References


source [1] : https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html 
source [2] : https://www.geeksforgeeks.org/how-to-implement-genetic-algorithm-using-pytorch/ 