# 

**CNN for image recognition using CIFAR-10 Dataset**
 * **Optimizer: Gradient Descent**

In [None]:
"""
Inspiration from the official tutorials for PyTorch from YuliyaPylypiv on GitHub
Source: https://github.com/pytorch/examples/blob/main/mnist/main.py
"""


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

#Check if GPU is available and set device accordingly
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Number of the GPU in use: ", torch.cuda.device_count())
#get_device_name() raises when no CUDA device exists, so only query it on GPU
if device.type == "cuda":
    print("GPU Model: ", torch.cuda.get_device_name(device))
else:
    print("GPU Model: ", "none (running on CPU)")

#DEFINE THE NETWORK
class DeepCNN(nn.Module):
    """Three conv / batch-norm / max-pool stages followed by three linear layers.

    ``fc1`` takes 128 * 4 * 4 features, so the three 2x2 poolings must leave a
    4x4 feature map (which they do for 32x32 inputs). The output has 10 logits.
    """

    def __init__(self):
        super().__init__()
        #Convolutions: 3x3 kernels, padding 1 keeps the spatial size
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        #2x2 max pooling with stride 2, shared by all three stages
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.batchnorm1 = nn.BatchNorm2d(num_features=32)
        self.batchnorm2 = nn.BatchNorm2d(num_features=64)
        self.batchnorm3 = nn.BatchNorm2d(num_features=128)
        self.dropout = nn.Dropout(p=0.2)
        #Classifier head
        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        #Stage 1
        x = self.batchnorm1(self.conv1(x))
        x = self.pool(torch.relu(x))
        #Stage 2, followed by dropout
        x = self.batchnorm2(self.conv2(x))
        x = self.pool(torch.relu(x))
        x = self.dropout(x)
        #Stage 3
        x = self.batchnorm3(self.conv3(x))
        x = self.pool(torch.relu(x))
        #Flatten to (batch, 2048) for the linear layers
        x = x.view(-1, 128 * 4 * 4)
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def print_weights(self):
        """Print the element count of every named parameter tensor."""
        for name, param in self.named_parameters():
            print(f"Layer: {name}, Number of Weights: {param.numel()}")


#CIFAR-10 preprocessing: convert to tensor, then normalise each channel with mean 0.5 / std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

#Training split, reshuffled every epoch
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

#Test split, fixed order
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

#Hyper-parameters used by the training calls in this notebook
lr_SGD = 0.005
ADAM_lr = 0.001
epochs = 50

#Per-epoch history filled by the training loop
losses = []
train_accuracy = []
test_accuracy = []

#Build the CNN on the selected device and list its parameter sizes
model = DeepCNN().to(device)
model.print_weights()

#Loss function and a module-level SGD optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)

#Training model
def train_model(model, trainloader, criterion, optimizer_type, learning_rate, device, disable_param_fc, disable_param_fc3):
    """Run one pass over ``trainloader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        model: network with an ``fc3`` sub-module.
        trainloader: sized iterable of batches; ``batch[0]`` are the inputs and
            ``batch[1]`` the integer class labels.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer_type: ``'SGD'`` or ``'Adam'``.
        learning_rate: learning rate given to the optimizer.
        device: device the batches are moved to.
        disable_param_fc: if true, freeze every parameter whose name does not
            contain ``'fc3'``.
        disable_param_fc3: if true, freeze the parameters of ``model.fc3``.

    Returns:
        Tuple of the loss averaged over batches and the accuracy in percent
        measured on the batches as they were processed.

    Raises:
        ValueError: if ``optimizer_type`` is not ``'SGD'`` or ``'Adam'``.

    Notes:
        * A new optimizer is built on every call, so optimizer state (for
          example Adam's moment estimates) does not carry over between calls.
        * With both flags set nothing can be learned: the pass is a pure
          evaluation and runs in eval mode with autograd disabled, so dropout
          does not perturb the score and the BatchNorm running statistics are
          left untouched.
    """
    if optimizer_type == 'SGD':
        optimizer_cls = optim.SGD
    elif optimizer_type == 'Adam':
        optimizer_cls = optim.Adam
    else:
        raise ValueError(f"Unsupported optimizer_type: {optimizer_type!r}")

    #The freeze flags are constant for the whole call, so apply them once
    for name, param in model.named_parameters():
        if 'fc3' not in name:
            param.requires_grad = not disable_param_fc
    for param in model.fc3.parameters():
        param.requires_grad = not disable_param_fc3

    trainable = [param for param in model.parameters() if param.requires_grad]
    #An optimizer cannot be built from an empty parameter list
    optimizer = optimizer_cls(trainable, lr=learning_rate) if trainable else None
    model.train(bool(trainable))

    running_loss = 0.0
    correct_train = 0
    total_train = 0

    with torch.set_grad_enabled(bool(trainable)):
        for data in trainloader:
            inputs, labels = data[0].to(device), data[1].to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            #Back-propagate whenever at least one parameter is trainable
            if optimizer is not None:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            running_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(trainloader)
    train_accuracy = 100 * correct_train / total_train
    return epoch_loss, train_accuracy

for epoch in range(epochs):
    #One pass over the training set with every layer trainable
    epoch_loss, train_acc = train_model(
        model,
        trainloader,
        criterion,
        optimizer_type='SGD',
        learning_rate=lr_SGD,
        device=device,
        disable_param_fc=False,
        disable_param_fc3=False,
    )
    losses.append(epoch_loss)
    train_accuracy.append(train_acc)

    #Accuracy on the test split after this epoch
    model.eval()
    correct_test = 0
    total_test = 0
    with torch.no_grad():
        for data in testloader:
            inputs = data[0].to(device)
            labels = data[1].to(device)
            outputs = model(inputs)
            predicted = torch.max(outputs.data, 1)[1]
            correct_test += (predicted == labels).sum().item()
            total_test += labels.size(0)

    test_accuracy.append(100 * correct_test / total_test)

    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy[-1]:.2f}%, Test Accuracy: {test_accuracy[-1]:.2f}%")

#Training vs. test accuracy per epoch (x axis is 1-based)
plt.figure(figsize=(10, 6))
for curve, curve_label in ((train_accuracy, 'Training Accuracy'), (test_accuracy, 'Test Accuracy')):
    plt.plot(range(1, epochs + 1), curve, label=curve_label)
plt.title('Training and Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.show()


* Optimizer: PSO

In [None]:
#Function to Randomize weights and biases
import torch.nn.init as init

def randomize_weights(model):
    """Re-initialise ``model.fc3`` in place: Xavier-uniform weights, zero biases."""
    last_layer = model.fc3
    with torch.no_grad():
        init.xavier_uniform_(last_layer.weight)
        init.constant_(last_layer.bias, 0.0)

randomize_weights(model)

#Show the freshly initialised fc3 parameters
for heading, values in (
    ("Randomized Last Layer Weights:", model.fc3.weight.data),
    ("Randomized Last Layer Biases:", model.fc3.bias.data),
):
    print(heading)
    print(values)

In [None]:
"""
Inspired by the code from lab 4 of COMM3013, University of Surrey
Source: https://surreylearn.surrey.ac.uk/d2l/le/lessons/252875/topics/2851191
"""

import operator
import random
import math
from deap import base
from deap import benchmarks
from deap import creator
from deap import tools
import numpy as np


#Range used to draw the initial coordinates of a particle
posMinInit = -1
posMaxInit = 1
#Range used to draw the initial speeds
VMinInit = 0.45
VMaxInit = 2
populationSize = 60
#One coordinate per fc3 parameter: 640 weights followed by 10 biases
dimension = 650
#Statistics are recorded every `interval` iterations
interval = 1
iterations = 50

#Parameter setup
#The inertia weight goes from wmax down towards wmin over the run
wmax = 0.87
wmin = 0.4
#Pull towards the particle's own best (c1) and the swarm best (c2)
c1 = 2.0
c2 = 2.0

#Other variables or lists
best_particle = None
pso_list_best = []

#The class is named FitnessMin, but the positive weight makes DEAP treat
#larger values as better (a weight of -1.0 would minimise)
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
#A particle is a list of coordinates carrying five extra attributes:
# 1. fitness: the fitness of the particle,
# 2. speed: a list with one speed per coordinate,
# 3./4. smin, smax: limits for the speed values,
# 5. best: the best state the particle has been in so far.
creator.create(
    "Particle",
    list,
    fitness=creator.FitnessMin,
    speed=list,
    smin=None,
    smax=None,
    best=None,
)


def generate(size, smin, smax):
    """Build a particle with ``size`` random coordinates and random initial speeds.

    ``smin`` / ``smax`` are stored on the particle as its speed limits.
    """
    coordinates = [random.uniform(posMinInit, posMaxInit) for _ in range(size)]
    part = creator.Particle(coordinates)
    part.speed = [random.uniform(VMinInit, VMaxInit) for _ in range(size)]
    part.smin, part.smax = smin, smax
    return part


#Our function to measure the fitness
def part_fit(particle):
    """Load ``particle`` into ``model.fc3`` and return the training accuracy in percent.

    The leading ``fc3.weight.numel()`` values of ``particle`` become the fc3
    weights and the trailing ``fc3.bias.numel()`` values the fc3 biases (640
    and 10 for the 64 -> 10 layer). The accuracy comes from ``train_model``
    called with every layer frozen, so the pass is only used as a measurement.
    """
    weight = model.fc3.weight
    bias = model.fc3.bias
    n_weights = weight.numel()
    n_biases = bias.numel()

    #Give the dtype explicitly: torch.tensor() would otherwise infer an integer
    #dtype for a slice that happens to contain only Python ints
    new_weight = torch.tensor(particle[:n_weights], dtype=weight.dtype, device=device)
    new_bias = torch.tensor(particle[-n_biases:], dtype=bias.dtype, device=device)

    #Overwrite the existing parameters in place instead of rebinding .data
    with torch.no_grad():
        weight.copy_(new_weight.view(weight.size()))
        bias.copy_(new_bias.view(bias.size()))
    model.to(device)

    #call the training function
    _, train_accuracy = train_model(model, trainloader, criterion, optimizer_type='SGD', learning_rate=lr_SGD, device=device, disable_param_fc=True, disable_param_fc3=True)
    return train_accuracy

def updateParticle(part, best, weight):
    """Advance ``part`` by one PSO step (speed first, then position), in place.

    speed = 0.7*(weight*speed + c1*r1*(localBestPos-currentPos) + c2*r2*(globalBestPos-currentPos))

    r1 and r2 are drawn per coordinate from [0, 1]. ``part.smin`` and
    ``part.smax`` are not applied: the speed is not clamped.
    """
    size = len(part)
    #All r1 values are drawn before any r2 value
    r1 = [random.uniform(0, 1) for _ in range(size)]
    r2 = [random.uniform(0, 1) for _ in range(size)]

    inertia = [weight * speed for speed in part.speed]
    local_pull = [c1 * (r * (own_best - pos)) for r, own_best, pos in zip(r1, part.best, part)]
    global_pull = [c2 * (r * (swarm_best - pos)) for r, swarm_best, pos in zip(r2, best, part)]

    part.speed = [0.7 * (a + (b + c)) for a, b, c in zip(inertia, local_pull, global_pull)]

    # update position with speed
    part[:] = [pos + speed for pos, speed in zip(part, part.speed)]

toolbox = base.Toolbox()
#Particle factory and population builder
toolbox.register(
    "particle",
    generate,
    size=dimension,
    smin=-3,
    smax=3,
)
toolbox.register("population", tools.initRepeat, list, toolbox.particle)
#Operators
toolbox.register("update", updateParticle)
#DEAP's built-in sphere benchmark function
toolbox.register("evaluate", benchmarks.sphere)
#Fitness function defined in this notebook
toolbox.register("part_fit", part_fit)

def main():
    """Run the PSO loop over the fc3 parameters.

    Each iteration evaluates every particle with ``toolbox.part_fit``, refreshes
    the per-particle and swarm-wide bests, and then moves all particles with
    ``toolbox.update``. The fitness weight is positive, so a larger fitness
    (accuracy) is better.

    Returns:
        Tuple ``(pop, logbook, best)``: the final population, the statistics
        logbook and a copy of the best particle seen, with its fitness.
    """
    pop = toolbox.population(n=populationSize) # Population Size
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    best = None

    #begin main loop
    for g in range(iterations):
        print("Iteration number ", g+1)
        print("###########################################################")
        w = wmax - (wmax-wmin)*g/iterations #decaying inertia weight

        for x_1, part in enumerate(pop):
            part.fitness.values = (toolbox.part_fit(part),) #changed to use our own function
            print("Particle at index", x_1, "has a fitness of", part.fitness.values[0])

            #update local best: no best yet, or the current fitness is better
            #(Fitness "<" compares weighted values, so it means "is worse than")
            if (not part.best) or (part.best.fitness < part.fitness):
                part.best = creator.Particle(part)
                part.best.fitness.values = part.fitness.values

            #update global best
            if (not best) or best.fitness < part.fitness:
                best = creator.Particle(part)
                best.fitness.values = part.fitness.values
                #Printing the best particle when it gets updated
                print("New best particle value: ", best.fitness.values[0] )

        for part in pop:
            toolbox.update(part, best,w)

        # Gather all the fitnesses in one list and print the stats
        # print every interval
        if g%interval==0: # interval
            logbook.record(gen=g, evals=len(pop), **stats.compile(pop))
            print(logbook.stream)
            #Each entry is the full "max" history so far; the last entry is the complete curve
            pso_list_best.append(logbook.select("max"))

    print('best particle position is ',best)
    return pop, logbook, best

if __name__ == "__main__":
    bestParticle = main()



In [None]:
#Copy the best PSO particle (third element returned by main) into fc3
bestParticle2 = bestParticle[2]
individual_tensor = torch.tensor(bestParticle2[:640]).to(device)
individual_tensor_b = torch.tensor(bestParticle2[-10:]).to(device)
model.fc3.weight.data = individual_tensor.view(model.fc3.weight.size())
model.fc3.bias.data = individual_tensor_b.view(model.fc3.bias.size())

#Measure accuracy on the test split with those parameters
model.eval()
total_test = 0
correct_test = 0

with torch.no_grad():
    for data in testloader:
        inputs = data[0].to(device)
        labels = data[1].to(device)
        outputs = model(inputs)
        predicted = torch.max(outputs.data, 1)[1]
        correct_test += (predicted == labels).sum().item()
        total_test += labels.size(0)

test_accuracy_updated2 = 100 * correct_test / total_test
print(f"PSO Test Accuracy with new weights and biases: {test_accuracy_updated2:.2f}%")

* Our Algorithm Proposed: Hybrid ADAM Particle Swarm Optimization (HAPSO)

In [None]:
#Re-initialise fc3 (Xavier-uniform weights, zero biases) before the next optimiser run
randomize_weights(model)

In [None]:
"""
Inspired by the code from lab 4 of COMM3013, University of Surrey
Source: https://surreylearn.surrey.ac.uk/d2l/le/lessons/252875/topics/2851191
"""

import operator
import random
import math
from deap import base
from deap import benchmarks
from deap import creator
from deap import tools
import numpy as np


#Range used to draw the initial coordinates of a particle
posMinInit = -1
posMaxInit = 1
#Range used to draw the initial speeds
VMinInit = 0.45
VMaxInit = 2
populationSize = 60
#One coordinate per fc3 parameter: 640 weights followed by 10 biases
dimension = 650
#Statistics are recorded every `interval` iterations
interval = 1
iterations = 50

#Parameter setup
#The inertia weight goes from wmax down towards wmin over the run
wmax = 0.9
wmin = 0.25
#Pull towards the particle's own best (c1) and the swarm best (c2)
c1 = 2.3
c2 = 1.7

###########################################################
#New approach variables needed
#Fraction of the iterations that run as plain PSO
perc_iter = 0.8
#First iteration index at which ADAM is applied as well, i.e. ADAM is used
#during the last 20% of the iterations
iter_counter = int(iterations * perc_iter)
print("ITER COUNTER: ", iter_counter)

ADAM_lr = 0.005

#Values needed for the resampling based on last generation improvements
best_fit_history = []
improvement_threshold = 0.02 #The improvement must be at least 2%
generations_to_check = 5 #Number of generations to check for improvement

#######################################################

#Other variables or lists
best_particle = None
best = None
best_position = None
our_algorithm_list_best = []

#The class is named FitnessMin, but the positive weight makes DEAP treat
#larger values as better (a weight of -1.0 would minimise)
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
#A particle is a list of coordinates carrying five extra attributes:
# 1. fitness: the fitness of the particle,
# 2. speed: a list with one speed per coordinate,
# 3./4. smin, smax: limits for the speed values,
# 5. best: the best state the particle has been in so far.
creator.create(
    "Particle",
    list,
    fitness=creator.FitnessMin,
    speed=list,
    smin=None,
    smax=None,
    best=None,
)


def generate(size, smin, smax):
    """Return a new particle: ``size`` random coordinates plus random initial speeds.

    The speed limits ``smin`` and ``smax`` are attached to the particle.
    """
    start_position = [random.uniform(posMinInit, posMaxInit) for _ in range(size)]
    start_speed = [random.uniform(VMinInit, VMaxInit) for _ in range(size)]
    part = creator.Particle(start_position)
    part.speed = start_speed
    part.smin = smin
    part.smax = smax
    return part


#Our function to measure the fitness
def part_fit(particle):
    """Load ``particle`` into ``model.fc3`` and return the training accuracy in percent.

    The leading ``fc3.weight.numel()`` values of ``particle`` become the fc3
    weights and the trailing ``fc3.bias.numel()`` values the fc3 biases (640
    and 10 for the 64 -> 10 layer). The accuracy comes from ``train_model``
    called with every layer frozen, so the pass is only used as a measurement.
    """
    weight = model.fc3.weight
    bias = model.fc3.bias
    n_weights = weight.numel()
    n_biases = bias.numel()

    #Give the dtype explicitly. A particle clamped to the integer bounds
    #posMinInit / posMaxInit can hold Python ints, and torch.tensor() turns an
    #all-int slice into a Long tensor that the linear layer cannot use.
    new_weight = torch.tensor(particle[:n_weights], dtype=weight.dtype, device=device)
    new_bias = torch.tensor(particle[-n_biases:], dtype=bias.dtype, device=device)

    #Overwrite the existing parameters in place instead of rebinding .data
    with torch.no_grad():
        weight.copy_(new_weight.view(weight.size()))
        bias.copy_(new_bias.view(bias.size()))
    model.to(device)

    #call the training function
    _, train_accuracy = train_model(model, trainloader, criterion, optimizer_type='SGD', learning_rate=lr_SGD, device=device, disable_param_fc=True, disable_param_fc3=True)
    return train_accuracy


#USED FOR ADAM: Our function to measure the fitness
def part_fit2(particle):
    """Load ``particle`` into fc3, run one ADAM pass on fc3 and return the accuracy.

    The leading ``fc3.weight.numel()`` values of ``particle`` become the fc3
    weights and the trailing ``fc3.bias.numel()`` values the fc3 biases.
    ``train_model`` is then called with the ADAM optimizer, every other layer
    frozen and fc3 left trainable.

    Side effect: after the pass the fc3 parameters are copied back into
    ``particle`` (same layout), so the particle position stays in step with the
    parameters the returned fitness belongs to. Without this the result of the
    ADAM pass would be discarded by the next fitness evaluation.

    Returns:
        Training accuracy in percent reported by ``train_model``.
    """
    weight = model.fc3.weight
    bias = model.fc3.bias
    n_weights = weight.numel()
    n_biases = bias.numel()

    #Explicit dtype: an all-int slice would otherwise become a Long tensor
    new_weight = torch.tensor(particle[:n_weights], dtype=weight.dtype, device=device)
    new_bias = torch.tensor(particle[-n_biases:], dtype=bias.dtype, device=device)

    #Overwrite the existing parameters in place instead of rebinding .data
    with torch.no_grad():
        weight.copy_(new_weight.view(weight.size()))
        bias.copy_(new_bias.view(bias.size()))
    model.to(device)

    #call the training function
    _, train_accuracy = train_model(model, trainloader, criterion, optimizer_type='Adam', learning_rate=ADAM_lr, device=device, disable_param_fc=True, disable_param_fc3=False)

    #Write the (possibly refined) fc3 parameters back into the particle.
    #Read them from the model again in case the parameter objects were replaced.
    particle[:n_weights] = model.fc3.weight.detach().flatten().cpu().tolist()
    particle[-n_biases:] = model.fc3.bias.detach().flatten().cpu().tolist()
    return train_accuracy

def updateParticle(part, best, weight):
    """Apply one PSO velocity and position update to ``part`` in place.

    speed = 0.7*(weight*speed + c1*r1*(localBestPos-currentPos) + c2*r2*(globalBestPos-currentPos))

    r1 and r2 are drawn per coordinate from [0, 1]; the new speed is then added
    to the position. No speed clamping is applied.
    """
    n_coords = len(part)
    #All r1 values are drawn before any r2 value
    r1 = [random.uniform(0, 1) for _ in range(n_coords)]
    r2 = [random.uniform(0, 1) for _ in range(n_coords)]

    new_speed = []
    for old_speed, rand_local, rand_global, own_best, swarm_best, pos in zip(
        part.speed, r1, r2, part.best, best, part
    ):
        inertia_term = weight * old_speed
        local_term = c1 * (rand_local * (own_best - pos))
        global_term = c2 * (rand_global * (swarm_best - pos))
        new_speed.append(0.7 * (inertia_term + (local_term + global_term)))
    part.speed = new_speed

    part[:] = [pos + speed for pos, speed in zip(part, part.speed)]

toolbox = base.Toolbox()
#Particle factory and population builder
toolbox.register(
    "particle",
    generate,
    size=dimension,
    smin=-3,
    smax=3,
)
toolbox.register("population", tools.initRepeat, list, toolbox.particle)
#Position/speed update
toolbox.register("update", updateParticle)
#Fitness functions: part_fit for the PSO phase, part_fit2 for the ADAM phase
toolbox.register("part_fit", part_fit)
toolbox.register("part_fit2", part_fit2)

#DISCLAIMER: during one of our runs we encountered an error caused by an erroneous
#conversion to the Long datatype, which we did not address due to lack of time.
#Because this algorithm is stochastic, the problem may or may not occur in a given run.

def _track_bests(part, best):
    """Refresh ``part.best`` and return ``(global_best, improved)``.

    The fitness weight is positive, so ``a.fitness < b.fitness`` means that
    ``a`` is worse than ``b``.
    """
    if (not part.best) or (part.best.fitness < part.fitness):
        part.best = creator.Particle(part)
        part.best.fitness.values = part.fitness.values

    improved = (not best) or best.fitness < part.fitness
    if improved:
        best = creator.Particle(part)
        best.fitness.values = part.fitness.values
    return best, improved


def main():
    """Run the hybrid PSO + ADAM loop over the fc3 parameters.

    Every iteration:
      1. evaluates all particles with ``toolbox.part_fit`` and refreshes the
         per-particle and swarm-wide bests;
      2. records the swarm best once per iteration in ``best_fit_history`` and,
         if the last ``generations_to_check`` records improved by no more than
         ``improvement_threshold`` (relative), perturbs the five particles with
         the lowest personal best and resets their speed;
      3. moves all particles with ``toolbox.update``; from iteration
         ``iter_counter`` on each particle is additionally evaluated with
         ``toolbox.part_fit2`` (ADAM on fc3) and then moved a second time.

    Returns:
        Tuple ``(pop, logbook, best)``: the final population, the statistics
        logbook and a copy of the best particle seen, with its fitness.
    """
    reset = 0
    best = None
    #Start every run with an empty stagnation history
    del best_fit_history[:]

    pop = toolbox.population(n=populationSize) # Population Size
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    #begin main loop
    for g in range(iterations):
        reset += 1
        print("Iteration number ", g+1)
        print("###########################################################")
        w = wmax - (wmax-wmin)*g/iterations #decaying inertia weight

        for x_1, part in enumerate(pop):
            part.fitness.values = (toolbox.part_fit(part),) #changed to use our own function
            print("Particle at index", x_1, "has a fitness of", part.fitness.values[0])

            best, improved = _track_bests(part, best)
            if improved:
                #Printing the best particle when it gets updated
                print("New best particle value: ", best.fitness.values[0] )

        #One history entry per iteration, so the window below really spans
        #the last `generations_to_check` iterations
        if best:
            best_fit_history.append(best.fitness.values[0])

        if len(best_fit_history) > generations_to_check and reset >= generations_to_check:
            last_generations_fitness = best_fit_history[-generations_to_check:]
            improvement = max(last_generations_fitness) - min(last_generations_fitness)

            if improvement <= improvement_threshold * max(last_generations_fitness):
                reset = 0
                print("There has been no significant improvement for the last 5 iterations, Resampling particles")
                worst_particles = sorted(pop, key=lambda x: x.best.fitness.values[0])[:5]
                for particle in worst_particles:
                    #Resetting particles' velocity
                    particle.speed = [random.uniform(VMinInit, VMaxInit) for _ in range(dimension)]
                    #Perturb the position of the low fitness particles with small
                    #uniform noise instead of resampling them from scratch
                    dist_perturbation = [random.uniform(-0.15, 0.15) for _ in range(dimension)]
                    #Clamp to the initial position range. float() keeps every
                    #coordinate a float even when it hits an integer bound, so
                    #the particle never turns into a Long tensor later on.
                    particle[:] = [
                        float(max(posMinInit, min(posMaxInit, pos + perturb)))
                        for pos, perturb in zip(particle, dist_perturbation)
                    ]

        if (g == iter_counter):
            print("ADAM on all Particles of generation ", g+1)

        for x_2, part in enumerate(pop, start=1):
            toolbox.update(part, best,w)

            #Our approach is applied in the last 20% of iterations
            if (g >= iter_counter):
                print("ADAM section: ")

                #part_fit2 runs ADAM on the fc3 layer only
                part.fitness.values = (toolbox.part_fit2(part),)
                print("Particle at index", x_2, "has a fitness of", part.fitness.values[0])

                best, improved = _track_bests(part, best)
                if improved:
                    #Printing the best particle when it gets updated
                    print("\033[92m New best particle with ADAM: \033[0m", best.fitness.values[0] )

                toolbox.update(part, best,w) #update again

        if g%interval==0: #interval
            logbook.record(gen=g, evals=len(pop), **stats.compile(pop))
            print(logbook.stream)
            #Each entry is the full "max" history so far; the last entry is the complete curve
            our_algorithm_list_best.append(logbook.select("max"))

    return pop, logbook, best

if __name__ == "__main__":
    best_particle = main()


In [None]:
#Copy the best HAPSO particle (third element returned by main) into fc3
final_list = best_particle[2]

individual_tensor = torch.tensor(final_list[:640]).to(device)
individual_tensor_b = torch.tensor(final_list[-10:]).to(device)
model.fc3.weight.data = individual_tensor.view(model.fc3.weight.size())
model.fc3.bias.data = individual_tensor_b.view(model.fc3.bias.size())

#Measure accuracy on the test split with those parameters
model.eval()
total_test = 0
correct_test = 0

with torch.no_grad():
    for data in testloader:
        inputs = data[0].to(device)
        labels = data[1].to(device)
        outputs = model(inputs)
        predicted = torch.max(outputs.data, 1)[1]
        correct_test += (predicted == labels).sum().item()
        total_test += labels.size(0)

test_accuracy_updated2 = 100 * correct_test / total_test
print(f"HAPSO: Test Accuracy with new weights and biases: {test_accuracy_updated2:.2f}%")

In [None]:
#Plot the final chart
#The last entry of each list is the complete per-iteration "max" curve
pso_curve = pso_list_best[-1]
hapso_curve = our_algorithm_list_best[-1]

plt.figure(figsize=(10, 6))
#Give all three curves the same 1-based x axis; without explicit x values
#matplotlib would start the PSO and HAPSO curves at 0 while SGD starts at 1
plt.plot(range(1, epochs + 1), train_accuracy, label='SGD')
plt.plot(range(1, len(pso_curve) + 1), pso_curve, label='PSO')
plt.plot(range(1, len(hapso_curve) + 1), hapso_curve, label='Our Algorithm')
plt.xlabel('Number of Iterations')
plt.ylabel('Accuracy on Training set (%)')
plt.title('Comparison between algorithms')
plt.legend()
plt.show()

## NSGA-II

In [None]:
"""
Inspired by the code from lab 6 of COMM3013, University of Surrey
Source: https://surreylearn.surrey.ac.uk/d2l/le/lessons/252875/lessons/2851159
"""


#    This file is part of DEAP.
#    This implements the NSGA-II in an easy way because it makes us of DEAP subroutines
#    The non dominated sort and crowding distance are solved by a simiple call to DEAP subroutines
#    and their implementation is hidden.
#
#    DEAP is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as
#    published by the Free Software Foundation, either version 3 of
#    the License, or (at your option) any later version.
#
#    DEAP is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with DEAP. If not, see <http://www.gnu.org/licenses/>.

from deap import algorithms
from deap import base
from deap import benchmarks
from deap.benchmarks.tools import diversity, convergence, hypervolume
from deap import creator
from deap import tools
import matplotlib.pyplot as plt


#Two objectives: the first is maximised (weight 1.0), the second minimised (weight -1.0)
creator.create(
    "FitnessMax",
    base.Fitness,
    weights=(1.0, -1.0),
)
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

#Bounds of every gene and number of genes per individual
BOUND_LOW = -1.0
BOUND_UP = 1.0
NDIM = 650

def accuracy_value(individual):
    """Load ``individual`` into ``model.fc3`` and return the training accuracy in percent.

    The leading ``fc3.weight.numel()`` values of ``individual`` become the fc3
    weights and the trailing ``fc3.bias.numel()`` values the fc3 biases (640
    and 10 for the 64 -> 10 layer). The accuracy comes from ``train_model``
    called with every layer frozen, so the pass is only used as a measurement.
    """
    weight = model.fc3.weight
    bias = model.fc3.bias
    n_weights = weight.numel()
    n_biases = bias.numel()

    #Give the dtype explicitly so the result does not depend on what
    #torch.tensor() infers from the list contents
    new_weight = torch.tensor(individual[:n_weights], dtype=weight.dtype, device=device)
    new_bias = torch.tensor(individual[-n_biases:], dtype=bias.dtype, device=device)

    #Overwrite the existing parameters in place instead of rebinding .data
    with torch.no_grad():
        weight.copy_(new_weight.view(weight.size()))
        bias.copy_(new_bias.view(bias.size()))
    model.to(device)

    _, train_accuracy = train_model(model, trainloader, criterion, optimizer_type='SGD', learning_rate=lr_SGD, device=device, disable_param_fc=True, disable_param_fc3=True)
    return train_accuracy

def gaussian_regulariser(individual):
    """Return the sum of the squared entries of ``individual`` as a float."""
    total = 0.0
    for value in individual:
        total = total + value ** 2
    return total

import random
import numpy
from math import sqrt



def calcFitness(individual):
    """Evaluate both objectives for ``individual``, print them and return them.

    Returns:
        Tuple ``(accuracy, regulariser)``.
    """
    accuracy = accuracy_value(individual)
    penalty = gaussian_regulariser(individual)
    print("Accuracy " + str(accuracy))
    print("Gaussian " + str(penalty))
    return accuracy, penalty

def uniform(low, up, size=None):
    """Draw uniform random values between ``low`` and ``up``.

    ``low`` / ``up`` may be sequences (one value per pair of bounds) or scalars,
    in which case ``size`` values are drawn from the same bounds.
    """
    def draw(bounds):
        return [random.uniform(a, b) for a, b in bounds]

    try:
        return draw(zip(low, up))
    except TypeError:
        #Scalar bounds: repeat them `size` times
        return draw(zip([low] * size, [up] * size))

#Individuals: NDIM floats drawn uniformly between the bounds
toolbox.register("attr_float", uniform, BOUND_LOW, BOUND_UP, NDIM)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_float)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
#Objectives
toolbox.register("evaluate", calcFitness)
#Variation operators, both bounded to [BOUND_LOW, BOUND_UP]
toolbox.register(
    "mate",
    tools.cxSimulatedBinaryBounded,
    low=BOUND_LOW,
    up=BOUND_UP,
    eta=20.0,
)
flipProb = 1.0 / 9
toolbox.register(
    "mutate",
    tools.mutPolynomialBounded,
    low=BOUND_LOW,
    up=BOUND_UP,
    eta=20.0,
    indpb=1.0 / NDIM,
)
#NSGA-II selection
toolbox.register("select", tools.selNSGA2)

def main(seed=None):
    """Run NSGA-II on the two objectives (accuracy, Gaussian regulariser).

    Args:
        seed: value passed to ``random.seed``; ``None`` seeds from system state.

    Returns:
        Tuple ``(pop, logbook)``: the final population and the per-generation
        statistics.
    """
    random.seed(seed)

    NGEN = 40
    MU = 12
    CXPB = 0.9

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", numpy.mean, axis=0)
    stats.register("std", numpy.std, axis=0)
    stats.register("min", numpy.min, axis=0)
    stats.register("max", numpy.max, axis=0)

    logbook = tools.Logbook()
    logbook.header = "gen", "evals", "std", "min", "avg", "max"

    pop = toolbox.population(n=MU)

    #Evaluate the individuals that have no valid fitness yet
    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    #No real selection happens here: selecting len(pop) individuals only
    #assigns the crowding distance that selTournamentDCD needs
    pop = toolbox.select(pop, len(pop))

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(invalid_ind), **record)
    print(logbook.stream)

    for gen in range(1, NGEN):
        #Vary the population
        offspring = tools.selTournamentDCD(pop, len(pop))
        offspring = [toolbox.clone(ind) for ind in offspring]

        for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
            if random.random() <= CXPB:
                toolbox.mate(ind1, ind2)

            toolbox.mutate(ind1)
            toolbox.mutate(ind2)
            del ind1.fitness.values, ind2.fitness.values

        #Evaluate the individuals whose fitness was invalidated above
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        #Select the next generation from parents + offspring
        pop = toolbox.select(pop + offspring, MU)
        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_ind), **record)
        print(logbook.stream)

    #The hypervolume is computed on the negated weighted fitness, i.e. on
    #(-accuracy, +regulariser). The reference point must be no better than any
    #individual: accuracy 0 % and the largest regulariser the bounds allow.
    worst_regulariser = NDIM * max(abs(BOUND_LOW), abs(BOUND_UP)) ** 2
    print("Final population hypervolume is %f" % hypervolume(pop, [0.0, worst_regulariser]))

    return pop, logbook


if __name__ == "__main__":
    pop, stats = main()


In [None]:
#Scatter the final population in objective space (accuracy vs. regulariser)
pop.sort(key=lambda individual: individual.fitness.values)
front = numpy.array([individual.fitness.values for individual in pop])
accuracies = front[:, 0]
regularisers = front[:, 1]
plt.scatter(accuracies, regularisers, c="b")
plt.axis("tight")
plt.title('NSGA-II plot')
plt.xlabel('Accuracy (%)')
plt.ylabel('Gaussian Regulariser')
plt.show()