## Library Imports

In [1]:
import random
import math
import datetime
import operator
import torch
import torchvision

import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt

import numpy as np

from deap import creator, base, tools, algorithms, benchmarks
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## Data Importing

In [2]:
def get_train_valid_loader(data_dir, 
                           batch_size, 
                           random_seed,
                           shuffle_dataset,
                           validation_split,
                           tf):

    train_ = torchvision.datasets.CIFAR10(root=data_dir, train=True,
                                            download=True, transform=transform)

    #Creating data indices for training and validation splits:
    train_dataset_size = len(train_)
    indices = list(range(train_dataset_size))
    split = int(np.floor(validation_split * train_dataset_size))

    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_indices,val_indices = indices[split:], indices[:split]


    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(train_, batch_size=batch_size, 
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(train_, batch_size=batch_size,
                                                    sampler=valid_sampler)
    
    return (train_loader, validation_loader)



def get_train_lodader(data_dir,
                   batch_size,
                   random_seed,
                   shuffle_dataset,
                   transform):

    train_ = torchvision.datasets.CIFAR10(root=data_dir, train=True,
                                            download=True, transform=transform)

    train_loader = torch.utils.data.DataLoader(train_, batch_size=batch_size)

    return(train_loader)

def get_test_lodader(data_dir,
                   batch_size,
                   random_seed,
                   shuffle_dataset,
                   transform):

    test_ = torchvision.datasets.CIFAR10(root=data_dir, train=False,
                                            download=True, transform=transform)

    test_loader = torch.utils.data.DataLoader(test_, batch_size=batch_size)

    return(test_loader)

In [3]:
random_seed= 1

torch.manual_seed(1)
np.random.seed(1)

In [4]:
validation_split = 0
shuffle_dataset = True

batch_size = 32
data_dir = './data'

### Classes from CIFAR10
classes = ('plane', 'car', 'bird', 'cat',
            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

transform = transforms.Compose([transforms.Resize(32),transforms.ToTensor(),
                                    transforms.Normalize(
                                        mean=[0.485, 0.456, 0.406], 
                                        std=[0.229, 0.224, 0.225])])
# transform = transforms.Compose(
#     [transforms.ToTensor(),
#      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

#train_loader, validation_loader = get_train_valid_loader('./data', batch_size, random_seed, shuffle_dataset, validation_split, transform)


train_loader = get_train_lodader('./data', batch_size, random_seed, shuffle_dataset, transform)
test_loader = get_test_lodader('./data', batch_size, random_seed, shuffle_dataset, transform)


Files already downloaded and verified
Files already downloaded and verified


## Neural Network

### Initialisation and functions

In [5]:
def sum_squared_diff(actual, predicted):
    sum_ = 0
    for yi, yi_hat in zip(actual, predicted):
        sum_ += ((yi-yi_hat)**2)
    
    return sum_

def mse(s, n):
    return (s/n)

In [6]:
### Training function
### https://debuggercafe.com/transfer-learning-using-efficientnet-pytorch/
def train(model, trainloader, optimizer, criterion):    
    model.train()
    
    running_loss = 0.0
    running_correct = 0
    
    count = 0

    print(len(trainloader))
    
    for i, data in tqdm(enumerate(trainloader), total=len(trainloader)):

        count += 1
        image, labels = data
        
        image = image.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass.
        outputs = model(image)
        
        # Calculate the loss.
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        
        # Calculate the accuracy.
        _, preds = torch.max(outputs.data, 1)
        running_correct += (preds == labels).sum().item()
        
        # Backpropagation
        loss.backward()
        
        # Update the weights.
        optimizer.step()
    
    # Loss and accuracy for the complete epoch.
    epoch_loss = running_loss / count
    epoch_acc = 100. * (running_correct / len(trainloader.dataset))
    return epoch_loss, epoch_acc

def validate(model, testloader, criterion):
    model.eval()
    
    running_loss = 0.0
    running_correct = 0
    
    count = 0
    sum_ = 0
    with torch.no_grad():
        #for i, data in tqdm(enumerate(testloader), total=len(testloader)):
        for i, data in enumerate(testloader):
            count += 1
            image, labels = data
            image = image.to(device)
            labels = labels.to(device)
            
            # Forward pass.
            outputs = model(image)
            
            # Calculate the loss.
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            # Calculate the accuracy.
            _, preds = torch.max(outputs.data, 1)
            running_correct += (preds == labels).sum().item()
            
            sum_ += sum_squared_diff(labels,preds)
        
    # Loss and accuracy for the complete epoch.
    epoch_loss = running_loss / count
    epoch_acc = 100. * (running_correct / len(testloader.dataset))
    # MSE for validation
    MSE = mse(sum_, len(test_loader.dataset))
    
    return epoch_loss, epoch_acc, MSE

In [12]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 8, 5)
        self.fc1 = nn.Linear(200, 32)
        self.fc3 = nn.Linear(32, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = self.fc3(x)
        return x

In [13]:
def save_model(epochs, model, optimizer, criterion, pretrained, time, val):
    torch.save({
                'epoch': epochs,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': criterion,
                }, f"./model_efficientNetB0_{epochs}_{val}_{time}.pth")    


## Load saved model
def load_model(name):
    model = torch.load(name)
    return model


def build_model(num_classes, pre_trained=True, fine_tune=True, saved=None):
#     model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)

    model = MyEfficientNet()
    
    if fine_tune:
        print('[INFO]: Fine-tuning all layers...')
        for params in model.parameters():
            params.requires_grad = True
            
    elif not fine_tune:
        print('[INFO]: Freezing hidden layers...')
        for name, param in list(model.named_parameters())[:-2]:
            param.requires_grad = False
            
        for name, param in list(model.named_parameters())[-2:]:
            param.requires_grad = True
        
#         for params in model.parameters():
#             params.requires_grad = False
    
#     model.classifier[1] = nn.Linear(in_features=1280, out_features=num_classes) 
    
    
    if saved:
        loaded_dict = load_model(saved)
        model.load_state_dict(loaded_dict['model_state_dict'])
    
    return model

In [14]:
SEED = 123
np.random.seed(SEED)

LOADED = False
load_model_name = "model_efficientNetB0_5_2022-11-22_16-50.pth"

# Loads model weights
pretrained = False
# False -> freezes all the layers but the last one; True -> unfreezes all the layers
fine_tune = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Computation device: {device}")

lr = 1e-3
print(f"Learning rate: {lr}")

epochs = 20
print(f"Number of epochs: {epochs}")

# Save every x epochs
save_epoch = 5

Computation device: cuda
Learning rate: 0.001
Number of epochs: 20


### Loading the Model

In [74]:
# Loading the model
model = Net()
# Moving the model to the device (GPU)
model.to(device)


Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 8, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=200, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=10, bias=True)
)

In [75]:
# Optimizer.
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# optimizer = torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
# Loss function.
criterion = nn.CrossEntropyLoss()
# Learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode = 'min',patience=3, cooldown=2, factor=0.1)
# Model saving interval
save_interval = 10

### Training the Model

In [16]:
time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")

if not LOADED:
  # Lists to keep track of losses and accuracies.
  train_loss, valid_loss = [], []
  train_acc, valid_acc = [], []
  valid_mse = []

  # Start the training.
  for epoch in range(epochs):
      print(f"[INFO]: Epoch {epoch+1} of {epochs}")
      print("LR:{%s}" %optimizer.param_groups[0]['lr'])
      train_epoch_loss, train_epoch_acc = train(model, train_loader, optimizer, criterion)
      valid_epoch_loss, valid_epoch_acc, valid_epoch_mse = validate(model, test_loader, criterion)
      
      scheduler.step(valid_epoch_mse)  
        
      train_loss.append(train_epoch_loss)
      valid_loss.append(valid_epoch_loss)
      train_acc.append(train_epoch_acc)
      valid_acc.append(valid_epoch_acc)
      valid_mse.append(valid_epoch_mse)
      print(f"Training loss: {train_epoch_loss:.3f}, training acc: {train_epoch_acc:.3f}")
      print(f"Validation loss: {valid_epoch_loss:.3f}, validation acc: {valid_epoch_acc:.3f}, mse:{valid_epoch_mse:.3f}")
      print('-'*50)

      if epoch % save_interval == 0:
        print("Save Model")
        save_model(epochs, model, optimizer, criterion, pretrained, time, valid_epoch_acc)
    
  # Save the trained model weights.
  save_model(epochs, model, optimizer, criterion, pretrained, time, valid_acc[-1])
  print('TRAINING COMPLETE')


[INFO]: Epoch 1 of 20
LR:{0.001}
1563


100%|██████████████████████████████████████| 1563/1563 [00:13<00:00, 112.37it/s]


Training loss: 2.104, training acc: 21.678
Validation loss: 1.791, validation acc: 34.360, mse:13.003
--------------------------------------------------
Save Model
[INFO]: Epoch 2 of 20
LR:{0.001}
1563


100%|██████████████████████████████████████| 1563/1563 [00:14<00:00, 111.57it/s]


Training loss: 1.644, training acc: 39.766
Validation loss: 1.535, validation acc: 43.420, mse:11.526
--------------------------------------------------
[INFO]: Epoch 3 of 20
LR:{0.001}
1563


100%|██████████████████████████████████████| 1563/1563 [00:13<00:00, 113.49it/s]


Training loss: 1.491, training acc: 45.746
Validation loss: 1.441, validation acc: 47.410, mse:10.696
--------------------------------------------------
[INFO]: Epoch 4 of 20
LR:{0.001}
1563


100%|██████████████████████████████████████| 1563/1563 [00:14<00:00, 110.75it/s]


Training loss: 1.404, training acc: 49.584
Validation loss: 1.368, validation acc: 50.620, mse:9.837
--------------------------------------------------
[INFO]: Epoch 5 of 20
LR:{0.001}
1563


  7%|██▋                                    | 107/1563 [00:00<00:13, 108.32it/s]


KeyboardInterrupt: 

### Validation

In [47]:
validate(model, test_loader, criterion)

(2.3087721465113824, 11.31, tensor(13.5055, device='cuda:0'))

## Adam-SLPso

In [76]:
class Adam_SLPSO():
    def __init__(self,dimension):     
        self.populationSize  = 5
        self.iterations      = 3

        # Parameter Setup - PSO
        self.posMinInit      = - 1
        self.posMaxInit      = + 1
        self.VMaxInit        = 1.5
        self.VMinInit        = 0.5
        self.interval        = 10
        self.dimension       = dimension

        self.alpha = 0.5
        self.beta = 1e-3
        self.epsilon = self.beta * (self.dimension/self.populationSize)
        self.cons_m = self.populationSize + math.floor(self.dimension/10)
        self.best_losses = []

        self.c1 = 2
        self.c2 = 2

        # Deap Initialisation
        self.toolbox = base.Toolbox()
        
        self.toolbox.register("update", self.updateParticleSLPSO)
        self.toolbox.register("particle", self.generate, size=dimension, smin=self.posMinInit, smax=self.posMaxInit)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.particle)
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) # -1 is for minimise
        creator.create("Particle", list, fitness=creator.FitnessMin, speed=list, smin=None, smax=None, best=None)

        self.toolbox.register("evaluate", self.evalNet) 

        self.pop = self.toolbox.population(n=self.populationSize) 
        
        self.best = None

        self.stats = tools.Statistics(lambda ind: ind.fitness.values)
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)

        self.logbook = tools.Logbook()
        self.logbook.header = ["gen", "evals"] + self.stats.fields

    
    def processing(self,generations, patience):
        gen = 0
        improving = True
        counter_stagnation = 0
        while(improving and gen<generations):
            print("   -- Generation %i --" % gen)
            gen_mse = []
            
            #Evaluation of particles
            for part in self.pop:
                fitness = self.toolbox.evaluate(part)
                part.fitness.values = fitness
                gen_mse.append(fitness)

            #Setting the 0th old_gbest
            if(gen==0):
                old_gbest = self.pop[0].fitness.values
                
            # Saving gbest
            self.best_ind = tools.selBest(self.pop, 1)[0]

            #Sorting particles by fitness
            self.pop.sort(key=lambda x: x.fitness, reverse=True)
            mu = self.calculate_mu(self.pop)
            
            for i in range(len(self.pop)):
                print(self.pop[i].fitness.values)
    

            #for i  in reversed(range(len(self.pop)-1)):  # start with worst particle, and go in reverse towards best
                                                          # don't do element 0 (best). Hence the i+1 below.
            for i  in range(len(self.pop)-1):
                if random.uniform(0, 1) < self.calculate_l_prob(i): # learning probability for that particle
                          self.toolbox.update(self.pop[i], self.pop, mu, i)
            
            ### Checking if the fitness improves 
            if (old_gbest <= self.best_ind.fitness.values):
                print("TRUE")
                counter_stagnation +=1
                if(counter_stagnation == patience):
                    improving = False
            else:
                print("FALSE")
                counter_stagnation = 0
                old_gbest = self.best_ind.fitness.values


            print(counter_stagnation)
            print(old_gbest)
            print(self.best_ind.fitness.values)
            
            self.best_ind = tools.selBest(self.pop, 1)[0]

            gen +=1
            
            # Saving the best individual in the generation
            best_loss = min(gen_mse)
            print(f"*** BEST loss in generation {gen} = {best_loss}")
            self.best_losses.append(best_loss)

        
    
   
    # ************************************************
    # calculate_mu(pop) :
    # Calculating mu values for particle update
    # INPUT: 
    #   list  -    pop      - List of populations
    # 
    # OUTPUT : 
    #   list  -    mu_list  - List of mu values 
    # ************************************************
    def calculate_mu(self, population):
        mu_list = list()
        for i in range(self.dimension):
            temp = 0
            for j in population:
                temp += j[i]
            temp /= self.populationSize
            mu_list.append(temp)
        return mu_list

    # ************************************************
    # calculate_l_prob(idx) :
    # Calculating learning probability to assess if a particle
    # needs to be updated
    # INPUT: 
    #   int  -    idx  -    index i of population
    # 
    # OUTPUT : 
    #   int  -    l_p  -    learning probability return
    # ************************************************
    def calculate_l_prob(self, idx):
        l_p = (1-((idx-1)/self.cons_m))**(self.alpha*math.log(math.sqrt(math.ceil(self.dimension/self.populationSize))))   
        return l_p

    
        
    # ************************************************
    # updateParticleSLPSO(part, pop, mu, i) :
    # calculating new values for particles in population 
    #
    # INPUT: 
    #   obj  -    part  -    single unit of SL-PSO algorithm
    #   obj  -    pop  -    population
    #   list  -    mu  -   list of calculated mu
    # 
    # OUTPUT : 
    #   int  -    MSE  -    Mean Square Error
    # ************************************************
    def updateParticleSLPSO(self, part, pop, mu, i):
        demonstrator=random.choice(list(pop[i:len(pop)]))

        r1 = [random.uniform(0, 1)*0.05]*dimension
        r2 = [random.uniform(0, 1)*0.9]*dimension
        r3 = [random.uniform(0, 1)*0.9]*dimension

        v_r0 = [x for x in map(operator.mul, r1, part.speed)]
        v_r1 = [x for x in map(operator.mul, r2, map(operator.sub, demonstrator, part))] # learning from the demonstrator
        v_r2 = [self.epsilon*x for x in map(operator.mul, r3, map(operator.sub, mu, part))] # social learning from the mean
        part.speed = [x for x in map(operator.add, v_r0, map(operator.add, v_r1, v_r2))]
       
        part[:] = list(map(operator.add, part, part.speed))
    
    # ************************************************
    # evalNet(individual) :
    # Calculating the accuracy of the model, in this case on CIFAR-100
    # using the network weights calculated by an individual 
    #
    # INPUT: 
    #   obj  -    individual  -    single unit of SL-PSO algorithm
    # 
    # OUTPUT : 
    #   int  -    MSE  -    Mean Square Error
    # ************************************************
    def generate(self, size, smin, smax):
        part = creator.Particle(random.uniform(self.posMinInit, self.posMaxInit) for _ in range(size)) 
        part.speed = [random.uniform(self.VMinInit, self.VMaxInit) for _ in range(size)]
        part.smin = smin #speed clamping values
        part.smax = smax
        return part

    # ************************************************
    # evalNet(individual) :
    # Calculating the accuracy of the model, in this case on CIFAR-100
    # using the network weights calculated by an individual 
    #
    # INPUT: 
    #   obj  -    individual  -    single unit of SL-PSO algorithm
    # 
    # OUTPUT : 
    #   int  -    MSE  -    Mean Square Error
    # ************************************************
    def evalNet(self, individual):
        # Updating the network with individual's decision variable
        self.updateWeights(individual)


        loss, accuracy, MSE = validate(model, test_loader, criterion)        
        print(f"Accuracy : {accuracy}; Loss: {loss}; MSE: {MSE}")
        return (MSE,)
    
    # ************************************************
    # updateWeights(individual) :
    # Updating the weights of a NN using the predicted dimensions
    # of an individual
    #
    # INPUT: 
    #   obj  -    individual  -    single unit of SL-PSO algorithm
    # 
    def updateWeights(self, individual):
        # Getting the state dictionary which includes weights and biases
        state_dict = model.state_dict()

        # Counter
        prev = 0

        for name, param in model.named_parameters():
          if param.requires_grad:
            if ('weight' or 'bias' in name):
              # Getting the shape of the current weight/bias
              shape = param.shape

              # Getting the number of variables that make up the parameter
              num_var = len(param.flatten())

              # Getting a subset of the particle's decision variables and shaping it
              # to fit the network
              new_parameter = np.reshape(individual[prev:prev+num_var], shape)

              # Updating the state variable
              state_dict[name] = torch.Tensor(new_parameter)
              prev += num_var

        # Updating the network with the weights
        model.load_state_dict(state_dict)        
        


### Main Loop

In [77]:
dimension = 0 
for name, param in model.named_parameters():
  if param.requires_grad:
    if ('weight' or 'bias' in name):
      dimension += len(param.flatten())

print(f"Total number of parameters: {dimension}")

Total number of parameters: 16938


In [None]:
evolutions = 10
pso = Adam_SLPSO(dimension)

for evol in range(evolutions):
    print("** EVOLUTION %i **" % evol)

    pso.processing(10,4)
    

** EVOLUTION 0 **
   -- Generation 0 --
Accuracy : 10.81; Loss: 1680.8312060078874; MSE: 15.049699783325195
Accuracy : 10.92; Loss: 2043.292699649311; MSE: 14.060599327087402
Accuracy : 10.86; Loss: 2673.1448635308507; MSE: 18.61629867553711
Accuracy : 9.41; Loss: 2533.1805443321937; MSE: 19.15989875793457
Accuracy : 8.67; Loss: 3736.907242942542; MSE: 16.564899444580078
(tensor(14.0606, device='cuda:0'),)
(tensor(15.0497, device='cuda:0'),)
(tensor(16.5649, device='cuda:0'),)
(tensor(18.6163, device='cuda:0'),)
(tensor(19.1599, device='cuda:0'),)
FALSE
0
(tensor(14.0606, device='cuda:0'),)
(tensor(14.0606, device='cuda:0'),)
*** BEST loss in generation 1 = (tensor(14.0606, device='cuda:0'),)
   -- Generation 1 --
Accuracy : 9.370000000000001; Loss: 1350.8945606950754; MSE: 19.85610008239746
Accuracy : 8.7; Loss: 558798.0472244408; MSE: 15.16469955444336
Accuracy : 9.26; Loss: 5466.819074636831; MSE: 10.151299476623535
Accuracy : 10.96; Loss: 146871.72885882587; MSE: 18.6658992767334
A

Accuracy : 10.879999999999999; Loss: 45591740.38338658; MSE: 20.744600296020508
Accuracy : 12.1; Loss: 829249.7993210863; MSE: 16.828899383544922
Accuracy : 12.280000000000001; Loss: 16891413.134185303; MSE: 15.414999961853027
