In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

Hyper-paramètres :

In [31]:
# Hyper-parameters
sample_rate = 0.1  # fraction of the training set drawn by get_data() for each trial
num_epochs = 10    # number of epochs run by train_model()

In [3]:
# Sanity check: allocate a tensor on the configured device. Using `device`
# instead of a hard-coded .cuda() keeps this cell runnable on CPU-only machines.
torch.zeros(1).to(device)

tensor([0.], device='cuda:0')

Preprocessing et chargement du dataset

In [30]:
import time
# Image preprocessing modules (training-time augmentation): pad by 4 pixels,
# random horizontal flip, random 32x32 crop, then conversion to a tensor.
transform = transforms.Compose([
    transforms.Pad(padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32),
    transforms.ToTensor(),
])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(
    root='../../data/', train=True, transform=transform, download=True)

# No augmentation on the test split. `download` is not requested here; the
# files are expected under the same root as the training split.
test_dataset = torchvision.datasets.CIFAR10(
    root='../../data/', train=False, transform=transforms.ToTensor())

def split_dataset(dataset, rate):
  """Randomly split `dataset` into a sample and the remainder.

  The sample holds int(rate * len(dataset)) items; the remainder holds the
  rest. The permutation comes from a generator seeded with the current
  wall-clock second, so calls made within the same second on datasets of the
  same length draw the same permutation.

  Args:
    dataset: any indexable dataset with a length.
    rate: fraction of the dataset to put in the sample.

  Returns:
    (sample, left): the two subsets produced by random_split.
  """
  total = len(dataset)
  n_sample = int(rate * total)
  rng = torch.Generator()
  rng.manual_seed(int(time.time()))
  parts = torch.utils.data.random_split(dataset, [n_sample, total - n_sample], generator=rng)
  return parts[0], parts[1]

def get_data(batch_size):
  """Build the train / validation / test data loaders used by one trial.

  A `sample_rate` fraction of `train_dataset` is used for training and a
  further 1% of `train_dataset` for validation. Both subsets come from one
  random split, so they never share an image.

  Args:
    batch_size: mini-batch size used by the three loaders.

  Returns:
    (train_loader, val_loader, test_loader)
  """
  total = len(train_dataset)
  train_size = int(sample_rate * total)
  # Validation set: 1% of the full training set, capped by what is left over.
  val_size = min(int(0.01 * total), total - train_size)
  unused_size = total - train_size - val_size

  # A single split guarantees disjoint subsets. Two independent
  # split_dataset() calls seed their generators with int(time.time()); within
  # the same second they draw the same permutation, which made the validation
  # images a subset of the training images.
  generator = torch.Generator().manual_seed(int(time.time()))
  train_dataset_sample, val_subset, _ = torch.utils.data.random_split(
      train_dataset, [train_size, val_size, unused_size], generator=generator)
  # NOTE(review): val_subset wraps train_dataset, so validation images still go
  # through the random augmentation `transform` attached to that dataset.
  print("train size : ", len(train_dataset_sample), " val_size : ", len(val_subset), " total : ", len(val_subset) + len(train_dataset_sample))

  # Data loaders: only the training loader is shuffled.
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset_sample, shuffle=True, batch_size=batch_size)
  val_loader = torch.utils.data.DataLoader(dataset=val_subset, shuffle=False, batch_size=batch_size)
  test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)

  return train_loader, val_loader, test_loader

Files already downloaded and verified


In [5]:
# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3 `nn.Conv2d` with padding 1 and the given stride."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_kwargs)

# Residual block
class ResidualBlock(nn.Module):
    """Two-convolution residual block: conv-bn-relu-conv-bn plus a skip connection.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to build the skip
            branch; when None the input itself is added to the main branch.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(main_branch(x) + skip(x))."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Explicit None test: the truthiness of a container module goes through
        # __len__, so `if self.downsample:` is not a reliable "is it set" check.
        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return self.relu(out)

# ResNet
class ResNet(nn.Module):
    """Three-stage residual network with 16, 32 and 64 channels.

    Args:
        block: residual block class, called as
            block(in_channels, out_channels, stride, downsample).
        layers: sequence giving the number of blocks in each of the three stages.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, layers, num_classes=10):
        super().__init__()
        self.in_channels = 16
        # Stem: 3 input channels -> 16 feature maps.
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Stages 2 and 3 halve the spatial resolution (stride 2).
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        # The linear layer takes 64 features, so the pooled map must be 1x1;
        # this holds for the 32x32 crops produced by the training transform.
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Stack `blocks` residual blocks; only the first may change stride/channels."""
        needs_projection = stride != 1 or self.in_channels != out_channels
        downsample = None
        if needs_projection:
            # Projection for the skip branch so both branches have matching shapes.
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        stages = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        stages.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stages)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        out = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        out = self.avg_pool(out)
        flat = out.view(out.size(0), -1)
        return self.fc(flat)

In [6]:
# For updating learning rate
def update_lr(optimizer, lr):
    """Set the learning rate of every parameter group of `optimizer` to `lr`."""
    for group in optimizer.param_groups:
        group.update(lr=lr)

In [7]:
#!pip install optuna
import optuna
from optuna.trial import TrialState

In [13]:
def train_model(model, learning_rate, train_loader, criterion, optimizer):
  """Train `model` in place for `num_epochs` epochs over `train_loader`.

  Args:
    model: network to train; expected to already live on `device`.
    learning_rate: initial learning rate, used as the starting point of the decay.
    train_loader: iterable yielding (images, labels) batches.
    criterion: loss called as criterion(outputs, labels).
    optimizer: optimizer already bound to the model's parameters.
  """
  curr_lr = learning_rate
  # Put BatchNorm/Dropout layers in training mode even if the caller had
  # switched the model to eval mode before.
  model.train()

  for epoch in range(num_epochs):
    for images, labels in train_loader:
      images = images.to(device)
      labels = labels.to(device)

      # Forward pass
      outputs = model(images)
      loss = criterion(outputs, labels)

      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    # Decay learning rate: divide it by 3 every 20 epochs.
    if (epoch + 1) % 20 == 0:
      curr_lr /= 3
      update_lr(optimizer, curr_lr)

    # NOTE: Optuna pruning (trial.report / trial.should_prune) would hook in
    # here, but no trial object is passed to this function.
    

In [14]:
def objective(trial, lr_n=0, bs_n=0):
  """Train a ResNet-18 with the given hyper-parameters and return its validation accuracy.

  Args:
    trial: Optuna trial used to suggest the two normalized hyper-parameters,
      or None to use `lr_n` / `bs_n` as given.
    lr_n: normalized learning-rate coordinate; the learning rate is
      10 ** (-3 * lr_n - 2), i.e. 1e-2 for 0 and 1e-5 for 1.
    bs_n: normalized batch-size coordinate; the batch size is
      int(20 + 180 * bs_n), i.e. 20 for 0 and 200 for 1.

  Returns:
    Validation accuracy in percent (0.0 if the validation loader is empty).
  """
  if trial is not None:
    lr_n = trial.suggest_float('learning_rate', 0, 1)
    bs_n = trial.suggest_float('batch_size', 0, 1)

  bs = int(20 + bs_n * 180)
  lr = 10 ** (-3*lr_n - 2)

  # Possible extensions: number of layers, learning-rate decay, decay
  # schedule, data-augmentation parameters?

  train_loader, val_loader, _ = get_data(bs)

  # model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
  # CIFAR-10 has 10 classes: size the classifier head accordingly instead of
  # keeping the 1000-way default. No pretrained weights are loaded.
  model = torchvision.models.resnet18(num_classes=10).to(device)
  # Loss and optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)

  train_model(model, lr, train_loader, criterion, optimizer)

  # Validation
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for images, labels in val_loader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  if total == 0:
    # Nothing to validate on: avoid a ZeroDivisionError.
    return 0.0
  return 100 * correct / total

In [None]:
#!pip install stable_baselines --upgrade
#!pip install gym

In [10]:
import numpy as np
import stable_baselines3 as sb3
import gym
from gym import spaces
from gym.envs.registration import register

In [None]:
#!pip install git+https://github.com/nathanrooy/particle-swarm-optimization

In [37]:
from pso import pso_simple

In [None]:
# Custom Samplers (PSO + SAC)

class PSOSampler(optuna.samplers.BaseSampler):
    """Optuna sampler that derives its proposals from a particle swarm.

    Every call to `sample_relative` evaluates all particles with `costFunc`,
    updates the swarm-wide best position, moves the particles, and then draws
    one value per parameter of the search space.

    Args:
        x0: initial position shared by all particles (one coordinate per dimension).
        bounds: sequence of (min, max) pairs, one per dimension, used to clamp positions.
        costFunc: callable taking a position list and returning a cost to minimize.
        num_particles: number of particles in the swarm.
    """

    def __init__(self, x0, bounds, costFunc, num_particles=5):
        global num_dimensions

        self._rng = np.random.RandomState()
        self._current_trial = None  # Current state.
        self.num_particles = num_particles
        self.bounds = bounds
        self.costFunc = costFunc

        # Best cost / position seen by the whole swarm, kept across calls.
        self.err_best_g = None
        self.pos_best_g = []

        # Mirror pso_run(): publish the dimensionality through the module-level
        # `num_dimensions`, which the Particle helpers may read.
        num_dimensions = len(x0)
        self.swarm = [Particle(x0) for _ in range(num_particles)]

    def sample_relative(self, study, trial, search_space):
        """Advance the swarm by one iteration and return {param_name: value}."""
        if search_space == {}:
            return {}
        if not self.swarm:
            # No particles: leave every parameter to sample_independent().
            return {}

        # Reject unsupported distributions before paying for a swarm evaluation.
        for param_distribution in search_space.values():
            if (
                not isinstance(param_distribution, optuna.distributions.FloatDistribution)
                or (param_distribution.step is not None and param_distribution.step != 1)
                or param_distribution.log
            ):
                msg = (
                    "Only suggest_float() with `step` `None` or 1.0 and"
                    " `log` `False` is supported"
                )
                raise NotImplementedError(msg)

        # cycle through particles in swarm and evaluate fitness
        for particle in self.swarm:
            particle.evaluate(self.costFunc)

            # determine if current particle is the best (globally)
            if self.err_best_g is None or particle.err_i < self.err_best_g:
                self.pos_best_g = list(particle.position_i)
                self.err_best_g = float(particle.err_i)

        # cycle through swarm and update velocities and position
        for particle in self.swarm:
            particle.update_velocity(self.pos_best_g)
            particle.update_position(self.bounds)

        # Draw one value per parameter; it is used by the next execution of
        # the objective function passed to the study.
        # NOTE(review): pairing parameter i with particle i and pooling that
        # particle's coordinates into a mean/std is kept from the original
        # code; confirm this is the intended mapping from swarm to parameters.
        params = {}
        for i, (param_name, param_distribution) in enumerate(search_space.items()):
            particle = self.swarm[i % len(self.swarm)]
            mean = np.mean(particle.position_i)
            std = np.std(particle.position_i)
            value = self._rng.normal(mean, std)
            # Keep the proposal inside the parameter's own range.
            params[param_name] = float(
                np.clip(value, param_distribution.low, param_distribution.high)
            )

        return params

    # The rest is unrelated to the swarm logic: boilerplate.
    def infer_relative_search_space(self, study, trial):
        """Use the intersection search space of the study's trials."""
        return optuna.samplers.intersection_search_space(study)

    def sample_independent(self, study, trial, param_name, param_distribution):
        """Fall back to random sampling for parameters outside the relative space."""
        independent_sampler = optuna.samplers.RandomSampler()
        return independent_sampler.sample_independent(study, trial, param_name, param_distribution)

In [None]:
class optimEnv(gym.Env):
    """Gym environment in which each action is a point of the unit square scored by `objective`.

    The observation is the objective value at the current location (shape
    (1,), float32). The reward is the improvement over the best value seen
    since the last reset. An episode ends after `iterations` steps.

    Args:
        objective: callable invoked with the 2-element location array and
            returning a scalar score.
        render_mode: must be None (rendering is not supported).
        iterations: number of steps per episode.

    NOTE(review): the notebook's `objective(trial, lr_n, bs_n)` does not have
    this one-argument signature; wrap it, e.g.
    `lambda x: objective(None, x[0], x[1])`.
    """
    metadata = {"render_modes": [], "render_fps": 4}

    def __init__(self, objective, render_mode=None, iterations=10):
        assert render_mode is None
        self.observation_space = spaces.Box(0, 100, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([1, 1]), dtype=np.float32)
        self.objective = objective
        self.iterations = iterations
        # Episode state; properly initialised by reset().
        self._agent_location = None
        self.current_step = 0
        self.max_observation = 0

    def _get_obs(self):
        """Evaluate the objective at the current location, shaped like observation_space."""
        score = self.objective(self._agent_location)
        return np.array([score], dtype=np.float32)

    def _get_info(self):
        return {"current_location": self._agent_location}

    def reset(self, seed=None, options=None):
        """Start a new episode at a random location; returns (observation, info)."""
        # We need the following line to seed self.np_random
        super().reset(seed=seed)
        self.current_step = 0
        self.max_observation = 0

        # Choose the agent's location uniformly at random in the unit square.
        self._agent_location = self.np_random.uniform(0.0, 1.0, size=2).astype(np.float32)

        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def step(self, action):
        """Move to `action`, score it, and return (obs, reward, terminated, truncated, info)."""
        self._agent_location = np.asarray(action, dtype=np.float32)

        observation = self._get_obs()
        info = self._get_info()

        # An episode is done once `iterations` steps have been taken.
        self.current_step += 1
        terminated = self.current_step >= self.iterations

        # Reward = improvement over the best score of the episode, as a plain float.
        score = float(observation[0])
        reward = score - self.max_observation
        if reward > 0:
            self.max_observation = score

        return observation, reward, terminated, False, info

In [None]:
class SACSampler(optuna.samplers.BaseSampler):
    """Optuna sampler that proposes parameters with a Soft Actor-Critic agent.

    The agent is trained on an `optimEnv` wrapping `objective`; its action for
    the environment's reset observation supplies the parameter values.

    Args:
        objective: callable passed to `optimEnv` (see that class for its signature).
        num_episodes: steps per environment episode; training runs for
            10 * num_episodes timesteps on every `sample_relative` call.
    """

    def __init__(self, objective, num_episodes=10):
        self._rng = np.random.RandomState()
        self._current_trial = None  # Current state.
        self.num_episodes = num_episodes
        self.env = optimEnv(objective, iterations=num_episodes)
        # Only `stable_baselines3 as sb3` is imported in this notebook, so reach
        # SAC through it and name the policy by its registered string.
        self.model = sb3.SAC("MlpPolicy", self.env, verbose=1)

    def sample_relative(self, study, trial, search_space):
        """Train the agent further and return {param_name: value} from its action."""
        if search_space == {}:
            return {}

        # Reject unsupported distributions before paying for training.
        for param_distribution in search_space.values():
            if (
                not isinstance(param_distribution, optuna.distributions.FloatDistribution)
                or (param_distribution.step is not None and param_distribution.step != 1)
                or param_distribution.log
            ):
                msg = (
                    "Only suggest_float() with `step` `None` or 1.0 and"
                    " `log` `False` is supported"
                )
                raise NotImplementedError(msg)

        # optimEnv.reset() returns (observation, info).
        obs, _info = self.env.reset()
        self.model.learn(total_timesteps=10 * self.num_episodes, log_interval=10)
        action, _ = self.model.predict(obs)

        if len(search_space) > len(action):
            raise ValueError(
                "search space has more parameters than the environment's action has dimensions"
            )

        # One action coordinate per parameter, kept inside the parameter's range.
        params = {}
        for i, (param_name, param_distribution) in enumerate(search_space.items()):
            params[param_name] = float(
                np.clip(action[i], param_distribution.low, param_distribution.high)
            )

        return params

    # The rest is unrelated to the SAC logic: boilerplate.
    def infer_relative_search_space(self, study, trial):
        """Use the intersection search space of the study's trials."""
        return optuna.samplers.intersection_search_space(study)

    def sample_independent(self, study, trial, param_name, param_distribution):
        """Fall back to random sampling for parameters outside the relative space."""
        independent_sampler = optuna.samplers.RandomSampler()
        return independent_sampler.sample_independent(study, trial, param_name, param_distribution)

In [15]:
from bayes_opt import BayesianOptimization
import time

In [54]:
# make data (#runs, #calls)

# Optuna
def get_run_optuna(study):
    """Return one objective value per trial of `study`, in trial order.

    A trial whose `value` is None (e.g. a failed or pruned one) repeats the
    previous entry so the curve keeps one point per trial. If there is no
    previous entry yet, NaN is used instead of raising IndexError.
    """
    run = []
    for trial in study.get_trials():
        value = trial.value
        if value is None:
            value = run[-1] if run else float("nan")
        run.append(value)

    return run

# BO
def get_run_BO(optimizer):
    """Return the objective value of every point probed by `optimizer`, in order.

    Each entry of `optimizer.res` is a record holding the probed parameters
    and the resulting 'target'. Only the target is kept, so a run is a flat
    list of numbers like the one built by get_run_optuna().
    """
    return [res["target"] for res in optimizer.res]

# PSO
from random import random
from random import uniform

#--- MAIN ---------------------------------------------------------------------+

class Particle:
    """One particle of the swarm: a position, a velocity and the best position it has evaluated.

    The number of dimensions is taken from `x0`, so the class does not depend
    on any module-level state.
    """

    def __init__(self, x0):
        self.position_i = list(x0)                       # particle position
        self.velocity_i = [uniform(-1, 1) for _ in x0]   # particle velocity
        self.pos_best_i = []        # best position individual (empty until evaluate())
        self.err_best_i = -1        # best error individual
        self.err_i = -1             # error individual

    # evaluate current fitness
    def evaluate(self, costFunc):
        """Score the current position and remember it if it is the best so far."""
        self.err_i = costFunc(self.position_i)

        # An empty pos_best_i means "no best recorded yet". Testing that, rather
        # than err_best_i == -1, keeps a genuine best cost of exactly -1 from
        # being overwritten by a worse one.
        if not self.pos_best_i or self.err_i < self.err_best_i:
            self.pos_best_i = self.position_i.copy()
            self.err_best_i = self.err_i

    # update new particle velocity
    def update_velocity(self, pos_best_g):
        """Blend inertia, pull towards the own best and pull towards the swarm best `pos_best_g`."""
        w = 0.5     # constant inertia weight (how much to weigh the previous velocity)
        c1 = 1      # cognitive constant
        c2 = 2      # social constant

        for i in range(len(self.position_i)):
            r1 = random()
            r2 = random()

            vel_cognitive = c1 * r1 * (self.pos_best_i[i] - self.position_i[i])
            vel_social = c2 * r2 * (pos_best_g[i] - self.position_i[i])
            self.velocity_i[i] = w * self.velocity_i[i] + vel_cognitive + vel_social

    # update the particle position based off new velocity updates
    def update_position(self, bounds):
        """Move by the current velocity, clamping each coordinate to bounds[i] = (min, max)."""
        for i in range(len(self.position_i)):
            self.position_i[i] = self.position_i[i] + self.velocity_i[i]

            # adjust maximum position if necessary
            if self.position_i[i] > bounds[i][1]:
                self.position_i[i] = bounds[i][1]

            # adjust minimum position if necessary
            if self.position_i[i] < bounds[i][0]:
                self.position_i[i] = bounds[i][0]

def pso_run(costFunc, x0, bounds, num_particles, maxiter, verbose=False):
    """Minimize `costFunc` with a simple particle swarm and record every evaluation.

    Args:
        costFunc: callable taking a position list and returning a cost to minimize.
        x0: initial position shared by all particles.
        bounds: sequence of (min, max) pairs, one per dimension.
        num_particles: size of the swarm.
        maxiter: number of swarm iterations (each one evaluates every particle).
        verbose: print the best cost at each iteration and the final solution.

    Returns:
        (err_best_g, pos_best_g, run): best cost found (-1 if nothing was
        evaluated), the position that produced it, and the list of negated
        costs in evaluation order.

    Side effects:
        Prints the run after every evaluation and saves it with np.save to a
        file named "PSO_runs10_calls32_run_<timestamp>" in the working directory.
    """
    global num_dimensions

    # The Particle helpers may read the dimensionality from this module-level name.
    num_dimensions = len(x0)
    err_best_g = -1                 # best error for group (-1 until something is evaluated)
    pos_best_g = []                 # best position for group
    # Explicit flag instead of comparing err_best_g with -1: costs may be
    # negative, so -1 can be a genuine best value that must not be discarded.
    has_best = False

    # establish the swarm
    swarm = [Particle(x0) for _ in range(num_particles)]

    run = []

    # begin optimization loop
    i = 0
    while i < maxiter:
        if verbose: print(f'iter: {i:>4d}, best solution: {err_best_g:10.6f}')

        # cycle through particles in swarm and evaluate fitness
        for particle in swarm:
            particle.evaluate(costFunc)

            run.append(-float(particle.err_i))
            print("run", run)

            # determine if current particle is the best (globally)
            if not has_best or particle.err_i < err_best_g:
                pos_best_g = list(particle.position_i)
                err_best_g = float(particle.err_i)
                has_best = True

        # cycle through swarm and update velocities and position
        for particle in swarm:
            particle.update_velocity(pos_best_g)
            particle.update_position(bounds)
        i += 1

    print("save run", run)
    # NOTE(review): the "runs10_calls32" part of the name is fixed and does not
    # reflect the arguments of this call.
    with open(f"PSO_runs10_calls32_run_{time.time()}", 'wb') as f:
        np.save(f, np.array(run))

    # print final results
    if verbose:
        print('\nFINAL SOLUTION:')
        print(f'   > {pos_best_g}')
        print(f'   > {err_best_g}\n')

    return err_best_g, pos_best_g, run

In [55]:
def benchmark_optuna(n_runs, objective, sampler, name_sampler, n_calls=32):
    """Run `n_runs` Optuna studies of `n_calls` trials each and collect their value curves.

    Args:
        n_runs: number of independent studies.
        objective: callable taking an Optuna trial and returning the value to maximize.
        sampler: Optuna sampler instance, shared by all studies.
        name_sampler: label used as the prefix of the output file name.
        n_calls: number of trials per study.

    Returns:
        Array built from the per-study curves; it is also saved with np.save
        to "<name_sampler>_runs<n_runs>_calls<n_calls>".
    """
    def run_once():
        study = optuna.create_study(direction="maximize", sampler=sampler)
        study.optimize(objective, n_trials=n_calls, timeout=None)
        return get_run_optuna(study)

    runs = [run_once() for _ in range(n_runs)]

    with open(f"{name_sampler}_runs{n_runs}_calls{n_calls}", 'wb') as f:
        np.save(f, np.array(runs))

    return np.array(runs)

def benchmark_bo(n_runs, objective, n_calls=32):
    """Run `n_runs` Bayesian-optimization searches of `n_calls` evaluations each.

    Args:
        n_runs: number of independent searches.
        objective: callable invoked as objective(None, x, y) with x, y in [0, 1];
            its return value is maximized.
        n_calls: total number of objective evaluations per search.

    Returns:
        Array built from the per-search runs; it is also saved with np.save
        to "BO_runs<n_runs>_calls<n_calls>".
    """
    runs = []
    pbounds = {'x': (0, 1), 'y': (0, 1)}
    # Split the budget so that init_points + n_iter == n_calls. The former
    # fixed 8 + 26 split performed 34 evaluations whatever n_calls was.
    init_points = min(8, n_calls)
    n_iter = n_calls - init_points
    for _ in range(n_runs):
        optimizer = BayesianOptimization(f=lambda x, y: objective(None, x, y), pbounds=pbounds, random_state=int(time.time()))
        optimizer.maximize(init_points=init_points, n_iter=n_iter)
        run = get_run_BO(optimizer)
        runs.append(run)

    with open(f"BO_runs{n_runs}_calls{n_calls}", 'wb') as f:
        np.save(f, np.array(runs))

    return np.array(runs)

def benchmark_pso(n_runs, objective, n_calls=32):
    """Run `n_runs` particle-swarm searches of about `n_calls` evaluations each.

    Args:
        n_runs: number of searches.
        objective: callable invoked as objective(None, x, y); its return value
            is maximized (the swarm minimizes its negation).
        n_calls: evaluation budget per search, spent as
            num_particles * maxiter evaluations (4 * 8 for the default 32).

    Returns:
        Array with one row of objective values per search; it is also saved
        with np.save to "PSO_runs<n_runs>_calls<n_calls>".
    """
    runs = []
    # NOTE(review): the starting point is drawn once and shared by all runs.
    initial = [np.random.random(), np.random.random()]
    bounds = [(0, 1), (0, 1)]
    num_particles = max(1, min(4, n_calls))
    maxiter = max(1, n_calls // num_particles)
    for _ in range(n_runs):
        # pso_run returns (best_cost, best_position, run); only the run, i.e.
        # the list of objective values, belongs in the benchmark table.
        _, _, run = pso_run(lambda x: -objective(None, x[0], x[1]), initial, bounds, num_particles, maxiter, False)
        print(run)
        runs.append(run)

    with open(f"PSO_runs{n_runs}_calls{n_calls}", 'wb') as f:
        np.save(f, np.array(runs))

    return np.array(runs)

# NOTE(review): bare reference to the built-in `print`; nothing is called, the
# cell merely displays the function's repr. Likely a leftover that can be removed.
print

<function print>

In [None]:
# Benchmark the particle-swarm optimizer: 10 runs with the default call budget.
runs_PSO = benchmark_pso(n_runs=10, objective=objective)