In [1]:
# Move the working directory up one level; the relative paths used for the pickles
# in this notebook are resolved from there. NOTE(review): re-running this cell
# without restarting the kernel moves up one more level each time.
%cd ..

/Users/Matteo/PycharmProjects/TrafficEmu


# Bayesian optimization: finding the minimum time loss

In this notebook we will perform Bayesian optimization on the emulator trained on our SUMO simulation. We are interested in identifying the parameters which yield the minimum time loss, measured as the average time loss divided by the average route duration. Our emulator is a Gaussian process with an **RBF** kernel.


## Imports

In [2]:
import os
import pickle

import numpy as np
import emukit as ek
import GPy
import pandas as pd

from emukit.model_wrappers import GPyModelWrapper
from emukit.core.initial_designs import RandomDesign
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop

from sumo_grid_simulation.grid_simulation import Simulator
from experimental_design import config



## Simulator

We start by initialising our simulation.

In [3]:
# Single Simulator instance shared by every call to user_function_time_loss.
# NOTE(review): end_time=300 presumably has to stay in sync with the literal 300
# used to compute the trip-generation period in user_function_time_loss - confirm.
simulator = Simulator(end_time=300)

We then define the user function in which we are interested, i.e. the average time loss divided by the average route duration:

In [4]:
def user_function_time_loss(X):
    """Run one simulation per row of X and return the relative time loss of each run.

    emukit does not pass named arguments: X is a single NxM ndarray, where N is
    the number of points to evaluate and M the number of parameters per point.
    Each row is unpacked as (gridSize, edgeMaxSpeed, edgeLength, numLanes, accel).

    Returns an (N, 1) ndarray whose i-th entry is s['timeLoss'] / s['duration']
    for the simulation of row i.
    """
    n_points = X.shape[0]
    ratios = []

    print(X)
    print(f'\nUser function time loss called with {n_points} inputs to simulate')

    for position, (gridSize, edgeMaxSpeed, edgeLength, numLanes, accel) in enumerate(X, start=1):
        print(f'\nEvaluating input: {position} of {n_points}\n')

        # Trip-generation period scaled to the size of the network.
        # NOTE(review): the divisor 5 and beta = 0.05 are unexplained constants
        # (presumably metres per vehicle and a target load fraction) - confirm.
        beta = 0.05
        edge_count_term = (gridSize - 1) * gridSize * 2 + 4 * gridSize
        max_number_of_vehicles = edge_count_term * edgeLength / 5
        period = 300 / (max_number_of_vehicles * beta)

        # gridSize and numLanes arrive as floats from the ndarray and are cast to int.
        sim_kwargs = dict(
            gridSize=int(gridSize),
            edgeMaxSpeed=edgeMaxSpeed,
            edgeLength=edgeLength,
            numberOfLanes=int(numLanes),
            accel=accel,
            trips_generator_period=period,
        )
        s = simulator.simulate(**sim_kwargs)

        # average time loss / average route duration
        ratio = s['timeLoss'] / s['duration']
        ratios.append(ratio)
        print(f'\nOutput {ratio}\n')

    # the trailing axis is essential or the acquisition function breaks
    return np.array(ratios)[:, np.newaxis]

We then load the parameter space from our configuration file:

In [5]:
# Parameter space obtained from the experimental_design config module; it is used
# for the random initial design and for every optimisation loop in this notebook.
# NOTE(review): presumably its parameters are ordered as (gridSize, edgeMaxSpeed,
# edgeLength, numLanes, accel), the order user_function_time_loss unpacks - confirm.
parameter_space = config.get_parameter_space()

## Emulator

### Randomly initialised emulator

The first emulator we are going to analyse is the one that is initialised only with random points.

We first sample 520 datapoints at random from the parameter space and evaluate the simulator on each of them:

In [None]:
# Evaluating the initial design means running one simulation per point, which is
# slow. The evaluated points are therefore cached on disk: if the pickle already
# exists it is loaded, otherwise the points are sampled, simulated and saved.
# Delete the pickle to force a fresh set of simulations.
design = RandomDesign(parameter_space)
num_init_points = 520
init_points_path = f'bayesian_optimization/init_points/{num_init_points}_init_points_timeLoss.pkl'

if os.path.exists(init_points_path):
    # NOTE: pickle can execute arbitrary code on load; only load files written by this notebook.
    with open(init_points_path, 'rb') as f:
        init_X, init_Y = pickle.load(f)
else:
    init_X = design.get_samples(num_init_points)
    init_Y = user_function_time_loss(init_X)
    # Make sure the target directory exists before writing the cache.
    os.makedirs(os.path.dirname(init_points_path), exist_ok=True)
    with open(init_points_path, "wb") as f:
        pickle.dump((init_X, init_Y), f)

init_points = init_X, init_Y


[[20.         11.36676831 30.57219306  2.          2.4945695 ]
 [ 5.          8.72593315 38.43436756  2.          2.12985565]
 [18.         12.72195183 53.00240726  3.          4.75483272]
 ...
 [ 8.         16.7749665  33.76908728  1.          3.77541186]
 [ 8.         10.72705993 35.72697109  2.          1.6557556 ]
 [16.         16.68808991 50.68652082  2.          2.06663221]]

User function time loss called with 520 inputs to simulate

Evaluating input: 1 of 520

 Retrying in 1 seconds

Output 0.27952690321505913


Evaluating input: 2 of 520

 Retrying in 1 seconds

Output 0.19130150425114453


Evaluating input: 3 of 520

 Retrying in 1 seconds

Output 0.2722274216190922


Evaluating input: 4 of 520

 Retrying in 1 seconds

Output 0.2447104082068818


Evaluating input: 5 of 520

 Retrying in 1 seconds

Output 0.37267625394598386


Evaluating input: 6 of 520

 Retrying in 1 seconds

Output 0.35581245985870263


Evaluating input: 7 of 520

 Retrying in 1 seconds

Output 0.3948142699

 Retrying in 1 seconds

Output 0.30994364660970736


Evaluating input: 97 of 520

 Retrying in 1 seconds

Output 0.3550139963774082


Evaluating input: 98 of 520

 Retrying in 1 seconds

Output 0.2972222222222222


Evaluating input: 99 of 520

 Retrying in 1 seconds

Output 0.36053882725832015


Evaluating input: 100 of 520

 Retrying in 1 seconds

Output 0.25811437403400306


Evaluating input: 101 of 520

 Retrying in 1 seconds

Output 0.4408751038493492


Evaluating input: 102 of 520

 Retrying in 1 seconds

Output 0.4545722713864307


Evaluating input: 103 of 520

 Retrying in 1 seconds

Output 0.404


Evaluating input: 104 of 520

 Retrying in 1 seconds

Output 0.18309614717112116


Evaluating input: 105 of 520

 Retrying in 1 seconds

Output 0.31571399303421427


Evaluating input: 106 of 520

 Retrying in 1 seconds

Output 0.544154540893126


Evaluating input: 107 of 520

 Retrying in 1 seconds

Output 0.24092476171162036


Evaluating input: 108 of 520

 Retrying in 1 seconds

Out

 Retrying in 1 seconds

Output 0.4060795011691348


Evaluating input: 197 of 520

 Retrying in 1 seconds

Output 0.20585906571654788


Evaluating input: 198 of 520

 Retrying in 1 seconds

Output 0.39847133757961783


Evaluating input: 199 of 520

 Retrying in 1 seconds

Output 0.3519359723123513


Evaluating input: 200 of 520

 Retrying in 1 seconds

Output 0.3638149939540508


Evaluating input: 201 of 520

 Retrying in 1 seconds

Output 0.39314456035767514


Evaluating input: 202 of 520

 Retrying in 1 seconds

Output 0.33958333333333335


Evaluating input: 203 of 520

 Retrying in 1 seconds

Output 0.33383010432190763


Evaluating input: 204 of 520

 Retrying in 1 seconds

Output 0.2353960648384137


Evaluating input: 205 of 520

 Retrying in 1 seconds

Output 0.33651172030194676


Evaluating input: 206 of 520

 Retrying in 1 seconds

Output 0.19570528481419572


Evaluating input: 207 of 520

 Retrying in 1 seconds

Output 0.26771237222418043


Evaluating input: 208 of 520

 Retryin

 Retrying in 1 seconds

Output 0.2640491057254736


Evaluating input: 297 of 520

 Retrying in 1 seconds

Output 0.3137490007993605


Evaluating input: 298 of 520

 Retrying in 1 seconds

Output 0.16854201580081396


Evaluating input: 299 of 520

 Retrying in 1 seconds

Output 0.28104369695064446


Evaluating input: 300 of 520

 Retrying in 1 seconds

Output 0.26370757180156656


Evaluating input: 301 of 520

 Retrying in 1 seconds

Output 0.39098011363636365


Evaluating input: 302 of 520

 Retrying in 1 seconds

Output 0.394801545486477


Evaluating input: 303 of 520

 Retrying in 1 seconds

Output 0.36


Evaluating input: 304 of 520

 Retrying in 1 seconds

Output 0.2728068895051817


Evaluating input: 305 of 520

 Retrying in 1 seconds

Output 0.3107951247823564


Evaluating input: 306 of 520

 Retrying in 1 seconds

Output 0.29227124666864074


Evaluating input: 307 of 520

 Retrying in 1 seconds

Output 0.211963589076723


Evaluating input: 308 of 520

 Retrying in 1 seconds

Out

 Retrying in 1 seconds

Output 0.18277852843631004


Evaluating input: 397 of 520

 Retrying in 1 seconds

Output 0.42567567567567566


Evaluating input: 398 of 520

 Retrying in 1 seconds

Output 0.3236794924837953


Evaluating input: 399 of 520

 Retrying in 1 seconds

Output 0.41718838929994306


Evaluating input: 400 of 520

 Retrying in 1 seconds

Output 0.22191500256016386


Evaluating input: 401 of 520

 Retrying in 1 seconds

Output 0.24823997551270274


Evaluating input: 402 of 520

 Retrying in 1 seconds

Output 0.21275469700979094


Evaluating input: 403 of 520

 Retrying in 1 seconds

Output 0.42388953585093997


Evaluating input: 404 of 520

 Retrying in 1 seconds

Output 0.1827637444279346


Evaluating input: 405 of 520

 Retrying in 1 seconds

Output 0.29110666285387476


Evaluating input: 406 of 520

 Retrying in 1 seconds

Output 0.35177182368193605


Evaluating input: 407 of 520

 Retrying in 1 seconds

Output 0.25183268650280294


Evaluating input: 408 of 520

 Retry

We then fit a GP to these points:

In [None]:
# GP regression model on the randomly sampled inputs and their simulated outputs.
# No kernel is passed, so GPy's default is used (the introduction states an RBF
# kernel - TODO confirm). noise_var=1e-10 starts from an almost noise-free model.
emulator = GPy.models.GPRegression(init_X, init_Y, noise_var=1e-10)
# Wrap the GPy model so emukit can drive it; n_restarts=5 is presumably the number
# of optimiser restarts used when fitting the hyper-parameters - confirm.
emukit_model_random_init = GPyModelWrapper(emulator, n_restarts=5)
emukit_model_random_init.optimize()

### Experimentally designed emulator with model variance

We also load the emulator obtained during experimental design with the model variance acquisition:

In [None]:
# Emulator saved by the experimental-design run (model variance acquisition).
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: pickle can execute arbitrary code on load; only load trusted files.
with open('experimental_design/models/5param_model_variance_20_init_points_500_loops_timeloss_per_duration.pkl', 'rb') as f:
    emukit_model_variance = pickle.load(f)

### Experimentally designed emulator with integrated variance reduction

We also load the emulator obtained during experimental design with the integrated variance reduction acquisition:

In [None]:
# Emulator saved by the experimental-design run (integrated variance reduction).
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: pickle can execute arbitrary code on load; only load trusted files.
with open('experimental_design/models/5param_integrated_variance_reduction_20_init_points_500_loops_timeloss_per_duration.pkl', 'rb') as f:
    emukit_model_integrated_variance = pickle.load(f)

## Bayesian optimisation

We now run Bayesian optimisation on each of the three emulators:

In [None]:
# Iteration count passed to run_loop for each of the three optimisation loops.
n_iter_bo = 50

### Acquisition functions

In [None]:
# Expected-improvement acquisition built on the randomly initialised emulator.
acquisition_random_init = ExpectedImprovement(emukit_model_random_init)

In [None]:
# Expected-improvement acquisition built on the model-variance emulator.
acquisition_model_variance = ExpectedImprovement(emukit_model_variance)

In [None]:
# Expected-improvement acquisition built on the integrated-variance-reduction emulator.
acquisition_integrated_variance = ExpectedImprovement(emukit_model_integrated_variance)

### Optimisation loop

In [None]:
# Optimisation loop over the parameter space for the randomly initialised emulator.
bo_random_init = BayesianOptimizationLoop(parameter_space, emukit_model_random_init, acquisition=acquisition_random_init)
# run_loop is given the real simulator objective, so this cell runs new simulations.
# NOTE(review): presumably one point is evaluated per iteration and the wrapped
# model is updated in place - confirm against emukit's defaults.
bo_random_init.run_loop(user_function_time_loss, n_iter_bo)

In [None]:
# Optimisation loop over the parameter space for the model-variance emulator.
bo_model_variance = BayesianOptimizationLoop(parameter_space, emukit_model_variance, acquisition=acquisition_model_variance)
# run_loop is given the real simulator objective, so this cell runs new simulations.
# NOTE(review): presumably one point is evaluated per iteration and the wrapped
# model is updated in place - confirm against emukit's defaults.
bo_model_variance.run_loop(user_function_time_loss, n_iter_bo)

In [None]:
# Optimisation loop over the parameter space for the integrated-variance-reduction emulator.
bo_integrated_variance = BayesianOptimizationLoop(parameter_space, emukit_model_integrated_variance, acquisition=acquisition_integrated_variance)
# run_loop is given the real simulator objective, so this cell runs new simulations.
# NOTE(review): presumably one point is evaluated per iteration and the wrapped
# model is updated in place - confirm against emukit's defaults.
bo_integrated_variance.run_loop(user_function_time_loss, n_iter_bo)

## Results

In [None]:
# Column labels for the result tables, in the same order in which
# user_function_time_loss unpacks each input row
# (gridSize, edgeMaxSpeed, edgeLength, numLanes, accel).
parameters_name = [
    'Size of the grid',
    'Speed limit in the net (m/s)',
    'Length of the roads (m)',
    'Number of lanes',
    'Acceleration (m/s^2)',
]

### Results on the random initialised emulator

In [None]:
# Results object of the random-init loop; its minimum_location and minimum_value
# attributes are read in the cells below.
results_random_init = bo_random_init.get_results()

In [None]:
# One-row table of the best input found by the random-init loop,
# with one labelled column per simulator parameter.
results_random_init_df = pd.DataFrame(
    results_random_init.minimum_location.reshape(1, -1),
    columns=parameters_name,
)
results_random_init_df

In [None]:
# minimum_value of the random-init loop times 100: the objective is the
# timeLoss/duration ratio, so this reads as a percentage of the route duration.
results_random_init.minimum_value*100

### Results on the experimentally designed emulator with model variance

In [None]:
# Results object of the model-variance loop; its minimum_location and
# minimum_value attributes are read in the cells below.
results_model_variance = bo_model_variance.get_results()

In [None]:
# One-row table of the best input found by the model-variance loop.
# It is stored under its own name so that it no longer overwrites the
# random-init table (results_random_init_df).
results_model_variance_df = pd.DataFrame(
    results_model_variance.minimum_location.reshape(1, -1),
    columns=parameters_name,
)
results_model_variance_df

In [None]:
# minimum_value of the model-variance loop times 100: the objective is the
# timeLoss/duration ratio, so this reads as a percentage of the route duration.
results_model_variance.minimum_value*100

### Results on the experimentally designed emulator with integrated variance reduction

In [None]:
# Results object of the integrated-variance-reduction loop; its minimum_location
# and minimum_value attributes are read in the cells below.
results_integrated_variance = bo_integrated_variance.get_results()

In [None]:
# One-row table of the best input found by the integrated-variance-reduction loop.
# It is stored under its own name so that it no longer overwrites the
# random-init table (results_random_init_df).
results_integrated_variance_df = pd.DataFrame(
    results_integrated_variance.minimum_location.reshape(1, -1),
    columns=parameters_name,
)
results_integrated_variance_df

In [None]:
# minimum_value of the integrated-variance-reduction loop times 100: the objective
# is the timeLoss/duration ratio, so this reads as a percentage of the route duration.
results_integrated_variance.minimum_value*100

## Save results and models

In [None]:
# Write the three optimisation results to disk, one pickle per emulator.
for results_path, results_obj in (
    ('bayesian_optimization/bayesian_opt_results/results_random_init_timeLoss.pkl', results_random_init),
    ('bayesian_optimization/bayesian_opt_results/results_model_variance_timeLoss.pkl', results_model_variance),
    ('bayesian_optimization/bayesian_opt_results/results_integrated_variance_timeLoss.pkl', results_integrated_variance),
):
    with open(results_path, "wb") as f:
        pickle.dump(results_obj, f)

In [None]:
# Write the three emulators to disk as they stand after the optimisation loops,
# one pickle per emulator.
for model_path, model_obj in (
    ('bayesian_optimization/models/520_random_init_50_bay_opt_timeLoss.pkl', emukit_model_random_init),
    ('bayesian_optimization/models/520_model_variance_50_bay_timeLoss.pkl', emukit_model_variance),
    ('bayesian_optimization/models/520_integrated_variance_50_bay_timeLoss.pkl', emukit_model_integrated_variance),
):
    with open(model_path, "wb") as f:
        pickle.dump(model_obj, f)