In [1]:
# Switch to the parent directory so the relative paths used by later cells
# ('bayesian_optimization/...', 'experimental_design/...') resolve.
# NOTE: not idempotent - re-running this cell moves one more directory up.
%cd ..

/Users/Matteo/PycharmProjects/TrafficEmu


# Bayesian optimization: finding the minimum time loss

In this notebook we perform Bayesian optimization on the emulator trained on our SUMO simulation. We are interested in identifying the parameters that yield the minimum time loss (average time loss relative to average route duration). Our emulator is a Gaussian process with an **RBF** kernel.


## Imports

In [30]:
import pickle
import numpy as np
import emukit as ek
import GPy
import pandas as pd

from emukit.model_wrappers import GPyModelWrapper
from emukit.core.initial_designs import RandomDesign
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop

from experimental_design import config
from sumo_grid_simulation.grid_simulation import Simulator

## Simulator

We start by initialising our simulation.

In [3]:
# Single simulator instance shared by every evaluation of the user function.
# NOTE(review): end_time=300 appears to correspond to the literal 300 used in the
# trip-period formula of user_function_time_loss - keep the two in sync.
simulator = Simulator(end_time=300)

We then define the user function in which we are interested:

In [4]:
def user_function_time_loss(X):
    """Run the simulator on every input point and return the relative time loss.

    emukit does not pass named arguments: it hands over one ndarray of shape
    (N, M), where N is the number of points to evaluate and M is the number of
    parameters per point. A single 1-D point of length M is also accepted and
    is treated as a batch of one.

    Each row is unpacked, in order, as:
    gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel.

    Returns
    -------
    np.ndarray of shape (N, 1)
        ``timeLoss / duration`` reported by the simulator for each point.
        The trailing axis is required by the acquisition function.
    """
    # Promote a lone 1-D point to a (1, M) batch so row unpacking works.
    X = np.atleast_2d(X)
    n_points = X.shape[0]

    # Loop-invariant constants of the trip-period formula.
    alpha = 0.005
    # NOTE(review): 300 presumably mirrors Simulator(end_time=300) - confirm.
    simulated_time = 300
    # NOTE(review): 5 looks like a per-vehicle length in metres - confirm.
    vehicle_length = 5

    result = []

    print(X)
    print(f'\nUser function time loss called with {n_points} inputs to simulate')

    for i, (gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel) in enumerate(X):
        print(f'\nEvaluating input: {i+1} of {n_points}\n')

        # Presumably the grid's total edge length divided by the vehicle
        # length, i.e. a capacity estimate - TODO confirm.
        max_number_of_vehicles = ((gridSize - 1) * gridSize * 2 + 4 * gridSize) * edgeLength / vehicle_length
        period = simulated_time / (max_number_of_vehicles * alpha)

        s = simulator.simulate(
            gridSize      = int(gridSize),   # rows arrive as floats
            edgeMaxSpeed  = edgeMaxSpeed,
            maxSpeed      = maxSpeed,
            edgeLength    = edgeLength,
            numberOfLanes = int(numLanes),   # rows arrive as floats
            accel         = accel,
            trips_generator_period = period
        )
        # average time loss / average route duration
        result.append(s['timeLoss'] / s['duration'])
        print(f'\nOutput {result[-1]}\n')

    # expand_dims is essential or the acquisition function breaks
    return np.expand_dims(np.array(result), 1)

We then load the parameter space from our configuration file:

In [5]:
# The parameter space comes from the experimental_design config module
# (imported above); its definition is not shown in this notebook.
parameter_space = config.get_parameter_space()

## Emulator

### Randomly initialised emulator

The first emulator we are going to analyse is the one that is initialised only with random points.

We first load 270 datapoints that were previously sampled at random from the parameter space and simulated:

In [6]:
# Load the cached initial design (inputs and simulated outputs).
# NOTE: pickle.load can execute arbitrary code - only load files produced by this project.
with open('bayesian_optimization/init_points/270_init_points_timeLoss.pkl', 'rb') as f:
    init_X, init_Y = pickle.load(f)

# Recipe that generated the cached file. It is kept as comments (not as a bare
# string literal) so that running this cell neither re-simulates nor echoes text.
# design = RandomDesign(parameter_space)
# num_init_points = 270
# init_X = design.get_samples(num_init_points)
# init_Y = user_function_time_loss(init_X)
# init_points = init_X, init_Y
# with open(f'bayesian_optimization/init_points/{num_init_points}_init_points_timeLoss.pkl', "wb") as f:
#     pickle.dump(init_points, f)

'\ndesign = RandomDesign(parameter_space)\nnum_init_points = 270\ninit_X = design.get_samples(num_init_points)\ninit_Y = user_function_time_loss(init_X)\ninit_points = init_X, init_Y\nwith open(f\'bayesian_optimization/init_points/{num_init_points}_init_points.pkl\', "wb") as f:\n     pickle.dump(init_points, f)\n'

We then fit a GP to these points:

In [7]:
# Fit a GP regression to the cached initial points. No kernel is passed, so GPy's
# default kernel is used (RBF, per the GPy documentation); noise_var=1e-10 sets a
# near-zero initial observation-noise variance.
emulator = GPy.models.GPRegression(init_X, init_Y, noise_var=1e-10)
# Wrap the GPy model for emukit; hyper-parameter optimisation uses 5 restarts.
emukit_model_random_init = GPyModelWrapper(emulator, n_restarts=5)
emukit_model_random_init.optimize()

Optimization restart 1/5, f = -430.9457829821215
Optimization restart 2/5, f = -513.918895028995
Optimization restart 3/5, f = 58.98723805204173
Optimization restart 4/5, f = 58.987238197761826
Optimization restart 5/5, f = -513.9188950294711


### Experimentally designed emulator with model variance

We also load the emulator obtained during experimental design:

In [8]:
# Emulator saved by the model-variance experimental design.
# NOTE: pickle.load can execute arbitrary code - only load files produced by this project.
with open('experimental_design/models/model_variance_20_init_points_250_loops_timeloss.pkl', 'rb') as f:
    emukit_model_variance = pickle.load(f)

### Experimentally designed emulator with integrated variance reduction

We also load the emulator obtained during experimental design:

In [9]:
# Emulator saved by the integrated-variance-reduction experimental design.
# NOTE: pickle.load can execute arbitrary code - only load files produced by this project.
with open('experimental_design/models/integrated_variance_reduction_20_init_points_250_loops_timeloss.pkl', 'rb') as f:
    emukit_model_integrated_variance = pickle.load(f)

## Bayesian optimisation

We now run Bayesian optimisation on each of the three emulators.

In [10]:
# Number of iterations passed to run_loop for each of the three emulators.
n_iter_bo = 40

### Acquisition functions

In [11]:
# Expected Improvement acquisition built on the randomly initialised emulator.
acquisition_random_init = ExpectedImprovement(emukit_model_random_init)

In [12]:
# Expected Improvement acquisition built on the model-variance emulator.
acquisition_model_variance = ExpectedImprovement(emukit_model_variance)

In [13]:
# Expected Improvement acquisition built on the integrated-variance-reduction emulator.
acquisition_integrated_variance = ExpectedImprovement(emukit_model_integrated_variance)

### Optimisation loop

In [14]:
# Bayesian-optimisation loop over the randomly initialised emulator.
bo_random_init = BayesianOptimizationLoop(parameter_space, emukit_model_random_init, acquisition=acquisition_random_init)
# Runs n_iter_bo iterations; the log below shows user_function_time_loss being
# called once per proposed point, i.e. one SUMO run per iteration (slow).
bo_random_init.run_loop(user_function_time_loss, n_iter_bo)

Optimization restart 1/5, f = -513.918895029527
Optimization restart 2/5, f = -513.9188950223364
Optimization restart 3/5, f = -513.9188950293928
Optimization restart 4/5, f = -513.9188950282564
Optimization restart 5/5, f = -513.9188950288608
[[15.  8. 27. 70.  1.  5.]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.18167094074773923

Optimization restart 1/5, f = -510.05604648694396




Optimization restart 2/5, f = -490.0552009401686
Optimization restart 3/5, f = -510.0560464867178
Optimization restart 4/5, f = -510.0560464801279
Optimization restart 5/5, f = 58.8874122551924
[[20.   8.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08285966906656562

Optimization restart 1/5, f = -512.035304247099
Optimization restart 2/5, f = -512.0353045425338
Optimization restart 3/5, f = 58.64188704393408
Optimization restart 4/5, f = -512.0353046798639
Optimization restart 5/5, f = -512.0353046798117
[[20.  25.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08285966906656562

Optimization restart 1/5, f = -513.9244503401255
Optimization restart 2/5, f = 58.39479467796457
Optimization restart 3/5, f = 58.39479332939834
Optimization restart 4/5, f = 58.39479388408944
Optimization restart 5/5, f = 



Optimization restart 2/5, f = -527.3831562641896
Optimization restart 3/5, f = -527.3831562660142
Optimization restart 4/5, f = -527.3831562659016
Optimization restart 5/5, f = -527.3831561982274
[[17.         25.          5.         60.93707977  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08458563806353714

Optimization restart 1/5, f = -529.8563483545231
Optimization restart 2/5, f = 56.56931887441715
Optimization restart 3/5, f = -529.8563745226498
Optimization restart 4/5, f = -529.8563745259527
Optimization restart 5/5, f = -529.8563745253052
[[18.         20.18874967  5.         55.82856897  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08519339738851935

Optimization restart 1/5, f = -532.603492823212
Optimization restart 2/5, f = -532.6034928248464
Optimization restart 3/5, f = -532.6034

 Retrying in 1 seconds
Output 0.09001748650497986

Optimization restart 1/5, f = -576.1214644701797
Optimization restart 2/5, f = -576.1214644696844
Optimization restart 3/5, f = -576.1214644715709
Optimization restart 4/5, f = -576.1214644713261
Optimization restart 5/5, f = -576.121464471627
[[20.          8.          5.         59.31868647  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08400162420058878

Optimization restart 1/5, f = -578.8001428637898
Optimization restart 2/5, f = -578.8001428835566
Optimization restart 3/5, f = -578.8001428911562
Optimization restart 4/5, f = -578.8001428911567
Optimization restart 5/5, f = -578.8001428908099
[[20.   8.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08285966906656562

Optimization restart 1/5, f = -581.5333207945328
Optimization restart 2/5, f =

In [15]:
# Bayesian-optimisation loop over the model-variance emulator.
bo_model_variance = BayesianOptimizationLoop(parameter_space, emukit_model_variance, acquisition=acquisition_model_variance)
# Runs n_iter_bo iterations; the log below shows user_function_time_loss being
# called once per proposed point, i.e. one SUMO run per iteration (slow).
bo_model_variance.run_loop(user_function_time_loss, n_iter_bo)

Optimization restart 1/5, f = -414.61439874911133
Optimization restart 2/5, f = -355.8677752688687
Optimization restart 3/5, f = -414.61439874879045
Optimization restart 4/5, f = -414.61439874913634
Optimization restart 5/5, f = -355.8677766959246
[[20.         15.93533543  5.         41.56336749  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08847985217823892

Optimization restart 1/5, f = -416.7732709326083
Optimization restart 2/5, f = -416.77327094129055
Optimization restart 3/5, f = -356.8820287799899
Optimization restart 4/5, f = -416.7732709411754
Optimization restart 5/5, f = -416.77327094155396
[[14.         25.          5.         63.23463779  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08745965040501857

Optimization restart 1/5, f = -419.0887355725222
Optimization restart 2/5, f = -3



Optimization restart 4/5, f = -325.98544661673293
Optimization restart 5/5, f = -442.12051883979296
[[20.         21.00066248  5.         59.11924183  3.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08496842580063148

Optimization restart 1/5, f = -444.7647693453917
Optimization restart 2/5, f = -444.76476849794346
Optimization restart 3/5, f = -332.7946211416262
Optimization restart 4/5, f = -444.76476934585475
Optimization restart 5/5, f = -391.58421950310253
[[15.          8.          7.37164175 59.50075484  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.09583575762613249

Optimization restart 1/5, f = -447.29036703017505
Optimization restart 2/5, f = -394.5788194403038
Optimization restart 3/5, f = -394.5788194396804
Optimization restart 4/5, f = -447.29036703011445
Optimization restart 5/5, f = 

Optimization restart 3/5, f = -378.351506215625
Optimization restart 4/5, f = -489.79825169637354
Optimization restart 5/5, f = -489.79825167759145
[[ 3.          8.          5.         38.66911554  3.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.10878787878787878

Optimization restart 1/5, f = -492.09602813122007
Optimization restart 2/5, f = -453.52899229305604
Optimization restart 3/5, f = -380.6258111064975
Optimization restart 4/5, f = -492.09602811192013
Optimization restart 5/5, f = -453.52899229517925
[[14.  25.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08420152802265321

Optimization restart 1/5, f = -494.7595513496457
Optimization restart 2/5, f = -383.52078264003023
Optimization restart 3/5, f = -456.37670350706037
Optimization restart 4/5, f = -456.37670350868336
Optimization restart 5

In [16]:
# Bayesian-optimisation loop over the integrated-variance-reduction emulator.
bo_integrated_variance = BayesianOptimizationLoop(parameter_space, emukit_model_integrated_variance, acquisition=acquisition_integrated_variance)
# Runs n_iter_bo iterations; the log below shows user_function_time_loss being
# called once per proposed point, i.e. one SUMO run per iteration (slow).
bo_integrated_variance.run_loop(user_function_time_loss, n_iter_bo)

Optimization restart 1/5, f = -452.57416906316786
Optimization restart 2/5, f = -452.5741690555608
Optimization restart 3/5, f = -452.57416906003954
Optimization restart 4/5, f = -452.574169062874
Optimization restart 5/5, f = 66.60320353047902
[[20.  25.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08285966906656562

Optimization restart 1/5, f = -454.53752973026394
Optimization restart 2/5, f = -454.5375297315962
Optimization restart 3/5, f = -454.537529721688
Optimization restart 4/5, f = -454.53752972818387
Optimization restart 5/5, f = -454.5375297318831
[[14.         25.          5.         40.31475225  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08951783903383424

Optimization restart 1/5, f = -456.52853704298013
Optimization restart 2/5, f = -456.5285370433488
Optimization restart 3/5, f 

 Retrying in 1 seconds
Output 0.08285966906656562

Optimization restart 1/5, f = -493.0505280642408




Optimization restart 2/5, f = -493.0505280642263
Optimization restart 3/5, f = -437.9531287584291
Optimization restart 4/5, f = -437.9531590914503
Optimization restart 5/5, f = -493.0505280633001
[[20.          8.          5.         66.89622938  1.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08425165737366642

Optimization restart 1/5, f = -495.52190712307623
Optimization restart 2/5, f = -495.5219071233147
Optimization restart 3/5, f = -495.52190712176514
Optimization restart 4/5, f = -443.3885022039475
Optimization restart 5/5, f = -495.52190712343616
[[20.  25.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08285966906656562

Optimization restart 1/5, f = -497.95216716887234
Optimization restart 2/5, f = -451.4771742466812
Optimization restart 3/5, f = -497.9521671690499
Optimization restart 4/5, 

Optimization restart 5/5, f = -537.7387351597029
[[ 3.   8.   5.  70.   1.   1.5]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.09576642335766422

Optimization restart 1/5, f = -539.2150505977169
Optimization restart 2/5, f = -539.2150505820346
Optimization restart 3/5, f = -536.732863736958
Optimization restart 4/5, f = -536.732923486942
Optimization restart 5/5, f = -539.2150505979209
[[20.         17.46745417  5.         49.261693    3.          1.5       ]]

User function time loss called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Output 0.08975945017182131

Optimization restart 1/5, f = -540.8204338607737
Optimization restart 2/5, f = -538.3283131531211
Optimization restart 3/5, f = -538.3283090095704
Optimization restart 4/5, f = -538.3283090450187
Optimization restart 5/5, f = -540.8204338646984
[[20.         21.18396992  5.         56.74217101  1.          1.5       ]]

User

## Results

In [35]:
# Column labels for the result tables. The order follows the column order unpacked
# by user_function_time_loss: gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel.
parameters_name = ['Size of the grid', 'Speed limit in the net (m/s)', 'Max vehicles speed (m/s)', 'Length of the roads (m)', 'Number of lanes', 'Accelleration (m/s^2)']

### Results on the random initialised emulator

In [36]:
# Collect the outcome of the loop run on the randomly initialised emulator
# (minimum_location and minimum_value are read in the following cells).
results_random_init = bo_random_init.get_results()

In [37]:
# One-row table of the best input found, labelled with the parameter names.
results_random_init_df = pd.DataFrame(
    results_random_init.minimum_location.reshape(1, -1),
    columns=parameters_name,
)
results_random_init_df

Unnamed: 0,Size of the grid,Speed limit in the net (m/s),Max vehicles speed (m/s),Length of the roads (m),Number of lanes,Accelleration (m/s^2)
0,20.0,8.0,5.957258,53.607237,1.0,1.5


In [38]:
# Minimum time-loss ratio reported by the results object, scaled to a percentage.
results_random_init.minimum_value*100

8.188153310104529

### Results on the experimentally designed emulator with model variance

In [39]:
# Collect the outcome of the loop run on the model-variance emulator
# (minimum_location and minimum_value are read in the following cells).
results_model_variance = bo_model_variance.get_results()

In [40]:
# One-row table of the best input found with the model-variance emulator.
# Stored under its own name so it does not overwrite the random-init table.
results_model_variance_df = pd.DataFrame(
    results_model_variance.minimum_location.reshape(1, -1),
    columns=parameters_name,
)
results_model_variance_df

Unnamed: 0,Size of the grid,Speed limit in the net (m/s),Max vehicles speed (m/s),Length of the roads (m),Number of lanes,Accelleration (m/s^2)
0,20.0,8.0,6.04871,70.0,1.0,1.5


In [41]:
# Minimum time-loss ratio reported by the results object, scaled to a percentage.
results_model_variance.minimum_value*100

7.975084243847645

### Results on the experimentally designed emulator with integrated variance reduction

In [42]:
# Collect the outcome of the loop run on the integrated-variance-reduction emulator
# (minimum_location and minimum_value are read in the following cells).
results_integrated_variance = bo_integrated_variance.get_results()

In [43]:
# One-row table of the best input found with the integrated-variance-reduction emulator.
# Stored under its own name so it does not overwrite the random-init table.
results_integrated_variance_df = pd.DataFrame(
    results_integrated_variance.minimum_location.reshape(1, -1),
    columns=parameters_name,
)
results_integrated_variance_df

Unnamed: 0,Size of the grid,Speed limit in the net (m/s),Max vehicles speed (m/s),Length of the roads (m),Number of lanes,Accelleration (m/s^2)
0,20.0,25.0,5.0,70.0,1.0,1.5


In [44]:
# Minimum time-loss ratio reported by the results object, scaled to a percentage.
results_integrated_variance.minimum_value*100

8.285966906656563

## Save stuff

In [28]:
# Persist the three optimisation results, one pickle per emulator.
# The label is interpolated into the file name, giving the same three paths as before.
results_to_save = {
    'random_init': results_random_init,
    'model_variance': results_model_variance,
    'integrated_variance': results_integrated_variance,
}

for label, results in results_to_save.items():
    with open(f'bayesian_optimization/bayesian_opt_results/results_{label}_timeLoss.pkl', "wb") as f:
        pickle.dump(results, f)