In [1]:
%cd ../

/Users/harrysonghurst/Documents/Coursework/L48 Machine Learning and the Physical World/TrafficEmu


In [2]:
# If you get a SciPy error when installing Emukit, build it from source:

# git clone https://github.com/amzn/Emukit.git
# cd Emukit
# pip install -r requirements/requirements.txt
# python setup.py develop

In [3]:
import numpy as np
import pickle
import emukit as ek
import GPy

from emukit.model_wrappers import GPyModelWrapper
from emukit.experimental_design.experimental_design_loop import ExperimentalDesignLoop
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.core.initial_designs import RandomDesign, latin_design
from emukit.experimental_design.acquisitions import ModelVariance, IntegratedVarianceReduction
from emukit.core.loop import UserFunctionWrapper
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop
from emukit.core.optimization import GradientAcquisitionOptimizer

from sumo_grid_simulation.grid_simulation import Simulator
import experimental_design.config as config

### Pseudocode

Fitting a Gaussian process to a simulator using Emukit takes the following form (taken from the L48 lectures):

```
initialize GP with some randomly chosen points
while stopping condition is not met:
    compute candidate point(s) using GP and acquisition function (model_variance) -> new point
    evaluate this new point with our simulator/user function -> observation
    update model with new observation -> new GP
```

### Simulator

In [4]:
# Single simulator instance shared by every call to user_function_time_loss.
# end_time=300 is the same 300 that user_function_time_loss uses when deriving
# the trip-generation period -- presumably seconds of simulated time; TODO confirm.
simulator = Simulator(end_time=300)

### User Function
 
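This is the function we want to understand. Namely, how are CO2 emissions and timeLoss (the time lost due to driving below the ideal speed) affected by the following parameters? The user function defined in this notebook covers timeLoss only: it returns the average timeLoss divided by the average route duration.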

```
gridSize: the size of the grid network where the simulation is carried out
edgeMaxSpeed: legal speed limit in m/s - this can be exceeded (11.11 == 40km/h)
maxSpeed: the absolute maximum velocity of any vehicle in m/s (55.55 == 200 km/h)
edgeLength: length of the roads between intersections in meters
numLanes: number of lanes per road
accel: The acceleration ability of vehicles in m/s^2.
```

*A complete list of parameters analysed is discussed in our report.*

In [11]:
def user_function_time_loss(X):
    """Run the traffic simulator on a batch of points and return relative time loss.

    Emukit doesn't pass named args, just an N x M ndarray: N is the number of
    points to evaluate and M is the number of parameters per point. Each row
    is unpacked as (gridSize, edgeMaxSpeed, numLanes, accel), so M must be 4.

    Args:
        X: ndarray of shape (N, 4). gridSize and numLanes are truncated to int
            before use; edgeMaxSpeed and accel are passed through unchanged.

    Returns:
        ndarray of shape (N, 1) holding s['timeLoss'] / s['duration'] for each
        row (average time loss / average route duration), where s is the dict
        returned by the module-level ``simulator.simulate(...)``.

    Raises:
        ValueError: from tuple unpacking if a row does not hold exactly 4 values.
    """
    # Constants of the trip-generation formula, hoisted out of the loop.
    # 300 equals the end_time the notebook's simulator is constructed with --
    # presumably seconds of simulated time; keep the two in sync. TODO confirm.
    simulation_time = 300
    alpha = 0.005
    # NOTE(review): the factor 10 looks like a per-edge vehicle allowance -- confirm.
    vehicles_per_edge = 10

    n_points = X.shape[0]
    result = []

    print(X)
    print(f'\nUser function time loss called with {n_points} inputs to simulate')

    for i, (gridSize, edgeMaxSpeed, numLanes, accel) in enumerate(X, start=1):
        print(f'\nEvaluating input: {i} of {n_points}\n')

        # Truncate once and use the same integer for both the vehicle count and
        # the simulator, so the trip period always matches the grid that is
        # actually simulated (identical to before for integer-valued gridSize).
        grid_size = int(gridSize)
        max_number_of_vehicles = (
            (grid_size - 1) * grid_size * 2 + 4 * grid_size
        ) * vehicles_per_edge
        period = simulation_time / (max_number_of_vehicles * alpha)

        s = simulator.simulate(
            gridSize=grid_size,
            edgeMaxSpeed=edgeMaxSpeed,
            numberOfLanes=int(numLanes),
            accel=accel,
            trips_generator_period=period,
        )
        # average time loss / average route duration
        result.append(s['timeLoss'] / s['duration'])

    # expand_dims is essential or the acquisition function breaks: the
    # observations must be returned as an (N, 1) column, not a flat vector.
    return np.expand_dims(np.array(result), 1)

### Model Inputs / parameter space

In [56]:
# Input domain for the emulator, defined in experimental_design.config.
# user_function_time_loss unpacks each sampled point as
# (gridSize, edgeMaxSpeed, numLanes, accel), so this space must yield exactly
# those four columns in that order -- verify against config.get_parameter_space().
parameter_space = config.get_parameter_space()

### Get init, test, and train points

Train points are used to fit a baseline GP without experimental design, so that we can verify that experimental design actually helps.

In [57]:
# Seed NumPy's global RNG so the three sample sets below are identical on every
# Restart & Run All. RandomDesign is assumed to draw from np.random (see the
# Emukit docs); the simulator may have its own, separate source of randomness.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# One RandomDesign serves all three draws; successive get_samples calls
# continue the same random stream, so the sets are drawn independently.
design = RandomDesign(parameter_space)

# 20 points used to initialise each GP before the experimental-design loops.
init_X = design.get_samples(20)
init_Y = user_function_time_loss(init_X)

# 50 held-out points used only for the RMSE evaluation.
test_X = design.get_samples(50)
test_Y = user_function_time_loss(test_X)

# 250 points for the baseline GP trained without experimental design.
train_X = design.get_samples(250)
train_Y = user_function_time_loss(train_X)

[[ 7.          9.59503611  2.          1.37621928]
 [ 6.         10.51706091  1.          1.49605149]
 [ 3.         13.91220573  1.          2.13969597]
 [ 3.         17.93894878  1.          2.81357603]
 [ 8.         12.73839795  3.          1.65390904]
 [ 6.          9.67471123  2.          1.79264984]
 [10.         11.46956136  1.          2.67417509]
 [ 3.         13.82467557  1.          1.75409524]
 [ 7.         16.41307344  2.          1.91362615]
 [ 5.         15.85933282  2.          1.67924305]
 [ 8.         15.29692411  2.          2.13448762]
 [ 4.         13.17954287  1.          1.33729367]
 [ 5.         10.89015214  2.          1.62849411]
 [ 6.         14.3214885   2.          2.95040701]
 [ 4.         19.78047493  2.          2.77721368]
 [10.         13.19454801  3.          2.41093324]
 [ 9.          8.40390059  2.          2.3356186 ]
 [ 3.         11.2281715   2.          1.34100593]
 [ 5.          9.44599072  2.          1.94223962]
 [ 3.          8.71662158  3.  

 Retrying in 1 seconds

Evaluating input: 2 of 200

 Retrying in 1 seconds

Evaluating input: 3 of 200

 Retrying in 1 seconds

Evaluating input: 4 of 200

 Retrying in 1 seconds

Evaluating input: 5 of 200

 Retrying in 1 seconds

Evaluating input: 6 of 200

 Retrying in 1 seconds

Evaluating input: 7 of 200

 Retrying in 1 seconds

Evaluating input: 8 of 200

 Retrying in 1 seconds

Evaluating input: 9 of 200

 Retrying in 1 seconds

Evaluating input: 10 of 200

 Retrying in 1 seconds

Evaluating input: 11 of 200

 Retrying in 1 seconds

Evaluating input: 12 of 200

 Retrying in 1 seconds

Evaluating input: 13 of 200

 Retrying in 1 seconds

Evaluating input: 14 of 200

 Retrying in 1 seconds

Evaluating input: 15 of 200

 Retrying in 1 seconds

Evaluating input: 16 of 200

 Retrying in 1 seconds

Evaluating input: 17 of 200

 Retrying in 1 seconds

Evaluating input: 18 of 200

 Retrying in 1 seconds

Evaluating input: 19 of 200

 Retrying in 1 seconds

Evaluating input: 20 of 200

 

 Retrying in 1 seconds

Evaluating input: 156 of 200

 Retrying in 1 seconds

Evaluating input: 157 of 200

 Retrying in 1 seconds

Evaluating input: 158 of 200

 Retrying in 1 seconds

Evaluating input: 159 of 200

 Retrying in 1 seconds

Evaluating input: 160 of 200

 Retrying in 1 seconds

Evaluating input: 161 of 200

 Retrying in 1 seconds

Evaluating input: 162 of 200

 Retrying in 1 seconds

Evaluating input: 163 of 200

 Retrying in 1 seconds

Evaluating input: 164 of 200

 Retrying in 1 seconds

Evaluating input: 165 of 200

 Retrying in 1 seconds

Evaluating input: 166 of 200

 Retrying in 1 seconds

Evaluating input: 167 of 200

 Retrying in 1 seconds

Evaluating input: 168 of 200

 Retrying in 1 seconds

Evaluating input: 169 of 200

 Retrying in 1 seconds

Evaluating input: 170 of 200

 Retrying in 1 seconds

Evaluating input: 171 of 200

 Retrying in 1 seconds

Evaluating input: 172 of 200

 Retrying in 1 seconds

Evaluating input: 173 of 200

 Retrying in 1 seconds

Eval

### Experimental Design - Model Variance

#### Model / Emulator (GP)

Our surrogate model is our emulator: in this case, a Gaussian process.

In [58]:
# GP emulator for the model-variance design, fitted to the initial points only.
# No kernel is passed, so GPy picks its default (the summary displayed below
# reports an RBF kernel with one shared lengthscale plus Gaussian noise).
emulator_ed = GPy.models.GPRegression(init_X, init_Y)
# Wrap for Emukit; n_restarts=5 gives the five optimisation restarts in the log.
emukit_model_ed = GPyModelWrapper(emulator_ed, n_restarts=5)
emukit_model_ed.optimize()
# Last expression: display the fitted hyper-parameters as the cell output.
emulator_ed

Optimization restart 1/5, f = -30.123663898287347
Optimization restart 2/5, f = -30.123663898309413
Optimization restart 3/5, f = -30.12366388272921
Optimization restart 4/5, f = -30.12366389830084
Optimization restart 5/5, f = -30.1236638983859


GP_regression.,value,constraints,priors
rbf.variance,0.5421099987279349,+ve,
rbf.lengthscale,29.59049031699629,+ve,
Gaussian_noise.variance,0.0009258824139696,+ve,


#### Acquisition Function

In [59]:
# Acquisition for the first design loop: Emukit's ModelVariance bound to the
# emulator above (it scores candidates by the GP's predictive variance, per the
# Emukit documentation).
model_variance = ModelVariance(model=emukit_model_ed)

#### Optimizer

In [60]:
# Optimiser that searches parameter_space for the best acquisition value.
# NOTE(review): the name `optimizer` is rebound in the Integrated Variance
# Reduction section, so the cells must be run in order.
optimizer = GradientAcquisitionOptimizer(parameter_space)

#### Experimental Design Loop

In [61]:
# Experimental-design loop driven by the model-variance acquisition.
# update_interval=1: the GP hyper-parameters are re-optimised on every
# iteration (visible as the repeated "Optimization restart" lines in the log).
# batch_size=5: each iteration proposes five new simulator inputs.
# NOTE(review): `ed_loop` is rebound in the Integrated Variance Reduction section.
ed_loop = ExperimentalDesignLoop(
    model=emukit_model_ed,
    space=parameter_space,
    acquisition=model_variance,
    acquisition_optimizer=optimizer,
    update_interval=1,
    batch_size=5,
)

In [62]:
# 50 iterations; the loop was built with batch_size=5, so this requests
# 50 * 5 = 250 simulator evaluations on top of the initial points.
# Expect this cell to run for a long time.
ed_loop.run_loop(user_function_time_loss, 50)

Optimization restart 1/5, f = -30.1236638983859
Optimization restart 2/5, f = -30.123663898138833
Optimization restart 3/5, f = -30.12366389817443
Optimization restart 4/5, f = -30.123663898307967
Optimization restart 5/5, f = -30.123663898206907
[[10. 20.  1.  1.]
 [ 3. 20.  3.  1.]
 [10. 20.  3.  3.]
 [10.  8.  3.  1.]
 [10.  8.  1.  3.]]

User function time loss called with 5 inputs to simulate

Evaluating input: 1 of 5





 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -39.50544502296353
Optimization restart 2/5, f = -39.50544504125409
Optimization restart 3/5, f = -39.50544504068958
Optimization restart 4/5, f = -39.50544504114181
Optimization restart 5/5, f = -39.50544504112854
[[ 3.  8.  3.  3.]
 [10. 20.  1.  3.]
 [ 3. 20.  3.  3.]
 [ 3.  8.  1.  3.]
 [ 3. 20.  1.  1.]]

User function time loss called with 5 inputs to simulate

Evaluating input: 1 of 5

 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -50.26256778710493
Optimization restart 2/5, f = -50.26256778708287
Optimization restart 3/5, f = 

 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -172.31095500632668
Optimization restart 2/5, f = -172.310953679405
Optimization restart 3/5, f = -172.31095403347152
Optimization restart 4/5, f = -172.31095154879267
Optimization restart 5/5, f = -174.8658444122023
[[10.        20.         1.         1.       ]
 [ 3.         8.         3.         1.       ]
 [ 3.        20.         1.         3.       ]
 [10.        13.8464026  3.         3.       ]
 [ 3.         8.         3.         3.       ]]

User function time loss called with 5 inputs to simulate

Evaluating input: 1 of 5

 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -303.7259338057023
Optimization restart 2/5, f = -303.7259138612302
Optimization restart 3/5, f = -296.0874392366146
Optimization restart 4/5, f = -303.7259099570591
Optimization restart 5/5, f = -296.087439197756
[[ 8.         15.03576802  1.          3.        ]
 [ 9.         16.68922673  1.          2.1591588 ]
 [ 7.         20.          2.          2.12474831]
 [ 3.         10.73276903  3.          1.95706552]
 [ 3.         12.00243101  1.          1.        ]]

User function time loss called with 5 inputs to simulate

Evaluating input: 1 of 5

 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -316.434289864104
Optimization restart 

 Retrying in 1 seconds
Optimization restart 1/5, f = -426.805548066304
Optimization restart 2/5, f = -426.8054822236734
Optimization restart 3/5, f = -404.3092480028472
Optimization restart 4/5, f = -404.30924806898315
Optimization restart 5/5, f = -426.80554775169395
[[ 3.          9.53498456  2.          3.        ]
 [10.         20.          2.          2.09882463]
 [ 7.         12.08520935  2.          2.16549814]
 [ 9.         20.          2.          3.        ]
 [ 3.         18.80915399  3.          1.        ]]

User function time loss called with 5 inputs to simulate

Evaluating input: 1 of 5

 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -441.86059626766746
Optimization restart 2/5, f = -418.30286768683254
Optimization restart 3/5, f = -441.860595648622
Optimization restar

Optimization restart 3/5, f = -508.7463718754773
Optimization restart 4/5, f = -508.7463718222681
Optimization restart 5/5, f = -508.7463718598524
[[ 7.         11.13582323  1.          3.        ]
 [10.         11.32885684  3.          2.10090197]
 [ 3.         16.30021209  1.          2.05000694]
 [ 3.          8.97147202  2.          1.        ]
 [ 9.         19.22862961  3.          3.        ]]

User function time loss called with 5 inputs to simulate

Evaluating input: 1 of 5

 Retrying in 1 seconds

Evaluating input: 2 of 5

 Retrying in 1 seconds

Evaluating input: 3 of 5

 Retrying in 1 seconds

Evaluating input: 4 of 5

 Retrying in 1 seconds

Evaluating input: 5 of 5

 Retrying in 1 seconds
Optimization restart 1/5, f = -535.556297984368
Optimization restart 2/5, f = -521.163832996526
Optimization restart 3/5, f = -521.1638329962389
Optimization restart 4/5, f = -521.163832976404
Optimization restart 5/5, f = -521.163832996574
[[ 8.         16.07180569  1.          1.       

### Experimental Design - Integrated Variance Reduction

#### Model / Emulator (GP)

In [68]:
# GP emulator for the integrated-variance-reduction design, fitted to the same
# initial points as the model-variance emulator.
# Unlike that emulator, this one starts from noise_var=1e-10. The noise
# parameter is not fixed here, so optimize() is presumably free to move it --
# NOTE(review): confirm whether a near-noiseless GP was intended.
emulator_ird = GPy.models.GPRegression(init_X, init_Y, noise_var=1e-10)
emukit_model_ird = GPyModelWrapper(emulator_ird, n_restarts=5)
emukit_model_ird.optimize()

Optimization restart 1/5, f = -19.61491373133359
Optimization restart 2/5, f = -30.123663898283255
Optimization restart 3/5, f = -30.123663898380876
Optimization restart 4/5, f = -30.123663898385853
Optimization restart 5/5, f = -30.123663898386102


#### Acquisition Function

In [69]:
# Acquisition for the second design loop: Emukit's IntegratedVarianceReduction,
# bound to the IVR emulator and the shared parameter space.
integrated_variance_reduction = IntegratedVarianceReduction(model=emukit_model_ird, space=parameter_space)

#### Optimizer

In [70]:
# Rebinds `optimizer` to a fresh optimiser over the same parameter_space; it is
# constructed exactly like the one used for the model-variance loop.
optimizer = GradientAcquisitionOptimizer(parameter_space)

#### Experimental Design Loop

In [71]:
# Experimental-design loop driven by integrated variance reduction.
# Rebinds `ed_loop`, replacing the model-variance loop object.
# update_interval=1 and batch_size=10: every iteration proposes ten new
# simulator inputs (twice the batch size of the model-variance loop).
ed_loop = ExperimentalDesignLoop(
    model=emukit_model_ird,
    space=parameter_space,
    acquisition=integrated_variance_reduction,
    acquisition_optimizer=optimizer,
    update_interval=1,
    batch_size=10,
)

In [72]:
# 10 iterations with batch_size=10 -> 10 * 10 = 100 simulator evaluations.
# NOTE(review): the model-variance loop ran 50 * 5 = 250 evaluations and the
# random baseline uses 250 training points, so the RMSE comparison further
# down is not like-for-like for this model.
ed_loop.run_loop(user_function_time_loss, 10)

Optimization restart 1/5, f = -30.123663898386102
Optimization restart 2/5, f = -30.12366389815559
Optimization restart 3/5, f = -30.123663898370488
Optimization restart 4/5, f = -29.915752486615947
Optimization restart 5/5, f = -30.12366389838657




[[10.         19.48493387  1.          1.48992291]
 [ 3.         18.56383444  3.          1.07153559]
 [10.         19.98036324  3.          2.53740027]
 [10.         10.31642425  3.          1.05533876]
 [ 9.         19.76758188  1.          1.13296382]
 [ 9.          8.06588965  1.          2.66951995]
 [ 4.         19.05832594  3.          1.11976761]
 [ 9.         16.11582068  3.          2.95037847]
 [ 3.          8.12812851  3.          2.64249375]
 [10.          8.96308323  3.          1.0660271 ]]

User function time loss called with 10 inputs to simulate

Evaluating input: 1 of 10

 Retrying in 1 seconds

Evaluating input: 2 of 10

 Retrying in 1 seconds

Evaluating input: 3 of 10

 Retrying in 1 seconds

Evaluating input: 4 of 10

 Retrying in 1 seconds

Evaluating input: 5 of 10

 Retrying in 1 seconds

Evaluating input: 6 of 10

 Retrying in 1 seconds

Evaluating input: 7 of 10

 Retrying in 1 seconds

Evaluating input: 8 of 10

 Retrying in 1 seconds

Evaluating input: 9 o

 Retrying in 1 seconds

Evaluating input: 2 of 10

 Retrying in 1 seconds

Evaluating input: 3 of 10

 Retrying in 1 seconds

Evaluating input: 4 of 10

 Retrying in 1 seconds

Evaluating input: 5 of 10

 Retrying in 1 seconds

Evaluating input: 6 of 10

 Retrying in 1 seconds

Evaluating input: 7 of 10

 Retrying in 1 seconds

Evaluating input: 8 of 10

 Retrying in 1 seconds

Evaluating input: 9 of 10

 Retrying in 1 seconds

Evaluating input: 10 of 10

 Retrying in 1 seconds
Optimization restart 1/5, f = -188.40113826278773
Optimization restart 2/5, f = -188.40113828092356
Optimization restart 3/5, f = -188.40113828110822
Optimization restart 4/5, f = -188.40113828092524
Optimization restart 5/5, f = -188.40113827035654
[[ 6.         19.72677365  1.          1.22757468]
 [10.          8.02027891  3.          2.81419752]
 [10.         12.53418302  1.          1.06271827]
 [10.         19.95910581  1.          1.26367345]
 [ 3.         12.49958454  3.          1.0944904 ]
 [ 3.       

### Random GP

Supervised learning

In [64]:
# Baseline: a GP fitted directly to the 250 randomly sampled training points,
# with no experimental-design loop. Built and optimised the same way as the
# model-variance emulator (default kernel, 5 restarts).
emulator_rnd = GPy.models.GPRegression(train_X, train_Y)
emukit_model_rnd = GPyModelWrapper(emulator_rnd, n_restarts=5)
emukit_model_rnd.optimize()
# Last expression: display the fitted hyper-parameters as the cell output.
emulator_rnd

Optimization restart 1/5, f = -425.091967728542
Optimization restart 2/5, f = -404.1223847688247
Optimization restart 3/5, f = -425.0919679879982
Optimization restart 4/5, f = -425.0919679820563
Optimization restart 5/5, f = -425.0919679879704


GP_regression.,value,constraints,priors
rbf.variance,0.2599709163088397,+ve,
rbf.lengthscale,14.710576636825746,+ve,
Gaussian_noise.variance,0.0006171249210919,+ve,


### Evaluate

Evaluate RMSE of each model on our test set of 50 randomly selected points.

In [65]:
# Score the model-variance emulator on the held-out test points.
# NOTE: `mse_ed` keeps its original name but holds the *root* mean squared
# error, as the printed label says.
pred_Y, pred_Y_variance = emukit_model_ed.predict(test_X)
mse_ed = np.sqrt(((pred_Y - test_Y) ** 2).mean())
print('Experimental Design w/ Model Variance RMSE: ', mse_ed)

Experimental Design w/ Model Variance RMSE:  0.016557482174249984


In [73]:
# Score the integrated-variance-reduction emulator on the held-out test points.
# NOTE: `mse_ird` keeps its original name but holds the *root* mean squared
# error. This overwrites pred_Y / pred_Y_variance from the previous cell.
pred_Y, pred_Y_variance = emukit_model_ird.predict(test_X)
mse_ird = np.sqrt(((pred_Y - test_Y) ** 2).mean())
print('Experimental Design w/ Integrated Variance Reduction RMSE: ', mse_ird)

Experimental Design w/ Integrated Variance Reduction RMSE:  0.03357609604099156


In [66]:
# Score the random-points baseline GP on the held-out test points.
# NOTE: `mse_rnd` keeps its original name but holds the *root* mean squared
# error. This overwrites pred_Y / pred_Y_variance from the previous cell.
pred_Y, pred_Y_variance = emukit_model_rnd.predict(test_X)
mse_rnd = np.sqrt(((pred_Y - test_Y) ** 2).mean())
print('Random GP RMSE: ', mse_rnd)

Random GP RMSE:  0.031080123842760262


In [67]:
# Relative RMSE reduction of the model-variance design versus the random
# baseline (mse_* hold RMSE values): p > 0 means a lower test error than the
# baseline. The bare f-string is the cell's displayed result.
p = 1 - (mse_ed/mse_rnd)
f'Experimental Design w/ Model Variance is {p*100:.3f}% better than training on random points.'

'Experimental Design w/ Model Variance is 46.726% better than training on random points.'

In [74]:
# Same relative RMSE reduction, now for the integrated-variance-reduction
# design; this overwrites `p` from the previous cell. A negative p means this
# model's test RMSE is higher than the random baseline's, so the message then
# reads "<negative>% better".
p = 1 - (mse_ird/mse_rnd)
f'Experimental Design w/ Integrated Variance Reduction is {p*100:.3f}% better than training on random points.'

'Experimental Design w/ Integrated Variance Reduction is -8.031% better than training on random points.'

### Save Models

In [76]:
# Persist the three fitted emulators with pickle, one file per model.
# The directory experimental_design/models/ is expected to exist already;
# open() does not create it.
for model_path, fitted_model in (
    ('experimental_design/models/model_variance_20_init_points_250_loops_timeloss.pkl', emukit_model_ed),
    ('experimental_design/models/integrated_variance_reduction_20_init_points_10_loops_timeloss.pkl', emukit_model_ird),
    ('experimental_design/models/250_random_points_supervised_GP_timeloss.pkl', emukit_model_rnd),
):
    with open(model_path, "wb") as f:
        pickle.dump(fitted_model, f)