In [1]:
# Change the working directory to the parent of the current one.
# NOTE(review): presumably this makes the project root the working directory so that
# `sumo_grid_simulation` can be imported — confirm. Re-running this cell moves up again.
%cd ../

/Users/harrysonghurst/Documents/Coursework/L48 Machine Learning and the Physical World/TrafficEmu


In [2]:
# If you get a SciPy error when installing Emukit, build it from source:

# git clone https://github.com/amzn/Emukit.git
# cd Emukit
# pip install -r requirements/requirements.txt
# python setup.py develop

In [3]:
import numpy as np
import pickle
import emukit as ek
import GPy

from emukit.model_wrappers import GPyModelWrapper
from emukit.experimental_design.experimental_design_loop import ExperimentalDesignLoop
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.core.initial_designs import RandomDesign, latin_design
from emukit.experimental_design.acquisitions import ModelVariance, IntegratedVarianceReduction
from emukit.core.loop import UserFunctionWrapper
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop

from sumo_grid_simulation.grid_simulation import Simulator

### Pseudocode

Fitting a Gaussian process to a simulator using Emukit takes the following form (taken from L48 lectures):

```
initialize GP with some randomly chosen points
while stopping condition is not met:
    compute candidate point(s) using GP and acquisition function (model_variance) -> new point
    evaluate this new point with our simulator/user function -> observation
    update model with new observation -> new GP
```

### Simulator

In [4]:
# The trips generator period is fixed at 10; per the original author's note,
# the simulation is otherwise very slow.
TRIPS_GENERATOR_PERIOD = 10
simulator = Simulator(trips_generator_period=TRIPS_GENERATOR_PERIOD)

### User Function
 
This is the function we want to understand. Namely, how are CO2 emissions and timeLoss (the time lost due to driving below the ideal speed) affected by the following parameters: 

```
edgeMaxSpeed: legal speed limit in m/s - this can be exceeded (11.11 == 40km/h)
maxSpeed: the absolute maximum velocity of any vehicle in m/s (55.55 == 200 km/h)
edgeLength: length of the roads between intersections in meters
numLanes: number of lanes per road
accel: The acceleration ability of vehicles in m/s^2.
```

*A complete list of parameters analysed is discussed in our report.*

In [11]:
def user_function_time_loss(X, verbose=True):
    """Run the simulator for every input row and return time loss per unit route length.

    Emukit does not pass named arguments: it calls this function with a single
    N x 6 ndarray whose columns are, in order,
    ``gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel``.

    Parameters
    ----------
    X : array-like, shape (N, 6)
        Simulator inputs, one point per row. A single 1-D point of length 6 is
        also accepted and treated as one row.
    verbose : bool, optional
        When True (the default, matching the original behaviour) ``X`` is
        printed before simulating, which logs every point the function is
        asked to evaluate. Pass False to silence this.

    Returns
    -------
    numpy.ndarray, shape (N, 1)
        ``timeLoss / routeLength`` taken from the simulator result for each row.

    Raises
    ------
    ValueError
        If a row does not contain exactly six values.
    """
    if verbose:
        print(X)

    points = np.asarray(X)
    if points.ndim == 1 and points.size:
        # A lone point: promote it to a one-row design matrix.
        points = points[np.newaxis, :]

    result = []
    for gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel in points:
        # int() truncates toward zero, so a fractional value of an integer-valued
        # input (e.g. gridSize 10.9) is simulated at the integer below it.
        s = simulator.simulate(
            gridSize      = int(gridSize),
            edgeMaxSpeed  = edgeMaxSpeed,
            maxSpeed      = maxSpeed,
            edgeLength    = int(edgeLength),
            numberOfLanes = int(numLanes),
            accel         = accel
        )
        # average time loss / average route length
        result.append(s['timeLoss'] / s['routeLength'])

    # The output must be 2-D (N x 1); without the extra axis the acquisition function breaks.
    return np.expand_dims(np.array(result), 1)

### Model (GP)

Our surrogate model (the emulator) is, in this case, a Gaussian process. I think that Emukit handles categorical/discrete inputs using one-hot encodings.

#### Model Inputs

In [12]:
# Input space of the emulator. The order of the list passed to ParameterSpace must match the
# order in which user_function_time_loss unpacks each row of X.
# Note: gridSize and edgeLength are declared continuous here, but the user function
# truncates them with int() before calling the simulator.
gridSize = ContinuousParameter('gridSize', min_value=3, max_value=20)
edgeMaxSpeed = ContinuousParameter('edgeMaxSpeed', min_value=1, max_value=25)
maxSpeed = ContinuousParameter('maxSpeed', min_value=1, max_value=25)
edgeLength = ContinuousParameter('edgeLength', min_value=2, max_value=200)
numberOfLanes = DiscreteParameter('numberOfLanes', domain=[1, 2, 3])
accel = ContinuousParameter('accel', min_value=1., max_value=6.)

parameter_space = ParameterSpace([
    gridSize,
    edgeMaxSpeed,
    maxSpeed,
    edgeLength,
    numberOfLanes,
    accel,
])

#### Initialize Model / Emulator (GP)

In [26]:
# Initial training set: random points from the parameter space, each evaluated with the simulator.
# Seeding NumPy's global RNG makes the design reproducible across kernel restarts
# (this relies on RandomDesign sampling through NumPy's global random state).
np.random.seed(0)
design = RandomDesign(parameter_space)  # initialize with random points
num_data_points = 250
init_X = design.get_samples(num_data_points)
# One simulator run per row, so this cell is expensive.
init_Y = user_function_time_loss(init_X)

[[ 10.33587602  11.01652992  13.58950958  19.27808634   1.
    1.44777348]
 [ 16.62791691  14.40574552   6.96809843  48.78288255   3.
    3.25599695]
 [  6.47560114   8.21401892  15.91831698  15.05012724   1.
    5.87045671]
 ...
 [  8.06979113  19.58746025  13.58715076  56.46764741   2.
    3.97766899]
 [  3.05518417  21.72042646   4.32578951  17.34817092   3.
    2.50860644]
 [ 18.71083031  11.78438375   4.16261765 134.54223062   1.
    5.14045339]]
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 

 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying i

In [27]:
# Store the initial design inputs and simulator outputs together as one pickled tuple.
with open('init_points_6_param.pkl', "wb") as init_points_file:
    pickle.dump((init_X, init_Y), init_points_file)

In [28]:
# Alternative: restore a previously saved emulator instead of building a new one:
# emulator = pickle.load(open('experimental_design_timeLoss_50_iter_batch_size_3.pkl',"rb"))

# GP regression on the initial design; no kernel is passed, so GPy's default is used.
emulator = GPy.models.GPRegression(X=init_X, Y=init_Y)
# Emukit works with the GPy model through this wrapper.
emukit_model = GPyModelWrapper(gpy_model=emulator)
# Last expression of the cell: display the model summary.
emulator

GP_regression.,value,constraints,priors
rbf.variance,1.0,+ve,
rbf.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [15]:
# Inspect the simulator outputs of the initial design.
# NOTE(review): the saved output of this cell has 25 rows and an earlier execution count (15)
# than the cell that builds the 250-point design (26), so it looks stale — re-run to refresh.
init_Y

array([[0.01992411],
       [0.26716679],
       [0.02106082],
       [0.02371084],
       [0.04873735],
       [0.12322937],
       [0.0312696 ],
       [0.13777407],
       [0.0149418 ],
       [0.02982264],
       [0.09335313],
       [0.0165484 ],
       [0.0402875 ],
       [0.04601946],
       [0.02772038],
       [0.12785355],
       [0.10427855],
       [0.11704698],
       [0.08456244],
       [0.02632688],
       [0.01334843],
       [0.06278568],
       [0.09267322],
       [0.01925865],
       [0.01294227]])

### Optimization

#### Acquisition Function

In [29]:
# Acquisition function used by the experimental-design loop, built on the wrapped GP.
# NOTE(review): presumably ModelVariance scores candidates by the model's predictive
# variance — confirm against the Emukit documentation.
model_variance = ModelVariance(model=emukit_model)

#### Experimental Design

In [30]:
# Experimental-design loop over the parameter space, driven by the model-variance
# acquisition; batch_size=1 requests a single new point per iteration.
ed_loop = ExperimentalDesignLoop(
    space=parameter_space,
    model=emukit_model,
    acquisition=model_variance,
    batch_size=1,
)

In [31]:
# Stopping condition passed to the loop; every iteration costs one simulator run.
NUM_ED_ITERATIONS = 100
ed_loop.run_loop(user_function_time_loss, NUM_ED_ITERATIONS)

Optimization restart 1/1, f = 235.8962930809298
[[20.         25.         13.98262421  2.          1.          6.        ]]

 Retrying in 1 seconds
Optimization restart 1/1, f = 236.5580789455865
[[20.         25.          1.         84.61935295  1.          1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 237.29632097924917
[[  3.          25.          25.         133.92424182   1.
    1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 237.94609374224393
[[ 20.           1.           1.         166.60062302   1.
    6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 238.68237384230633
[[ 3.          1.         25.         91.14256602  3.          6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 239.4488244800741
[[  3.          25.           1.         158.03418902   1.
    6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 240.19323651220444
[[20.         1.        25.        75.5945262  1.         1.     

 Retrying in 1 seconds
Optimization restart 1/1, f = 383.7163519059715
[[ 3.          1.          1.         30.27824654  1.          1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 384.879428080372
[[20.          1.         15.38198074 49.33838942  3.          1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 386.05182632083233
[[  3.          25.          25.         151.79291065   1.
    6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 387.2140970648919
[[20.          1.          1.         66.09634104  3.          6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 388.41756695826825
[[ 3.         25.          1.         54.39006596  3.          6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 389.6091421009205
[[ 3.          1.         25.         58.85989061  1.          1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 390.8208004910279
[[20.  1.  1.  2.  1.  1.]]

 Retrying in 1 se

 Retrying in 1 seconds
Optimization restart 1/1, f = 431.74781503966926
[[  3.          13.54423044  25.         101.36053572   3.
    6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 432.721240138559
[[  3.          25.           1.         189.57551466   3.
    6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 433.71697816018775
[[20.         25.          1.         31.07656165  3.          1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 434.77294137141655
[[20.          1.          1.         49.16814565  3.          6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 435.85578796234097
[[20.         14.52697321  1.         77.42874776  3.          1.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 436.8192365388153
[[  3.           1.           1.         173.26638153   3.
    6.        ]]
 Retrying in 1 seconds
Optimization restart 1/1, f = 437.81522258739494
[[20.         12.79015551  1.         62.50

### Evaluate

#### Test on 25 point test set

In [22]:
# Held-out test set: 25 random points from the same parameter space, evaluated with the simulator.
# Fixed seed for a reproducible test set; keep it distinct from any seed used for the
# training design so that the two sets do not coincide.
# (This relies on RandomDesign sampling through NumPy's global random state.)
np.random.seed(1)
# Note: `design` and `num_data_points` are rebound here, replacing the training-design values.
design = RandomDesign(parameter_space)  # initialize with random points
num_data_points = 25
test_X = design.get_samples(num_data_points)
test_Y = user_function_time_loss(test_X)

[[ 17.74164414   4.47819712  22.92517542 156.43432953   1.
    5.18996509]
 [  8.87903432  19.85880676  20.98060366 168.5313835    1.
    2.29685529]
 [  6.07937847  11.60478553  18.63913742 151.53272833   2.
    2.00224977]
 [ 14.7036176   24.29256155   5.67293463  93.11649403   2.
    4.07597088]
 [  3.06258204  20.00571735  14.09110268 143.32162282   2.
    3.4045103 ]
 [ 11.58053279  15.44211809   2.24862032 172.87603896   3.
    5.41479714]
 [ 17.27074678   1.15111185  19.16630237  90.85624074   3.
    4.83750778]
 [ 14.00199028  17.37110855  24.42397088 176.97111434   1.
    4.48150837]
 [  5.41545797  17.0223595    8.58741279  13.98218367   1.
    4.0218222 ]
 [  7.61614089   9.81779992   8.25310184  98.16649721   3.
    5.21924655]
 [  4.25353183  19.1639502    5.96550596 119.87989637   3.
    4.20040659]
 [  7.12923339  16.13449201  20.26763368  94.06885367   1.
    1.89357651]
 [ 19.9423617    8.39969859  17.11163329  12.06719419   3.
    4.99557225]
 [ 17.18360778  17.865493

In [32]:
# Store the test inputs and simulator outputs together as one pickled tuple.
with open('test_points_6_param.pkl', "wb") as test_points_file:
    pickle.dump((test_X, test_Y), test_points_file)

In [33]:
# Emulator predictions (mean and variance) at the held-out test inputs.
pred_Y, pred_Y_variance = emukit_model.predict(test_X)
# Note: despite its name, `mse` holds the *root* mean squared error.
test_residuals = pred_Y - test_Y
mse = np.sqrt(np.mean(np.square(test_residuals)))
print('Root mean squared error: ', mse)

Root mean squared error:  1.4445145369062213


In [34]:
# Show the emulator predictions next to the simulator outputs for the test points.
pred_Y, test_Y

(array([[2.45691158e-01],
        [2.38863539e-02],
        [4.69330831e-03],
        [1.00988806e-01],
        [1.35885465e-02],
        [1.05232247e-01],
        [2.28397253e-01],
        [9.31499341e-03],
        [7.26287369e+00],
        [3.83722195e-02],
        [1.34149205e-01],
        [2.27769023e-02],
        [3.36298296e-01],
        [1.33301769e-02],
        [1.01401805e-01],
        [4.72236366e-02],
        [7.42585919e-02],
        [1.17316377e-01],
        [1.51749925e-01],
        [6.84317207e-02],
        [1.48797727e-01],
        [2.29759632e-03],
        [2.24372114e-01],
        [1.03947071e-02],
        [1.12581139e-01]]),
 array([[0.07507789],
        [0.01842446],
        [0.01340747],
        [0.04006658],
        [0.01636273],
        [0.1492    ],
        [0.29265569],
        [0.01211676],
        [0.05204098],
        [0.02777613],
        [0.03821875],
        [0.02842034],
        [0.11653083],
        [0.01095996],
        [0.01356708],
        [0.0250339

In [19]:
# Optional: reload a previously saved test set instead of re-simulating it.
# test_X, test_Y = pickle.load(open('init_and_test_points/test_points_6_param.pkl', 'rb'))
# test_Y = test_Y[:,:,0]

# Predict at the initial design inputs.
# This rebinds pred_Y / pred_Y_variance, replacing the test-set predictions.
pred_Y, pred_Y_variance = emukit_model.predict(init_X)

In [20]:
# Error of the current pred_Y against the initial-design outputs.
# Note: despite its name, `mse` holds the *root* mean squared error.
init_residuals = pred_Y - init_Y
mse = np.sqrt(np.mean(np.square(init_residuals)))
print('Root mean squared error: ', mse)

Root mean squared error:  0.043233969041942356


In [21]:
# Show the emulator predictions next to the simulator outputs for the initial design points.
pred_Y, init_Y

(array([[0.0099622 ],
        [0.13358535],
        [0.01053057],
        [0.01185559],
        [0.02436903],
        [0.06161562],
        [0.01563503],
        [0.06888804],
        [0.00747101],
        [0.01491154],
        [0.04667725],
        [0.00827432],
        [0.02014405],
        [0.02301007],
        [0.01386039],
        [0.06392771],
        [0.05214004],
        [0.05852435],
        [0.04228184],
        [0.01316363],
        [0.00667431],
        [0.0313933 ],
        [0.04633733],
        [0.00962946],
        [0.00647123]]),
 array([[0.01992411],
        [0.26716679],
        [0.02106082],
        [0.02371084],
        [0.04873735],
        [0.12322937],
        [0.0312696 ],
        [0.13777407],
        [0.0149418 ],
        [0.02982264],
        [0.09335313],
        [0.0165484 ],
        [0.0402875 ],
        [0.04601946],
        [0.02772038],
        [0.12785355],
        [0.10427855],
        [0.11704698],
        [0.08456244],
        [0.02632688],
        

### Save Model

In [36]:
# Pickle the underlying GPy model (`emulator`), not the Emukit wrapper.
with open('timeLoss_model_variance_100_iter_batch_size_1.pkl', "wb") as model_file:
    pickle.dump(emulator, model_file)

### Analysis

As we can see, optimizing for model_variance causes emukit to alternate between extreme choices of \[gridSize, edgeMaxSpeed, maxSpeed, numberOfLanes, accel\] whilst only varying edgeLength sensibly. This means that we don't explore the parameter space comprehensively - which is undesirable.