In [1]:
# Step up one directory so the relative paths ('experimental_design/...') and project imports used below resolve.
%cd ../

/Users/Matteo/PycharmProjects/TrafficEmu


In [2]:
# If you get a SciPy error when installing Emukit, build it from source:

# git clone https://github.com/amzn/Emukit.git
# cd Emukit
# pip install -r requirements/requirements.txt
# python setup.py develop

In [3]:
import numpy as np
import pickle
import emukit as ek
import GPy

from emukit.model_wrappers import GPyModelWrapper
from emukit.experimental_design.experimental_design_loop import ExperimentalDesignLoop
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.core.initial_designs import RandomDesign, latin_design
from emukit.experimental_design.acquisitions import ModelVariance, IntegratedVarianceReduction
from emukit.core.loop import UserFunctionWrapper
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop
from emukit.core.optimization import GradientAcquisitionOptimizer

from sumo_grid_simulation.grid_simulation import Simulator

### Pseudocode

Fitting a gaussian process to a simulator using Emukit takes the following form (taken from L48 lectures):

```
initialize GP with some randomly chosen points
while stopping condition is not met:
    compute candidate point(s) using GP and acquisition function (model_variance) -> new point
    evaluate this new point with our simulator/user function -> observation
    update model with new observation -> new GP
```

### Simulator

In [4]:
# Simulator instance from sumo_grid_simulation, called by the user function defined further down.
# trips_generator_period is set to 5 because otherwise the simulation is very slow.
simulator = Simulator(trips_generator_period=5, end_time=300)

### User Function
 
This is the function we want to understand. Namely, how are CO2 emissions and timeLoss (the time lost due to driving below the ideal speed) affected by the following parameters:

```
gridSize: the size of the grid network where the simulation is carried out
edgeMaxSpeed: legal speed limit in m/s - this can be exceeded (11.11 == 40km/h)
maxSpeed: the absolute maximum velocity of any vehicle in m/s (55.55 == 200 km/h)
edgeLength: length of the roads between intersections in meters
numLanes: number of lanes per road
accel: The acceleration ability of vehicles in m/s^2.
```

*A complete list of parameters analysed is discussed in our report.*

In [5]:
def user_function_time_loss(X):
    """Simulate every row of X and return the time loss normalised by route length.

    Emukit does not pass named arguments; it hands over a single N x M ndarray,
    where N is the number of points to evaluate and M the number of parameters
    per point. Each row is unpacked, in order, as
    (gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel).

    Returns an (N, 1) ndarray holding timeLoss / routeLength for each row.
    """
    n_points = X.shape[0]

    print(X)
    print(f'\nUser function time loss called with {n_points} inputs to simulate')

    normalised_losses = []
    for position, row in enumerate(X, start=1):
        gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel = row
        print(f'\nEvaluating input: {position} of {n_points}\n')

        # gridSize, edgeLength and the lane count are truncated to integers before simulating.
        stats = simulator.simulate(
            gridSize=int(gridSize),
            edgeMaxSpeed=edgeMaxSpeed,
            maxSpeed=maxSpeed,
            edgeLength=int(edgeLength),
            numberOfLanes=int(numLanes),
            accel=accel,
        )

        # average time loss / average route length
        normalised_losses.append(stats['timeLoss'] / stats['routeLength'])

    # The trailing axis is essential: without an (N, 1) result the acquisition function breaks.
    return np.array(normalised_losses)[:, np.newaxis]

### Model (GP)

Our surrogate model is our emulator. In this case, a gaussian process. I think that emukit handles categorical/discrete inputs using one-hot encodings.

#### Model Inputs

In [6]:
# Earlier inline definition of the six-parameter input space, kept commented out for reference only.
# The live space now comes from experimental_design.config (see the last two lines of this cell);
# the ranges shown here may no longer match it — verify against config.get_parameter_space().
# gridSize = ContinuousParameter('gridSize', min_value=3, max_value=20)
# edgeMaxSpeed = ContinuousParameter('edgeMaxSpeed', min_value=1, max_value=25)
# maxSpeed = ContinuousParameter('maxSpeed', min_value=1, max_value=25)
# edgeLength = ContinuousParameter('edgeLength', min_value=30, max_value=200)
# numberOfLanes = DiscreteParameter('numberOfLanes', domain=[1,2,3])
# accel = ContinuousParameter('accel', 1., 6.)

# parameter_space = ParameterSpace([gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numberOfLanes, accel])
# Parameter space is built by the project's config module rather than defined in this notebook.
import experimental_design.config as config
parameter_space = config.get_parameter_space()

#### Initialize Model / Emulator (GP)

In [9]:
# Load the cached initial design: init_X holds the sampled inputs, init_Y the simulator outputs.
# SECURITY: pickle.load can execute arbitrary code — only open files produced by this project.
# The context manager closes the file handle, which the previous bare open() never did.
with open('experimental_design/init_points/25_init_points.pkl', 'rb') as f:
    init_X, init_Y = pickle.load(f)

# Recipe used to generate (and cache) an initial design instead of loading one. It is slow,
# since every point costs one simulator run. Kept as comments rather than a bare string literal,
# which is an expression statement and not a comment. Use num_init_points = 25 to recreate
# the file loaded above.
#
# design = RandomDesign(parameter_space)  # initialize with random points
# num_init_points = 5
# init_X = design.get_samples(num_init_points)
# init_Y = user_function_time_loss(init_X)
# with open(f'experimental_design/init_points/{num_init_points}_init_points.pkl', "wb") as f:
#     pickle.dump((init_X, init_Y), f)

# Shape sanity check; matches the recorded cell output "(25, 6) (25, 1)".
print(init_X.shape, init_Y.shape)

(25, 6) (25, 1)


In [10]:
# Fit a GP regression surrogate to the initial design.
# noise_var=1e-10 is only the starting value: the fitted table in the recorded output reports
# Gaussian_noise.variance of about 2.9e-05 with a "+ve" constraint, so it is optimised, not fixed.
emulator = GPy.models.GPRegression(init_X, init_Y, noise_var=1e-10)
# n_restarts=5 corresponds to the five "Optimization restart k/5" lines logged on each optimize() call.
emukit_model = GPyModelWrapper(emulator, n_restarts=5)
emukit_model.optimize()
# Bare last expression so the notebook displays the fitted hyperparameter table.
emulator

Optimization restart 1/5, f = -67.56624250620654
Optimization restart 2/5, f = -58.26021121824461
Optimization restart 3/5, f = -88.03728038059276
Optimization restart 4/5, f = -58.2602110850459
Optimization restart 5/5, f = -88.03728034348259


GP_regression.,value,constraints,priors
rbf.variance,0.0009275425340531,+ve,
rbf.lengthscale,143.76990248777886,+ve,
Gaussian_noise.variance,2.8887630195702812e-05,+ve,


### Optimization

#### Acquisition Function

In [11]:
# Acquisition built on the wrapped GP; presumably scores candidates by the model's predictive variance — verify against the Emukit docs.
model_variance = ModelVariance(model=emukit_model)

#### Optimizer

In [12]:
# Acquisition optimiser restricted to parameter_space; presumably gradient-based, per its name — verify against the Emukit docs.
optimizer = GradientAcquisitionOptimizer(parameter_space)

#### Experimental Design

In [13]:
# Assemble the experimental-design loop from the pieces defined above:
# the wrapped GP, the input space, the acquisition and the acquisition optimiser.
ed_loop = ExperimentalDesignLoop(
    model = emukit_model,
    space = parameter_space,
    acquisition = model_variance,
    acquisition_optimizer = optimizer,
)

In [14]:
# Run the loop with a stopping condition of 50. Per the recorded log, each iteration re-fits the GP
# (5 restarts), proposes one point and evaluates it in the simulator, so this cell is slow.
ed_loop.run_loop(user_function_time_loss, 50)

Optimization restart 1/5, f = -88.03728038061342
Optimization restart 2/5, f = -58.2602110852974
Optimization restart 3/5, f = -58.26021108504591
Optimization restart 4/5, f = -88.03728038064665
Optimization restart 5/5, f = -58.26021108505367
[[ 3.25010935 11.60702147  9.80451195 30.6167518   1.          3.65652771]]

User function called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Optimization restart 1/5, f = -89.28516385845552
Optimization restart 2/5, f = -59.22505575266523
Optimization restart 3/5, f = -59.22505531316208
Optimization restart 4/5, f = -69.41132555025314
Optimization restart 5/5, f = -59.22521104868012
[[15.65725923 23.28973996 46.25199881 30.44766253  2.          2.4496226 ]]

User function called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Optimization restart 1/5, f = -87.05141270365638
Optimization restart 2/5, f = -58.91609009001341
Optimization restart 3/5, f = -58.91609007589271
Optimization res

 Retrying in 1 seconds
Optimization restart 1/5, f = -137.79559500861265
Optimization restart 2/5, f = -137.86455093794066
Optimization restart 3/5, f = -137.86455093610115
Optimization restart 4/5, f = -91.05781358836978
Optimization restart 5/5, f = -91.05781313936552
[[ 19.05777501   8.97152213  48.30509452 120.80425343   2.
    4.53070036]]

User function called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Optimization restart 1/5, f = -141.56492326068854
Optimization restart 2/5, f = -141.5649232606904
Optimization restart 3/5, f = -93.39065897433434
Optimization restart 4/5, f = -93.3906616876145
Optimization restart 5/5, f = -93.39068625070362
[[  3.0842864    9.21062233  43.86566103 149.9543689    2.
    4.46163807]]

User function called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Optimization restart 1/5, f = -145.24135765623004
Optimization restart 2/5, f = -145.2413576841675
Optimization restart 3/5, f = -95.712

Optimization restart 4/5, f = -188.34860311368976
Optimization restart 5/5, f = -123.06613283322338
[[ 4.75808338  8.49925356  5.83769709 32.93019116  3.          4.75256513]]

User function called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Optimization restart 1/5, f = -186.98098800878068
Optimization restart 2/5, f = -157.5129134541013
Optimization restart 3/5, f = -186.98098801266133
Optimization restart 4/5, f = -186.98098801262094
Optimization restart 5/5, f = -123.53368708833726
[[ 17.57459252  23.91457188  49.79448099 115.87033784   1.
    4.97028964]]

User function called with 1 inputs to simulate

Evaluating input: 1 of 1

 Retrying in 1 seconds
Optimization restart 1/5, f = -190.3952215182524
Optimization restart 2/5, f = -190.39522151779266
Optimization restart 3/5, f = -190.39522151813867
Optimization restart 4/5, f = -190.39522151786883
Optimization restart 5/5, f = -190.39522150025152
[[  3.98850013  24.07808639  47.2641434  147.87380008 

In [49]:
#emukit_model = pickle.load(open('experimental_design/models/25_init_50_loop_model_timeLoss.pkl', 'rb'))

### Evaluate

#### Test on 25 point test set

In [47]:
# Load the cached held-out test set: test_X holds the inputs, test_Y the simulator outputs.
# SECURITY: pickle.load can execute arbitrary code — only open files produced by this project.
# The context manager closes the file handle, which the previous bare open() never did.
with open('experimental_design/test_points/25_test_points.pkl', 'rb') as f:
    test_X, test_Y = pickle.load(f)

# Recipe used to generate (and cache) the test set instead of loading it. It is slow, since
# every point costs one simulator run. Kept as comments: as a bare string literal it was the
# cell's last expression and was echoed back as the cell output.
#
# design = RandomDesign(parameter_space)  # initialize with random points
# num_test_points = 25
# test_X = design.get_samples(num_test_points)
# test_Y = user_function_time_loss(test_X)
# with open(f'experimental_design/test_points/{num_test_points}_test_points.pkl', "wb") as f:
#     pickle.dump((test_X, test_Y), f)

'\ndesign = RandomDesign(parameter_space)  # initialize with random points\nnum_test_points = 25\ntest_X = design.get_samples(num_test_points)\ntest_Y = user_function_time_loss(test_X)\nwith open(f\'experimental_design/test_points/{num_test_points}_test_points.pkl\', "wb") as f:\n      pickle.dump((test_X, test_Y), f)\n'

In [48]:
# Predict the held-out test inputs with the emulator (mean and variance per point).
pred_Y, pred_Y_variance = emukit_model.predict(test_X)

# Guard against silent broadcasting: an (N, 1) minus (N,) difference would become an (N, N)
# matrix and the score below would be meaningless without raising any error.
assert pred_Y.shape == test_Y.shape, f'shape mismatch: {pred_Y.shape} vs {test_Y.shape}'

# Root mean squared error between predicted and simulated outputs.
rmse = np.sqrt(np.mean((pred_Y - test_Y) ** 2))
mse = rmse  # legacy name: the value is an RMSE, not an MSE; kept for any cell that still reads `mse`
print('Root mean squared error: ', rmse)

Root mean squared error:  0.008739673417911042


In [26]:
# Display the emulator predictions next to the simulated test outputs for a visual comparison.
pred_Y, test_Y

(array([[0.01658246],
        [0.04212226],
        [0.01949976],
        [0.01287081],
        [0.03033757],
        [0.02277313],
        [0.0259808 ],
        [0.02369225],
        [0.01841968],
        [0.04291207],
        [0.03086956],
        [0.02186341],
        [0.02258066],
        [0.04486932],
        [0.01892184],
        [0.02433614],
        [0.01556627],
        [0.00840678],
        [0.01468552],
        [0.02746998],
        [0.02683873],
        [0.02339145],
        [0.0321329 ],
        [0.03046666],
        [0.02736916]]),
 array([[0.01702326],
        [0.0330894 ],
        [0.01332759],
        [0.01769968],
        [0.02957279],
        [0.01432535],
        [0.02180392],
        [0.01997998],
        [0.01215301],
        [0.03478654],
        [0.06148365],
        [0.02054215],
        [0.0197868 ],
        [0.06747359],
        [0.02135404],
        [0.02068567],
        [0.01967629],
        [0.01268896],
        [0.01406276],
        [0.03109393],
        

### Save Model

In [21]:
# Label the saved artefact with the data the model was actually trained on. The previous
# hard-coded zeros produced '0_init_0_loop_model_timeLoss.pkl' for a model fitted on the
# loaded initial design plus the points added by the experimental-design loop.
num_init_points = init_X.shape[0]
# Points added by the loop = total training points held by the model minus the initial design.
# This equals the iteration count when one point is evaluated per iteration, as in the recorded log.
num_loop_iters = emukit_model.X.shape[0] - num_init_points

with open(f'experimental_design/models/{num_init_points}_init_{num_loop_iters}_loop_model_timeLoss.pkl', "wb") as f:
    pickle.dump(emukit_model, f)

### Analysis

As we can see, optimizing for model_variance causes emukit to alternate between extreme choices of \[gridSize, edgeMaxSpeed, maxSpeed, numberOfLanes, accel\] whilst only varying edgeLength sensibly. This means that we don't explore the parameter space comprehensively - which is undesirable.