In [1]:
# Move up one directory so the relative imports and data paths used in later cells
# (e.g. 'experimental_design/...') are resolved from the parent folder.
%cd ../

/Users/Matteo/PycharmProjects/TrafficEmu


In [2]:
# If you get a SciPy error when installing Emukit, build it from source:

# git clone https://github.com/amzn/Emukit.git
# cd Emukit
# pip install -r requirements/requirements.txt
# python setup.py develop

In [3]:
import numpy as np
import pickle
import emukit as ek
import GPy

from emukit.model_wrappers import GPyModelWrapper
from emukit.experimental_design.experimental_design_loop import ExperimentalDesignLoop
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.core.initial_designs import RandomDesign, latin_design
from emukit.experimental_design.acquisitions import ModelVariance, IntegratedVarianceReduction
from emukit.core.loop import UserFunctionWrapper
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop
from emukit.core.optimization import GradientAcquisitionOptimizer

from sumo_grid_simulation.grid_simulation import Simulator

### Pseudocode

Fitting a Gaussian process to a simulator using Emukit takes the following form (taken from the L48 lectures):

```
initialize GP with some randomly chosen points
while stopping condition is not met:
    compute candidate point(s) using GP and acquisition function (model_variance) -> new point
    evaluate this new point with our simulator/user function -> observation
    update model with new observation -> new GP
```

### Simulator

In [4]:
# Single simulator instance shared by the user function defined later in this notebook.
# trips_generator_period is kept at 5; with other values the simulation is very slow.
simulator = Simulator(trips_generator_period=5, end_time=300)

### User Function
 
This is the function we want to understand. Namely, how are CO2 emissions and timeLoss (the time lost due to driving below the ideal speed) affected by the following parameters:

```
gridSize: the size of the grid network where the simulation is carried out
edgeMaxSpeed: legal speed limit in m/s - this can be exceeded (11.11 == 40km/h)
maxSpeed: the absolute maximum velocity of any vehicle in m/s (55.55 == 200 km/h)
edgeLength: length of the roads between intersections in meters
numLanes: number of lanes per road
accel: The acceleration ability of vehicles in m/s^2.
```

*A complete list of parameters analysed is discussed in our report.*

In [13]:
def user_function_time_loss(X):
    """Simulate every input point and return its time loss per unit of route length.

    Emukit does not pass named arguments: it calls the user function with a
    single N x M ndarray, where N is the number of points to evaluate and M is
    the number of parameters per point. Here M must be 6, with the columns
    ordered (gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel).

    Args:
        X: Array-like of shape (N, 6). A single point given as a 1-D sequence
            of 6 values is also accepted and treated as a batch of one.

    Returns:
        ndarray of shape (N, 1); row i holds
        ``s['timeLoss'] / s['routeLength']`` for the simulation of point i.
    """
    X = np.asarray(X)
    # Promote a lone 1-D point to a one-row batch so the row-wise unpacking
    # below works; an empty 1-D array is left alone and yields an empty result.
    if X.ndim == 1 and X.size > 0:
        X = X.reshape(1, -1)
    num_points = X.shape[0]

    print(f'\nUser function called with {num_points} inputs to simulate')

    result = []
    rows = enumerate(X, start=1)
    for i, (gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numLanes, accel) in rows:
        print(f'\nEvaluating input: {i} of {num_points}\n')
        # gridSize, edgeLength and numLanes are passed to the simulator as
        # ints; int() truncates the sampled float towards zero.
        s = simulator.simulate(
            gridSize      = int(gridSize),
            edgeMaxSpeed  = edgeMaxSpeed,
            maxSpeed      = maxSpeed,
            edgeLength    = int(edgeLength),
            numberOfLanes = int(numLanes),
            accel         = accel
        )
        # average time loss / average route length
        result.append(s['timeLoss'] / s['routeLength'])

    # The trailing axis is essential: the acquisition function breaks on a flat (N,) array.
    return np.expand_dims(np.array(result), 1)

### Model (GP)

Our surrogate model is our emulator. In this case, a Gaussian process. I think that Emukit handles categorical/discrete inputs using one-hot encodings.

#### Model Inputs

In [14]:
# Earlier inline definition of the six-parameter space, kept for reference only.
# The space actually used is built by experimental_design.config (see the last two lines);
# whether its ranges match the ones listed here is not visible from this notebook — TODO confirm.
# gridSize = ContinuousParameter('gridSize', min_value=3, max_value=20)
# edgeMaxSpeed = ContinuousParameter('edgeMaxSpeed', min_value=1, max_value=25)
# maxSpeed = ContinuousParameter('maxSpeed', min_value=1, max_value=25)
# edgeLength = ContinuousParameter('edgeLength', min_value=30, max_value=200)
# numberOfLanes = DiscreteParameter('numberOfLanes', domain=[1,2,3])
# accel = ContinuousParameter('accel', 1., 6.)

# parameter_space = ParameterSpace([gridSize, edgeMaxSpeed, maxSpeed, edgeLength, numberOfLanes, accel])
# Project-local config module supplies the parameter space used by the optimizer and the loop.
import experimental_design.config as config
parameter_space = config.get_parameter_space()

#### Initialize Model / Emulator (GP)

In [15]:
# Load the pre-computed initial design (2000 points, 6 parameters).
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
# The context manager closes the file handle as soon as loading is done.
with open('experimental_design/2000_init_points_6_param.pkl', 'rb') as f:
    init_X, init_Y = pickle.load(f)

# Smaller alternative (250 init points):
# with open('experimental_design/250_init_points_6_param.pkl', 'rb') as f:
#     init_X, init_Y = pickle.load(f)

# Record of how the initial points were generated. Left commented out: re-running it
# would call the simulator for all 2000 points. Note that it writes the pickle to the
# current directory, whereas the load above reads from experimental_design/.
# design = RandomDesign(parameter_space)  # initialize with random points
# num_data_points = 2000
# init_X = design.get_samples(num_data_points)
# init_Y = user_function_time_loss(init_X)
# with open('2000_init_points_6_param.pkl', "wb") as f:
#     pickle.dump((init_X, init_Y), f)

print(init_X.shape, init_Y.shape)

(2000, 6) (2000, 1)


In [16]:
# Alternative: resume from a previously pickled emulator instead of building a new one.
# emulator = pickle.load(open('experimental_design_timeLoss_50_iter_batch_size_3.pkl',"rb"))
# GP regression model over the initial design; no kernel is passed, so GPy chooses its default.
emulator = GPy.models.GPRegression(init_X, init_Y)
# Wrap the GPy model so Emukit's acquisition and loop can use it.
emukit_model = GPyModelWrapper(emulator)
# Last expression of the cell: displays the model's parameter summary.
emulator

GP_regression.,value,constraints,priors
rbf.variance,1.0,+ve,
rbf.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


### Optimization

#### Acquisition Function

In [17]:
# Acquisition function for the experimental-design loop, computed from the wrapped GP model.
model_variance = ModelVariance(model=emukit_model)

#### Optimizer

In [18]:
# Emukit's gradient-based acquisition optimizer, searching over parameter_space.
optimizer = GradientAcquisitionOptimizer(parameter_space)

#### Experimental Design

In [19]:
# Experimental-design loop tying together the emulator, the parameter space,
# the model-variance acquisition and its optimizer.
ed_loop = ExperimentalDesignLoop(
    model = emukit_model,
    space = parameter_space,
    acquisition = model_variance,
    acquisition_optimizer = optimizer,
    # One new point per iteration (the run log shows "1 inputs to simulate" per call).
    batch_size = 1
)

In [None]:
# Run the loop against the simulator-backed user function. 100 is the stopping condition
# (presumably a fixed number of iterations, cf. "100_iter" in the saved model's filename — TODO confirm).
ed_loop.run_loop(user_function_time_loss, 100)

Optimization restart 1/1, f = -4217.65121277893

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4216.589455953628

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4217.677709905973

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4219.2470545538845

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4220.585377265888

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4222.1092932633455

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4212.6201924295965

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4214.024002364861

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4211.9687070119135

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4212.808829661773

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4213.519162101413

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4213.552163747106

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4174.910006198291

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4176.534828999343

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4178.013148044403

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




Optimization restart 1/1, f = -4179.917159133824

User function called with 1 inputs to simulate 


Evaluating input: 1 of 1

 Retrying in 1 seconds




### Evaluate

#### Test on 25 point test set

In [None]:
# Load the held-out test set.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
# The context manager closes the file handle as soon as loading is done.
with open('experimental_design/test_points_6_param.pkl', 'rb') as f:
    test_X, test_Y = pickle.load(f)

# Record of how the test set was generated. Kept as comments rather than a bare string
# literal, which, as the cell's last expression, would be echoed as the cell output.
# Note that it writes the pickle to the current directory, whereas the load above
# reads from experimental_design/.
# design = RandomDesign(parameter_space)  # initialize with random points
# num_data_points = 25
# test_X = design.get_samples(num_data_points)
# test_Y = user_function_time_loss(test_X)
# with open('test_points_6_param.pkl', "wb") as f:
#     pickle.dump((test_X, test_Y), f)

In [None]:
# Emulator predictions (and their variances) at the test inputs.
pred_Y, pred_Y_variance = emukit_model.predict(test_X)
# Guard against silent broadcasting: an (N, 1) vs (N,) mismatch would turn the
# residuals into an N x N matrix and corrupt the error without raising.
assert np.shape(pred_Y) == np.shape(test_Y), (np.shape(pred_Y), np.shape(test_Y))
# The square root makes this the ROOT mean squared error, so name it accordingly.
rmse = np.sqrt(np.mean((pred_Y - test_Y) ** 2))
print('Root mean squared error: ', rmse)

In [None]:
# Display the predictions alongside the test targets for a side-by-side comparison.
pred_Y, test_Y

### Save Model

In [None]:
# Persist the underlying GPy model (`emulator`) to a pickle in the current directory.
with open('timeLoss_model_variance_100_iter_batch_size_1.pkl', 'wb') as f:
    pickle.dump(emulator, f)

### Analysis

As we can see, optimizing for model_variance causes emukit to alternate between extreme choices of \[gridSize, edgeMaxSpeed, maxSpeed, numberOfLanes, accel\] whilst only varying edgeLength sensibly. This means that we don't explore the parameter space comprehensively - which is undesirable.