In [None]:
import glob
import os
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import ot
from scipy.spatial import distance
from scipy.stats import ks_2samp
from sklearn.model_selection import ShuffleSplit
from tqdm import tqdm
from domain_model import StateVariable, Activity, ActivityCategory, Scenario, Actor, \
    ActorCategory, Sinusoidal, EgoVehicle, State, Tag, Activity, Linear, \
    DocumentManagement
from stats import KDE
from cutin_detection import TARGET, STAT, EGO_CATEGORY
%matplotlib inline

In [None]:
# Load the database with the cut-in scenarios.
# The path is relative to the working directory of the notebook.
filename = os.path.join("data", "5_scenarios", "cut_in_scenarios2.json")
cutins = DocumentManagement(filename)

# Delete a few items for which we know they are not really cut ins.
# NOTE(review): this deletion is currently disabled, so every scenario in the file is
# used below - confirm that this is intended.
#for i in [39, 130, 258, 288]:
#    cutins.delete_item("scenario", i)
    
# Number of scenarios; reused later to size the arrays with initial parameters.
nscenarios = len(cutins.collections["scenario"])
print("Number of scenarios: {:d}".format(nscenarios))

# Step 1: Item generation

An item denotes a combination of tags. In this case study, we consider tags for the longitudinal activities and tags for the lateral activities. For the longitudinal activities, we have:

- A: Accelerating
- C: Cruising
- D: Decelerating

For the lateral activities, we have:

- F: Lane following
- L: Lane changing

When combining these different tags, there are six possibilities: AF, CF, DF, AL, CL, and DL.

Our goal is to determine the so-called transition probabilities: the probability of going from one item to another item. To calculate these probabilities, we use the Markov assumption. This means that the probability of going from item $X_i$ to item $X_{i+1}$ is assumed to depend only on item $X_i$ and not on any previous items.

In [None]:
# For every cut-in, store the first letter of each longitudinal activity of the
# target vehicle, ordered by the start time of the acts.
activities = []
for scenario_key in cutins.collections["scenario"].keys():
    scenario = cutins.get_item("scenario", scenario_key)

    # Put the acts in chronological order.
    start_times = [act[1].get_tstart() for act in scenario.acts]
    acts = [scenario.acts[j] for j in np.argsort(start_times)]

    # Keep only the longitudinal activities performed by the target vehicle.
    activities.append([act[1].name[0] for act in acts
                       if act[0].name == "target vehicle"
                       and act[1].category.state == StateVariable.LON_TARGET])

In [None]:
# Store in a dictionary the possibilities of 'item' sequences.
# 'af' means "accelerating and lane following"
# 'dl' means "decelerating and lane changing"
# 'cf', 'df', 'al', and 'cl' are similarly defined.
def get_transition(activities, main_act, other_acts):
    """ Estimate the transition probabilities out of one longitudinal activity.

    For each sequence in `activities`, the first occurrence of `main_act` is located.
    If it is the last activity of the sequence, this counts as a transition to 'end'.
    Otherwise the activity directly following it is counted if it is listed in
    `other_acts`. The counts are normalized to sum to 1.

    Note: if `main_act` is never followed by 'end' or one of `other_acts`, the
    normalization is 0/0 and the returned values are NaN.

    :param activities: list of sequences (lists) of activity letters, e.g. ['a', 'c'].
    :param main_act: the letter of the activity to start from.
    :param other_acts: the letters of the activities that may follow `main_act`.
    :return: dict with key 'end' followed by one key `letter + 'l'` per entry of
        `other_acts`, each mapping to the estimated probability.
    """
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the supported dtype.
    counter = np.zeros(1+len(other_acts), dtype=int)
    for activity in activities:
        try:
            i = activity.index(main_act)
        except ValueError:
            # This sequence does not contain `main_act`.
            continue
        if i == len(activity)-1:
            # `main_act` is the last activity: the last slot counts the 'end' cases.
            counter[len(other_acts)] += 1
            continue
        for j, other_act in enumerate(other_acts):
            if activity[i+1] == other_act:
                counter[j] += 1
    counter = counter / np.sum(counter)
    probs = dict()
    probs['end'] = counter[-1]
    for i, other_act in enumerate(other_acts):
        probs[other_act+'l'] = counter[i]
    return probs
# Transition probabilities between items. A lane-following item is always followed
# by the lane-changing item with the same longitudinal activity; the probabilities
# out of the lane-changing items are estimated from the data.
transition = {
    'af': {'al': 1},
    'cf': {'cl': 1},
    'df': {'dl': 1},
    'al': get_transition(activities, 'a', ['c', 'd']),
    'cl': get_transition(activities, 'c', ['a', 'd']),
    'dl': get_transition(activities, 'd', ['a', 'c']),
}

In [None]:
# The 'start' probabilities are the relative frequencies of the first longitudinal
# activity of each observed sequence.
acts, counts = np.unique([activity[0] for activity in activities], return_counts=True)
transition['start'] = {act+'f': count for act, count in zip(acts, counts/np.sum(counts))}
transition

The dictionary above shows the transition probabilities. We always start at "start". From the "start", we go through either of the items AF, CF, or DF. Once we are at item - let's say - AF, we can look what the probabilities are for the next possible items. In the case of AF, we go with probability 1 to AL. This is by the definition of the scenario. From item AL, we can either go to item CL or DL or we can end the scenario.

The next code is used to randomly generate sequences of items based on the transition probabilities defined above. It also calculates the probability of a given sequence.

In [None]:
def next_item(probs):
    """ Randomly draw one key of `probs`, using its values as probabilities.

    It is important that the probabilities in probs add up to 1. Because the values
    are floating-point numbers, their sum can end up marginally below 1. In that case
    the draw can fall past the last item; the last key with a positive probability is
    then returned instead of None.

    :param probs: dict mapping each candidate item to its probability.
    :return: the drawn key (None only if no value in `probs` is positive).
    """
    rand = np.random.rand()
    fallback = None
    for key, value in probs.items():
        if rand <= value:
            return key
        rand -= value
        if value > 0:
            fallback = key
    # Only reached when the probabilities sum to (slightly) less than the drawn number.
    return fallback

def generate_sequence(transitions, seed=None):
    """ Randomly generate a sequence of items by walking through the transitions.

    The walk begins at 'start' and repeatedly draws the next item with `next_item`
    until 'end' is drawn.

    :param transitions: dict that maps each item (including 'start') to a dict with
        the probabilities of the possible next items (including 'end').
    :param seed: if not None, the NumPy global random generator is seeded with this
        value before the sequence is generated.
    :return: list of the generated items, without 'start' and 'end'.
    """
    if seed is not None:
        # Use the provided seed (previously the value 0 was hard-coded here).
        np.random.seed(seed)
    sequence = ['start']
    while sequence[-1] != 'end':
        sequence.append(next_item(transitions[sequence[-1]]))
    return sequence[1:-1]

def probability_sequence(transitions, sequence):
    """ Compute the probability of a sequence of items under the Markov assumption.

    The sequence is extended with 'start' and 'end', after which the probabilities of
    all consecutive transitions are multiplied.

    :param transitions: dict that maps each item to a dict with the probabilities of
        the possible next items.
    :param sequence: list of items, without 'start' and 'end'.
    :return: the product of the transition probabilities along the sequence.
    """
    path = ['start'] + sequence + ['end']
    likelihood = 1
    for current, following in zip(path, path[1:]):
        likelihood = likelihood * transitions[current][following]
    return likelihood

In [None]:
# Monte Carlo check: draw n item sequences from the transition model.
# The seed is fixed so that the generated sequences are reproducible.
np.random.seed(0)
n = 10000
seqs = [generate_sequence(transition) for _ in range(n)]

In [None]:
# The 2-item and 3-item sequences for which the probabilities are compared.
sequences = [['cf', 'cl'],
             ['af', 'al'],
             ['df', 'dl'],
             ['cf', 'cl', 'al'],
             ['cf', 'cl', 'dl'],
             ['af', 'al', 'cl'],
             ['af', 'al', 'dl'],
             ['df', 'dl', 'al'],
             ['df', 'dl', 'cl']] # The list ends here on purpose.
# The 4-item sequences are parked in the variable `x`, which is not used afterwards,
# so they are excluded from the comparison below.
x=[             ['cf', 'cl', 'al', 'cl'],
             ['cf', 'cl', 'al', 'dl'],
             ['cf', 'cl', 'dl', 'cl'],
             ['cf', 'cl', 'dl', 'al'],
             ['af', 'al', 'cl', 'al'],
             ['af', 'al', 'cl', 'dl'],
             ['af', 'al', 'dl', 'al'],
             ['af', 'al', 'dl', 'cl'],
             ['df', 'dl', 'al', 'cl'],
             ['df', 'dl', 'al', 'dl'],
             ['df', 'dl', 'cl', 'al'],
             ['df', 'dl', 'cl', 'dl']]
# Probability of each sequence according to the transition probabilities.
probs = [probability_sequence(transition, seq) for seq in sequences]
# Relative frequency of each sequence among the n generated sequences.
# .count() is very slow like this, but fine for now...
mc_probs = [seqs.count(seq)/n for seq in sequences]
# Convert the observed activity letters to items: the first activity with 'f'
# (lane following), followed by every activity with 'l' (lane changing).
real_seqs = [[y[0]+'f']+[x+'l' for x in y] for y in activities]
# Relative frequency of each sequence in the real data.
real_probs = [real_seqs.count(seq)/len(real_seqs) for seq in sequences]
for seq, prob, mc_prob, real_prob in zip(sequences, probs, mc_probs, real_probs):
    print("{:13s} {:.4f} {:.4f} {:.4f}".format(' '.join(seq), prob, mc_prob, real_prob))
print()
# A total below 1 means that not all probability mass is covered by `sequences`.
print("{:13s} {:.4f} {:.4f} {:.4f}".format("Total", np.sum(probs), np.sum(mc_probs), 
                                           np.sum(real_probs)))

Each column has the following meaning:

1. The corresponding sequence of items.
2. The probability of this sequence based on the transition probabilities and the Markov assumption.
3. The number of times this sequence is generated divided by the total number of generated sequences.
4. The probability of this sequence based on the real data.

The similarity between the second and third columns demonstrates that sequences are generated according to the calculated probabilities. Still, these probabilities are significantly different from the real probabilities (fourth column). The inaccuracy is introduced by the Markov assumption.

# Step 2: Parameter generation

For each of the activities accelerating, cruising, decelerating, and lane changing, parameters are determined. Furthermore, some initial parameters are determined. For each of these parameters, a Kernel Density Estimation (KDE) is used to estimate the probability density. These KDEs are used to generate the parameters.

The following parameters are considered:

- Accelerating: Initial speed, mean acceleration, speed difference
- Cruising: Initial speed, log(duration), speed difference
- Decelerating: Initial speed, mean acceleration, speed difference
- Changing lane: Initial lateral position, Final lateral position, and duration

Note: For cruising, the log of the duration is used, because then the values are better behaved for constructing the KDE.

In [None]:
# Functions for obtaining the important parameters.
def parm_acc_dec(activity: Activity):
    """ Extract the parameters of an acceleration or deceleration activity.

    :param activity: the activity to get the parameters from.
    :return: tuple (initial speed, mean acceleration, speed difference), where the
        mean acceleration is the speed difference divided by the duration.
    """
    tstart, tend = activity.get_tstart(), activity.get_tend()
    # The first entry of the returned state holds the values at both time instants.
    # NOTE(review): presumably the speed signal - confirm with Activity.get_state.
    speeds = activity.get_state(time=[tstart, tend])[0]
    vstart, vend = speeds
    speed_difference = vend - vstart
    return vstart, speed_difference / (tend - tstart), speed_difference

def parm_cruise(activity: Activity):
    """ Extract the parameters of a cruising activity.

    :param activity: the activity to get the parameters from.
    :return: tuple (initial speed, natural logarithm of the duration, speed
        difference).
    """
    tstart, tend = activity.get_tstart(), activity.get_tend()
    speeds = activity.get_state(time=[tstart, tend])[0]
    vstart, vend = speeds
    log_duration = np.log(tend - tstart)
    return vstart, log_duration, vend - vstart

def parm_lc(activity: Activity):
    """ Extract the parameters of a lane change activity.

    :param activity: the activity to get the parameters from.
    :return: tuple (initial position, end position, duration).
    """
    tstart, tend = activity.get_tstart(), activity.get_tend()
    positions = activity.get_state(time=[tstart, tend])
    ystart, yend = positions
    return ystart, yend, tend - tstart

In [None]:
# Get the parameters of the longitudinal activities and of the lane changes.
# Each list collects one parameter tuple per matching activity, over all scenarios.
# NOTE(review): later cells index par_lc with scenario indices, which presumes exactly
# one lane change per scenario, stored in scenario order - confirm.
par_acc = []
par_cru = []
par_dec = []
par_lc = []
for i in cutins.collections["scenario"]:
    scenario = cutins.get_item("scenario", i)
    for activity in scenario.activities:
        # Activities are selected by name; activities with other names are ignored.
        if activity.name == "acceleration target":
            par_acc.append(parm_acc_dec(activity))
        elif activity.name == "deceleration target":
            par_dec.append(parm_acc_dec(activity))
        elif activity.name == "cruising target":
            par_cru.append(parm_cruise(activity))
        elif activity.name in ["right lane change", "left lane change"]:
            par_lc.append(parm_lc(activity))

In [None]:
# Initial conditions per scenario, evaluated at the start time of the scenario:
# the two values of the target's LON_TARGET state (par_tarv, par_initx) and the speed
# of the ego vehicle (par_egov).
par_initx = np.zeros(nscenarios)
par_egov = np.zeros(nscenarios)
par_tarv = np.zeros(nscenarios)
# NOTE(review): `key` stays bound to the last scenario key after this loop and is
# reused by a later cell (`s1 = cutins.get_item("scenario", key)`).
for i, key in enumerate(cutins.collections["scenario"]):
    scenario = cutins.get_item("scenario", key)
    # Find the ego vehicle. If no actor has this name, `actor` silently remains the
    # last actor of the scenario.
    for actor in scenario.actors:
        if actor.name == "ego vehicle":
            break
    par_egov[i] = scenario.get_state(actor, StateVariable.SPEED, scenario.get_tstart())
    
    # Find the target vehicle (same fall-through behavior as above).
    for actor in scenario.actors:
        if actor.name == "target vehicle":
            break
    par_tarv[i], par_initx[i] = scenario.get_state(actor, StateVariable.LON_TARGET, 
                                                   scenario.get_tstart())

The following code is used to generate a scenario. It is a rather naive approach in the sense that it is assumed that the parameters of each of the activities are independent of all other parameters.

In [None]:
class Approach1:
    """ Generate scenario parameters, assuming that the activities are independent.

    A KDE is constructed for the initial ego speed, the initial longitudinal position,
    each of the three lane change parameters, and for the (three-dimensional)
    parameters of each type of longitudinal activity.
    """
    def __init__(self, par_egov, par_initx, par_lc, par_acc, par_cru, par_dec):
        def lane_change_column(index):
            # A fresh array is built for every column, as each KDE gets its own data.
            return np.array(par_lc)[:, index]

        # KDEs of the initial parameters.
        self.kde_egov = KDE(par_egov)
        self.kde_initx = KDE(par_initx)

        # KDEs of the individual lane change parameters.
        self.kde_lc_ystart = KDE(lane_change_column(0))
        self.kde_lc_yend = KDE(lane_change_column(1))
        self.kde_lc_dt = KDE(lane_change_column(2))

        # KDEs of the longitudinal activities.
        self.kde_a = KDE(np.array(par_acc))
        self.kde_c = KDE(np.array(par_cru))
        self.kde_d = KDE(np.array(par_dec))

        # Compute the bandwidth of every KDE.
        all_kdes = [self.kde_egov, self.kde_initx, self.kde_lc_ystart,
                    self.kde_lc_yend, self.kde_lc_dt, self.kde_a, self.kde_c,
                    self.kde_d]
        for kde in all_kdes:
            kde.compute_bandwidth()

    def generate(self, items):
        """ Sample the parameters for the given sequence of items.

        :param items: sequence of items; every item after the first one corresponds
            to one longitudinal activity.
        :return: tuple (inits, lc, lon) with the initial parameters (ego speed,
            initial longitudinal position, uniform random number in [0, 1)), the lane
            change parameters, and a list with the parameters of each longitudinal
            activity.
        """
        ego_speed = self.kde_egov.sample()[0][0]
        init_x = self.kde_initx.sample()[0][0]
        inits = [ego_speed, init_x, np.random.rand()]

        ystart = self.kde_lc_ystart.sample()[0][0]
        yend = self.kde_lc_yend.sample()[0][0]
        duration = self.kde_lc_dt.sample()[0][0]
        lc = [ystart, yend, duration]

        # Anything that is not accelerating or cruising is treated as decelerating.
        kde_by_letter = {"a": self.kde_a, "c": self.kde_c}
        lon = []
        for item in items[1:]:
            pars = kde_by_letter.get(item[0], self.kde_d).sample()[0]
            if item[0] == "c":
                # The KDE for cruising models log(duration); convert it back.
                pars[1] = np.exp(pars[1])
            lon.append(pars)
        return (inits, lc, lon)
    

class Approach2(Approach1):
    """ Generate scenario parameters with consistent speeds between activities.

    Same KDEs as Approach1, but every longitudinal activity after the first one starts
    at the end speed of the previous activity; its remaining two parameters are
    sampled conditional on that start speed.
    """
    def __init__(self, par_egov, par_initx, par_lc, par_acc, par_cru, par_dec):
        super().__init__(par_egov, par_initx, par_lc, par_acc, par_cru, par_dec)

    def generate(self, items):
        """ Sample the parameters for the given sequence of items.

        :param items: sequence of items; every item after the first one corresponds
            to one longitudinal activity.
        :return: tuple (inits, lc, lon), structured as in Approach1.generate.
        """
        ego_speed = self.kde_egov.sample()[0][0]
        init_x = self.kde_initx.sample()[0][0]
        inits = [ego_speed, init_x, np.random.rand()]

        ystart = self.kde_lc_ystart.sample()[0][0]
        yend = self.kde_lc_yend.sample()[0][0]
        duration = self.kde_lc_dt.sample()[0][0]
        lc = [ystart, yend, duration]

        # Anything that is not accelerating or cruising is treated as decelerating.
        kde_by_letter = {"a": self.kde_a, "c": self.kde_c}

        # The first longitudinal activity is sampled unconditionally.
        first = kde_by_letter.get(items[1][0], self.kde_d).sample()[0]
        if items[1][0] == "c":
            # The KDE for cruising models log(duration); convert it back.
            first[1] = np.exp(first[1])
        lon = [first]

        # The next activities start at the end speed of the previous activity.
        for item in items[2:]:
            start_speed = lon[-1][0] + lon[-1][2]
            kde = kde_by_letter.get(item[0], self.kde_d)
            par2, par3 = kde.conditional_sample(0, start_speed, 1)[0]
            if item[0] == "c":
                par2 = np.exp(par2)
            lon.append(np.array([start_speed, par2, par3]))
        return (inits, lc, lon)
    
# Generators based on the parameters of all scenarios.
# Note that A1 and A2 are re-created per data split in a later cell.
A1 = Approach1(par_egov, par_initx, par_lc, par_acc, par_cru, par_dec)
A2 = Approach2(par_egov, par_initx, par_lc, par_acc, par_cru, par_dec)

It might be possible that the generated parameters are invalid for the given sequence of items. For example, consider the case that we have the items AF and AL. In that case, it means that the lane change activity ends *before* the acceleration activity ends. So if the parameters are such that the lane change activity ends *after* the acceleration activity ends, the parameters are invalid. 

The following checks are done:

- The initial speed of the ego vehicle should be strictly positive.
- The initial lateral position of the target vehicle should be at least 2 meters away from the center of the ego lane.
- Each activity should be strictly longer than 0 seconds.
- The mean acceleration and the speed difference for an acceleration activity should be strictly positive.
- The mean acceleration and the speed difference for a deceleration activity should be strictly negative.
- The last longitudinal activity should start before the end of the lane change.
- The last longitudinal activity should end after the end of the lane change.

In [None]:
def valid_cutin(sequence, parameters, verbose=False):
    """ Check whether the generated parameters are valid for the sequence of items.

    :param sequence: list of items, e.g. ['af', 'al', 'cl']; every item after the
        first one corresponds to one longitudinal activity.
    :param parameters: tuple (inits, lc, lon) as returned by the `generate` methods:
        inits = [ego speed, initial longitudinal position, fraction of the first
        longitudinal activity that takes place before the start of the lane change],
        lc = [initial lateral position, final lateral position, duration],
        lon = for each longitudinal activity [initial speed, mean acceleration (or
        duration in case of cruising), speed difference].
    :param verbose: if True, the reason for rejecting the parameters is printed.
    :return: True if all checks pass, False otherwise.
    """
    inits, lane_change, lon = parameters[0], parameters[1], parameters[2]

    # Ego speed should be strictly positive.
    if inits[0] <= 0:
        if verbose:
            print("Non-positive ego speed")
        return False

    # Start y should be at least 2 m away from ego lane. End should be less than 1.5 m.
    if np.abs(lane_change[0]) < 2 or np.abs(lane_change[1]) > 1.5:
        if verbose:
            print("Wrong lane change parameters")
        return False

    durations = np.zeros(len(sequence) - 1)
    for i, (item, par) in enumerate(zip(sequence[1:], lon)):
        # Each longitudinal activity should be longer than 0.
        # For acc/dec, it means that amean is pos/neg.
        if (item[0] == "a" and par[1] <= 0) or (item[0] == "d" and par[1] >= 0):
            if verbose:
                print("Wrong acceleration/deceleration")
            return False
        if item[0] == "c" and par[1] <= 0:
            if verbose:
                print("Wrong cruising", sequence, parameters)
            return False

        # With acceleration/deceleration, there should be a speed increase/decrease.
        if (item[0] == "a" and par[2] <= 0) or (item[0] == "d" and par[2] >= 0):
            if verbose:
                print("Wrong speed difference")
            return False

        # Duration: speed difference / mean acceleration, or directly for cruising.
        if item[0] in ["a", "d"]:
            durations[i] = par[2] / par[1]
        else:
            durations[i] = par[1]

    # Last lon activity should start before end lane change and end after it.
    # The lane change starts at time 0 and the first longitudinal activity starts
    # before that. The last activity starts once ALL preceding activities are done,
    # hence the sum over durations[:-1].
    first_start = -durations[0] * inits[2]
    last_start = first_start + np.sum(durations[:-1])
    if last_start > lane_change[2]:
        if verbose:
            print("Longitudinal activities too long")
        return False
    if last_start + durations[-1] < lane_change[2]:
        if verbose:
            print("Longitudinal activities too short")
        return False

    return True

The next code is an example of a generated "scenario", where the scenario is now represented by a sequence of items and the parameters.

In [None]:
# Generate one example: draw a sequence of items and keep sampling parameters with
# Approach1 until they are valid for that sequence (rejection sampling).
np.random.seed(4)
sequence = generate_sequence(transition)
parameters = A1.generate(sequence)
# verbose=True prints the reason for each rejected set of parameters.
while not valid_cutin(sequence, parameters, verbose=True):
    parameters = A1.generate(sequence)
print(sequence)
print(parameters)
# Displayed as the cell output; True by construction of the loop above.
valid_cutin(sequence, parameters)

# Step 3: Create Scenario object

Now that we have the code to generate a sequence of items and the corresponding parameters, we can also represent this sequence of items and list of parameters as a Scenario object. The following code converts the sequence of items and the list of parameters to a Scenario object. The following assumptions are used:

- For all activities other than cruising, a sinusoidal model is used.
- For cruising, a linear model is used.
- The scenario happens on a straight road.
- No other actors are involved than the ego vehicle and the target vehicle.

In [None]:
# Activity categories of the target vehicle that are used for the generated scenarios.
# The lane change, deceleration, and acceleration share one sinusoidal model; cruising
# uses a linear model.
SINUSOIDAL = Sinusoidal()
LC_TARGET = ActivityCategory(SINUSOIDAL, StateVariable.LAT_TARGET, 
                             name="lane change target",
                             tags=[Tag.VehicleLateralActivity_ChangingLane])
DEC_TARGET = ActivityCategory(SINUSOIDAL, StateVariable.SPEED, name="deceleration target",
                              tags=[Tag.VehicleLongitudinalActivity_DrivingForward_Braking])
ACC_TARGET = ActivityCategory(SINUSOIDAL, StateVariable.SPEED, name="acceleration target",
                              tags=[Tag.VehicleLongitudinalActivity_DrivingForward_Accelerating])
CRU_TARGET = ActivityCategory(Linear(), StateVariable.SPEED, name="cruising target",
                              tags=[Tag.VehicleLongitudinalActivity_DrivingForward_Cruising])

def create_scenario(sequence, parameters):
    """ Convert a sequence of items and the corresponding parameters to a Scenario.

    The lane change of the target starts at time 0. The first longitudinal activity
    starts before that: the fraction parameters[0][2] of its duration lies before
    time 0. Every next longitudinal activity starts when the previous one ends.

    :param sequence: list of items; every item after the first one corresponds to
        one longitudinal activity.
    :param parameters: tuple (inits, lc, lon) as returned by the `generate` methods.
    :return: Scenario with the ego vehicle, the target vehicle, the lane change of the
        target, and the longitudinal activities of the target.
    """
    # Create actors.
    ego_states = [State(StateVariable.SPEED, parameters[0][0]),
                  State(StateVariable.LONGITUDINAL_POSITION, 0)]
    ego = EgoVehicle(EGO_CATEGORY, initial_states=ego_states, name="ego vehicle")
    target_states = [State(StateVariable.LONGITUDINAL_POSITION, parameters[0][1])]
    target = Actor(TARGET, initial_states=target_states, name="target vehicle")

    # Create lane change activity of the target.
    lane_change = Activity(LC_TARGET, start=0, end=parameters[1][2],
                           parameters=dict(xstart=parameters[1][0], xend=parameters[1][1]))

    # Create longitudinal activities of the target.
    lon_activities = []
    tstart = 0
    for item, pars in zip(sequence[1:], parameters[2]):
        letter = item[0]
        if not lon_activities:
            # Determine the start time of the first activity.
            if letter == "c":
                tstart = -parameters[0][2] * pars[1]
            else:
                tstart = -parameters[0][2] * pars[2] / pars[1]
            start = tstart

        # Select the category and the duration of the activity.
        if letter == "a":
            category, duration = ACC_TARGET, pars[2]/pars[1]
        elif letter == "d":
            category, duration = DEC_TARGET, pars[2]/pars[1]
        else:
            category, duration = CRU_TARGET, pars[1]

        activity = Activity(category, start=start, end=tstart+duration,
                            parameters=dict(xstart=pars[0], xend=pars[0]+pars[2]))
        lon_activities.append(activity)

        # The next activity starts where this activity ends.
        tstart = activity.get_tend()
        start = activity.end

    # Create the scenario.
    all_activities = [lane_change]+lon_activities
    scenario = Scenario(start=lane_change.start, end=lane_change.end)
    scenario.set_physical_elements([STAT])
    scenario.set_activities(all_activities)
    scenario.set_actors([ego, target])
    scenario.set_acts([(target, activity) for activity in all_activities])
    return scenario

# Example: the generated scenario (s2) and an observed scenario (s1).
s2 = create_scenario(sequence, parameters)
# NOTE(review): `key` is not defined in this cell. It is the loop variable left over
# from the cell that computes par_egov/par_tarv/par_initx, i.e., the key of the last
# scenario in the database - confirm that this is the intended scenario.
s1 = cutins.get_item("scenario", key)

In [None]:
def create_valid_scenario(method):
    """ Generate a scenario with valid parameters using rejection sampling.

    First, a sequence of at most 4 items is drawn. Next, parameters are sampled for
    this sequence until they pass `valid_cutin`.

    :param method: object with a `generate(sequence)` method that returns the
        parameters (e.g., an instance of Approach1 or Approach2).
    :return: the Scenario created from the sequence and the valid parameters.
    """
    # Draw sequences until one with at most 4 items is found.
    while True:
        sequence = generate_sequence(transition)
        if len(sequence) <= 4:
            break

    # Sample parameters until they are valid for the sequence.
    while True:
        parameters = method.generate(sequence)
        if valid_cutin(sequence, parameters):
            break
    return create_scenario(sequence, parameters)

The following code is used to generate a cut-in scenario in the "old" way. The following parameters are used:

- The initial speed of the ego vehicle
- The initial lateral position of the target vehicle (w.r.t. the ego vehicle's lane center)
- The final lateral position of the target vehicle (w.r.t. the ego vehicle's lane center)
- The duration of the lane change
- The constant speed of the target vehicle
- The initial relative longitudinal position of the target vehicle

With `OriginalApproach1`, all parameters are assumed to be independent whereas with `OriginalApproach2`, all parameters are assumed to be dependent.

In [None]:
class OriginalApproach1:
    """ Generate cut-in scenarios with six parameters that are assumed independent.

    The parameters are, in this order: the initial speed of the ego vehicle, the
    initial and final lateral position of the target, the duration of the lane change,
    the (constant) speed of the target, and the initial longitudinal position of the
    target. Each parameter has its own one-dimensional KDE.
    """
    def __init__(self, par_egov, par_lc, par_tarv, par_initx):
        def lane_change_column(index):
            # A fresh array is built for every column, as each KDE gets its own data.
            return np.array(par_lc)[:, index]

        self.kde_egov = KDE(par_egov)
        self.kde_lc_ystart = KDE(lane_change_column(0))
        self.kde_lc_yend = KDE(lane_change_column(1))
        self.kde_lc_dt = KDE(lane_change_column(2))
        self.kde_tarv = KDE(par_tarv)
        self.kde_initx = KDE(par_initx)

        all_kdes = (self.kde_egov, self.kde_lc_ystart, self.kde_lc_yend,
                    self.kde_lc_dt, self.kde_tarv, self.kde_initx)
        for kde in all_kdes:
            kde.compute_bandwidth()

    def generate_parameters(self):
        """ Sample the six parameters until they pass `check_pars`.

        :return: tuple with the six parameters.
        """
        kdes = (self.kde_egov, self.kde_lc_ystart, self.kde_lc_yend,
                self.kde_lc_dt, self.kde_tarv, self.kde_initx)
        while True:
            pars = tuple(kde.sample()[0][0] for kde in kdes)
            if self.check_pars(pars):
                return pars

    def check_pars(self, pars):
        """ Check whether the six parameters describe a valid cut-in.

        The ego speed, the lane change duration, and the target speed must be strictly
        positive; the lateral position must start at least 2 m from the center of the
        ego lane and must end at most 2 m from it.

        :param pars: the six parameters (see class description for the order).
        :return: True if the parameters are valid, False otherwise.
        """
        nonpositive = pars[0] <= 0 or pars[3] <= 0 or pars[4] <= 0
        return not nonpositive and not (abs(pars[1]) < 2 or 2 < abs(pars[2]))

    def create_scenario(self):
        """ Create a scenario with newly sampled, valid parameters.

        :return: Scenario with the ego vehicle and a target vehicle that changes lane
            while cruising at a constant speed.
        """
        pars = self.generate_parameters()
        ego_speed, ystart, yend = pars[0], pars[1], pars[2]
        duration, target_speed, init_x = pars[3], pars[4], pars[5]

        # Create actors.
        ego_states = [State(StateVariable.SPEED, ego_speed),
                      State(StateVariable.LONGITUDINAL_POSITION, 0)]
        ego = EgoVehicle(EGO_CATEGORY, initial_states=ego_states, name="ego vehicle")
        target_states = [State(StateVariable.LONGITUDINAL_POSITION, init_x)]
        target = Actor(TARGET, initial_states=target_states, name="target vehicle")

        # Create lane change activity of the target.
        lane_change = Activity(LC_TARGET, start=0, end=duration,
                               parameters=dict(xstart=ystart, xend=yend))

        # Create the cruising activity of the target; it spans the whole lane change.
        cruising = Activity(CRU_TARGET, start=lane_change.start, end=lane_change.end,
                            parameters=dict(xstart=target_speed, xend=target_speed,
                                            tstart=0, tend=duration))

        # Create the scenario.
        scenario = Scenario(start=lane_change.start, end=lane_change.end)
        scenario.set_physical_elements([STAT])
        scenario.set_activities([lane_change, cruising])
        scenario.set_actors([ego, target])
        scenario.set_acts([(target, lane_change), (target, cruising)])
        return scenario

class OriginalApproach2(OriginalApproach1):
    """ Generate cut-in scenarios with six parameters that are assumed dependent.

    Instead of one KDE per parameter, a single six-dimensional KDE is used. The
    initializer of OriginalApproach1 is deliberately not called, because the
    one-dimensional KDEs are not needed.
    """
    def __init__(self, par_egov, par_lc, par_tarv, par_initx):
        lane_change_columns = [np.array(par_lc)[:, index] for index in range(3)]
        # One row per parameter; after transposing, each row is one observation.
        rows = (par_egov, *lane_change_columns, par_tarv, par_initx)
        self.kde = KDE(np.stack(rows).T)
        self.kde.compute_bandwidth()

    def generate_parameters(self):
        """ Sample the six parameters jointly until they pass `check_pars`.

        :return: array with the six parameters.
        """
        pars = self.kde.sample()[0]
        while not self.check_pars(pars):
            pars = self.kde.sample()[0]
        return pars

# Step 4: Compare two scenarios

Now that we are able to generate a Scenario object, we are able to compare the generated scenario with an original scenario. To do this, we collect "features" of the scenarios. This is very much a manual process. As an alternative, a more generalizable metric could be used to define the "difference" between two scenarios, but an earlier study showed that it is better to have a more "manual" approach, such that "the expert" can choose what is an important difference and what is not.

In the case of the cut-in scenario, the following "features" are used:

1. The initial longitudinal position of the target vehicle with respect to the ego vehicle.
2. The total duration of the scenario.
3. The lateral position of the target vehicle with respect to the ego vehicle's lane center.
4. The speed of the target vehicle.
5. The initial speed of the ego vehicle.

Note that the first, second, and fifth features are scalars and the third and fourth features are vectors.

In [None]:
# Define the different 'measures'.
# Measure 1: Initial longitudinal position of target vehicle.
def get_initial_lon_position(scenario: Scenario):
    """ Get the initial longitudinal position of the target vehicle.

    If the target has initial states, the first one is used. Otherwise, the second
    value of its LON_TARGET state at the start of the scenario is used.
    """
    target = scenario.get_actor_by_name("target vehicle")
    if not target.initial_states:
        state = scenario.get_state(target, StateVariable.LON_TARGET,
                                   time=scenario.get_tstart())
        return state[1]
    return target.initial_states[0].value[0]

# Measure 2: Total duration of the scenario.
def get_duration(scenario: Scenario):
    """ Get the time between the start and the end of the scenario. """
    tend = scenario.get_tend()
    tstart = scenario.get_tstart()
    return tend - tstart

# Measure 3: Get vector of y positions.
def get_lateral_position(scenario: Scenario):
    """ Get the lateral position at 20 points of the first lateral activity.

    Returns None if the scenario has no activity with the LAT_TARGET state.
    """
    lateral_activities = (activity for activity in scenario.activities
                          if activity.category.state == StateVariable.LAT_TARGET)
    first_lateral = next(lateral_activities, None)
    if first_lateral is None:
        return None
    return first_lateral.get_state(npoints=20)

# Measure 4: Get vector of speeds of target vehicle.
def get_speed_target(scenario: Scenario):
    """ Get the speed of the target vehicle at 20 equally spaced time instants.

    The SPEED state is used if available. Otherwise, the second column of the
    LON_TARGET state is used.
    """
    target = scenario.get_actor_by_name("target vehicle")

    def sample_times():
        return np.linspace(scenario.get_tstart(), scenario.get_tend(), 20)

    speed = scenario.get_state(target, StateVariable.SPEED, sample_times())
    if speed is None:
        lon_state = scenario.get_state(target, StateVariable.LON_TARGET, sample_times())
        return lon_state[:, 1]
    return speed

# Measure 5: Get initial speed of ego vehicle.
def get_initial_speed_ego(scenario: Scenario):
    """ Get the initial speed of the ego vehicle.

    If the ego vehicle has initial states, the first one is used. Otherwise, its SPEED
    state at the start of the scenario is used.
    """
    ego = scenario.get_actor_by_name("ego vehicle")
    if not ego.initial_states:
        return scenario.get_state(ego, StateVariable.SPEED, scenario.get_tstart())
    return ego.initial_states[0].value[0]

def func_features(scenario: Scenario):
    """ Collect the five features of a scenario.

    :return: tuple (initial longitudinal position of the target, duration, lateral
        positions, target speeds, initial ego speed).
    """
    extractors = (get_initial_lon_position,
                  get_duration,
                  get_lateral_position,
                  get_speed_target,
                  get_initial_speed_ego)
    return tuple(extract(scenario) for extract in extractors)

def func_comparison(features1, features2):
    """ Compute the difference between two scenarios for each of the five features.

    For the scalar features (indices 0, 1, and 4), the absolute difference is used.
    For the vector features (indices 2 and 3), the Euclidean distance is used.

    :param features1: features of the first scenario (see `func_features`).
    :param features2: features of the second scenario.
    :return: NumPy array with the five differences.
    """
    def scalar_gap(index):
        return np.abs(features1[index] - features2[index])

    def vector_gap(index):
        return np.sqrt(np.sum((features1[index] - features2[index])**2))

    return np.array((scalar_gap(0),
                     scalar_gap(1),
                     vector_gap(2),
                     vector_gap(3),
                     scalar_gap(4)))

# Step 5: Compare two sets of scenarios

In [None]:
class CompareScenarioSets:
    """ Compare two sets of scenarios using a weighted distance between scenarios.

    The distance between two scenarios is the Euclidean norm of the (optionally
    weighted) vector that is returned by `func_comparison`. The distance between two
    sets is the value returned by `ot.emd2` for the matrix with all mutual distances.
    """
    def __init__(self, func_comparison):
        self.func_comparison = func_comparison
        self.set1 = None
        self.set2 = None
        self.weights = None

    def set_sets(self, set1: List[Scenario] = None, set2: List[Scenario] = None):
        """ Set the first and/or second set; a set that is None is left unchanged. """
        self.set1 = self.set1 if set1 is None else set1
        self.set2 = self.set2 if set2 is None else set2

    def metric(self, scenario1, scenario2):
        """ Compute the (weighted) distance between two scenarios. """
        differences = self.func_comparison(scenario1, scenario2)
        if self.weights is not None:
            differences *= self.weights
        return np.sqrt(np.sum(differences**2))

    def compute_weights(self):
        """ Set the weights to the inverse of the standard deviation of the feature
        differences among all pairs of the first set. """
        count = len(self.set1)
        difference = np.zeros((count*(count-1)//2, len(self.set1[0])))
        row = 0
        for first in range(count):
            for second in range(first+1, count):
                difference[row, :] = self.func_comparison(self.set1[first],
                                                          self.set1[second])
                row += 1
        self.weights = 1 / np.std(difference, axis=0)

    def pdist(self):
        """ Compute the square matrix with the distances within the first set. """
        count = len(self.set1)
        dist = np.zeros(count*(count-1)//2)
        pair = 0
        for first in range(count):
            for second in range(first+1, count):
                dist[pair] = self.metric(self.set1[first], self.set1[second])
                pair += 1
        return distance.squareform(dist)

    def cdist(self):
        """ Compute the matrix with the distances between the first and second set. """
        dist = np.zeros((len(self.set1), len(self.set2)))
        for row in range(len(self.set1)):
            for column in range(len(self.set2)):
                dist[row, column] = self.metric(self.set1[row], self.set2[column])
        return dist

    def wasserstein(self):
        """ Compute the distance between the two sets with `ot.emd2`.

        Empty weight vectors are passed for both sets.
        NOTE(review): presumably this means uniform weights - confirm with the POT
        documentation.
        """
        return ot.emd2([], [], self.cdist())

    def self_score(self, n_splits: int = 10):
        """ Compute the distance between two random halves of the first set.

        :param n_splits: the number of random splits.
        :return: array with the distance for each split.
        """
        splitter = ShuffleSplit(n_splits=n_splits, test_size=0.5)
        halves = CompareScenarioSets(self.func_comparison)
        halves.weights = self.weights
        scores = np.zeros(n_splits)
        for split, (index1, index2) in enumerate(splitter.split(self.set1)):
            halves.set_sets([self.set1[j] for j in index1],
                            [self.set1[j] for j in index2])
            scores[split] = halves.wasserstein()
        return scores

In [None]:
# Baseline: score of the observed data itself, based on random 50/50 splits.
np.random.seed(0)
features_observed = [func_features(cutins.get_item("scenario", i))
                     for i in cutins.collections["scenario"]]
n_splits = 10
splitter = ShuffleSplit(n_splits=n_splits, test_size=0.5)
# The splits are stored, because the same splits are reused in the next cells.
splits = list(splitter.split(features_observed))
model_scores1 = np.zeros(n_splits)
model_scores2 = np.zeros(n_splits)
compare = CompareScenarioSets(func_comparison)
for i, (index1, index2) in enumerate(splits):
    # Distance between the first half and the second half.
    compare.set_sets([features_observed[j] for j in index1],
                     [features_observed[j] for j in index2])
    model_scores1[i] = compare.wasserstein()
    # Distance between the first half and itself.
    # NOTE(review): both sets are identical here, so this is presumably always 0.
    compare.set_sets([features_observed[j] for j in index1],
                     [features_observed[j] for j in index1])
    model_scores2[i] = compare.wasserstein()
# Same combination of the two distances as used for the generated sets below.
model_scores = 2*model_scores1 - model_scores2

In [None]:
# Inspect the indices of the first half of the last split (left over from the loop).
index1

In [None]:
# Scores of the "original" approaches (column 0: independent, column 1: dependent).
np.random.seed(0)
scores_original1 = np.zeros((n_splits, 2))
scores_original2 = np.zeros((n_splits, 2))
for i, (index1, index2) in enumerate(tqdm(splits)):
    # Fit the approaches using only the scenarios of the first half (index1).
    parameters = (par_egov[index1],
                  [par_lc[j] for j in index1],
                  par_tarv[index1],
                  par_initx[index1])
    OA1 = OriginalApproach1(*parameters)
    OA2 = OriginalApproach2(*parameters)
    for j, approach in enumerate([OA1, OA2]):
        # Generate as many scenarios as there are scenarios in the first half.
        scenario_features = [func_features(approach.create_scenario())
                             for _ in range(len(index1))]
        # The `j` inside the comprehensions is local to the comprehension and does not
        # change the loop variable `j` that is used for the column index.
        # Distance between the generated scenarios and the second half (index2).
        compare.set_sets(set1=[features_observed[j] for j in index2], set2=scenario_features)
        scores_original1[i, j] = compare.wasserstein()
        # Distance between the generated scenarios and the first half (index1).
        compare.set_sets(set1=[features_observed[j] for j in index1], set2=scenario_features)
        scores_original2[i, j] = compare.wasserstein()
scores_original = 2*scores_original1 - scores_original2

In [None]:
# Scores of the new approaches (column 0: Approach1, column 1: Approach2).
np.random.seed(0)
scores1 = np.zeros((n_splits, 2))
scores2 = np.zeros((n_splits, 2))
for i, (index1, index2) in enumerate(tqdm(splits)):
    # The initial parameters and lane change parameters are restricted to the first
    # half (index1).
    # NOTE(review): par_acc, par_cru, and par_dec are NOT restricted to the first half,
    # so the longitudinal KDEs also use activities of the scenarios in index2 - confirm
    # that this is acceptable for the comparison.
    parameters = (par_egov[index1], 
                  par_initx[index1], 
                  [par_lc[j] for j in index1], 
                  par_acc, par_cru, par_dec)
    # This overwrites the A1 and A2 that are defined in an earlier cell.
    A1 = Approach1(*parameters)
    A2 = Approach2(*parameters)
    for j, method in enumerate([A1, A2]):
        # Generate as many scenarios as there are scenarios in the first half.
        scenario_features = [func_features(create_valid_scenario(method)) 
                             for _ in range(len(index1))]
        # Distance between the generated scenarios and the second half (index2).
        compare.set_sets(set1=[features_observed[j] for j in index2], set2=scenario_features)
        scores1[i, j] = compare.wasserstein()
        # Distance between the generated scenarios and the first half (index1).
        compare.set_sets(set1=[features_observed[j] for j in index1], set2=scenario_features)
        scores2[i, j] = compare.wasserstein()
scores = 2*scores1 - scores2

In [None]:
# Columns: OriginalApproach1, OriginalApproach2, Approach1, Approach2.
all_scores = np.concatenate((scores_original, scores), axis=1)
# Mean score per approach over the splits.
np.mean(all_scores, axis=0)

In [None]:
# Standard deviation of the score per approach over the splits.
np.std(all_scores, axis=0)

In [None]:
# Two-sample Kolmogorov-Smirnov test between the scores of OriginalApproach1
# (column 0) and the scores of Approach2 (column 3).
ks_2samp(all_scores[:, 0], all_scores[:, 3])

In [None]:
# Box plot of the scores of the four approaches over the splits.
plt.boxplot(all_scores)
# The first tick label used to be misspelled ("Orinal").
plt.xticks([1, 2, 3, 4], ["Original, indep.", "Original, dep.", "Approach1", "Approach2"])

In [None]:
# Box plot of the scores of the four approaches and of the observed data itself.
plt.boxplot(np.concatenate((all_scores, model_scores[:, np.newaxis]), axis=1))
# The first tick label used to be misspelled ("Orinal").
plt.xticks([1, 2, 3, 4, 5], ["Original, indep.", "Original, dep.", "Approach1", "Approach2", "Data"])

In [None]:
def approach_svd(s, q=3):
    """ Generate scenario features using an SVD and a KDE.

    The features of every scenario are stacked in one vector with 43 entries (3
    scalars and 2 vectors with 20 entries each). After subtracting the mean, an SVD is
    applied. A KDE is fitted to the first `q` columns of the left singular vectors,
    new samples are drawn from it, and these are mapped back to feature vectors.

    :param s: list of features of the scenarios, as returned by `func_features`.
    :param q: the number of singular values/vectors to use.
    :return: list of generated features with the same structure as the entries of `s`;
        the list has `len(k.data)` entries, with `k` the fitted KDE.
    """
    # Column order: 3 scalar features, lateral positions, target speeds.
    features = np.zeros((len(s), 3+2*20))
    for row, scen in zip(features, s):
        row[0] = scen[0]
        row[1] = scen[1]
        row[2] = scen[4]
        row[3:] = np.concatenate(scen[2:4])

    mean = np.mean(features, axis=0)
    left, singular_values, right = np.linalg.svd(features-mean, full_matrices=False)
    kde = KDE(left[:, :q], scaling=True)
    kde.compute_bandwidth()

    # Map the sampled coefficients back to the feature space.
    coefficients = kde.sample(len(kde.data)) * singular_values[:q]
    generated = np.dot(coefficients, right[:q]) + mean
    return [(vector[0], vector[1], vector[3:23], vector[23:43], vector[2])
            for vector in generated]


In [None]:
# Scores of the SVD-based approach, using the same splits as before.
# NOTE(review): unlike the previous score cells, no random seed is set here, so the
# result depends on the state of the random generator - confirm whether that is intended.
scores_svd1 = np.zeros(n_splits)
scores_svd2 = np.zeros_like(scores_svd1)
for i, (index1, index2) in enumerate(splits):
    # Generate features based on the first half (index1) only.
    y = approach_svd([features_observed[j] for j in index1])
    # Distance between the generated features and the second half (index2).
    compare.set_sets(set1=[features_observed[j] for j in index2], set2=y)
    scores_svd1[i] = compare.wasserstein()
    # Distance between the generated features and the first half (index1).
    compare.set_sets(set1=[features_observed[j] for j in index1], set2=y)
    scores_svd2[i] = compare.wasserstein()
scores_svd = 2*scores_svd1 - scores_svd2

In [None]:
# Show the scores of the observed data itself, for comparison with scores_svd.
model_scores

In [None]:
# Show the scores of the SVD-based approach.
scores_svd