# Experiments for paper on surrogate metric

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
import pandas as pd
import pickle
import scipy.signal
import scipy.spatial.distance as dist
from simulation import WangStamatiadis, WSDriver, SimulationApproaching, ws_approaching_pars, \
    LeaderInteraction, IDMPlus, SimulationLongitudinal, IDMParameters, LeaderInteractionParameters
from stats import KDE, kde_from_file

### Parameters

In [None]:
# When True, the cached result files on disk are ignored and all results
# (W&S comparison, KDE, collision probabilities) are recomputed.
OVERWRITE = False

## Show that our metric is a generalisation of W&S' metric

In [None]:
# Settings for the comparison with the Wang & Stamatiadis (W&S) metric.
# NOTE(review): N_MONTE_CARLO_WS is not referenced anywhere in the visible code.
N_MONTE_CARLO_WS = 500
# TTC values [s] at which both metrics are evaluated (x-axis of the comparison plot).
WS_TTCS = np.linspace(.5, 4, 36)
# Speed differences for which a curve is computed; one line style per value.
WS_SPEED_DIFFS = [10, 20, 30]
WS_LINESTYLES = ['-', '--', ':']
# Colors and line widths, indexed by method: 0 = W&S metric, 1 = our method.
WS_COLORS = [(0, 0, 0), (.5, .5, .5)]
WS_LINEWIDTHS = [3, 3]
# Stem of the cache file; the tolerance and extension are appended when it is used.
WS_FILENAME = os.path.join("data", "7_simulation_results", "ws_comparison")

In [None]:
# Set up the approaching-scenario simulation that reproduces the W&S setting.
tolerance = 0.02
driver = WSDriver()
simulation = SimulationApproaching(
    [driver],
    [ws_approaching_pars],
    tolerance=tolerance,
    max_simulation=500,
)
simulation.min_simulations = 20


def ws_with_our_method(ttc: float, speed_diff: float):
    """
    Collision probability of our method for a W&S-style approaching scenario.

    The scenario handed to the simulation has an ego speed equal to
    `speed_diff`, a target/ego speed ratio of 0 and an initial position of
    `ttc * speed_diff`, so that the initial time-to-collision equals `ttc`.

    Returns whatever `simulation.get_probability` returns for that scenario.
    """
    scenario = {
        "vego": speed_diff,
        "ratio_vtar_vego": 0,
        "init_position": ttc * speed_diff,
    }
    return simulation.get_probability(scenario)


ws = WangStamatiadis()

In [None]:
# Results are cached per tolerance. result[i, j, 0] holds the W&S probability
# and result[i, j, 1] the probability of our method for WS_TTCS[i] and
# WS_SPEED_DIFFS[j].
FILENAME = WS_FILENAME + "_tol{:.0f}.p".format(100*tolerance)
expected_shape = (len(WS_TTCS), len(WS_SPEED_DIFFS), 2)

result = None
if os.path.exists(FILENAME) and not OVERWRITE:
    # Read the very file that is written below (the tolerance-specific one);
    # opening the bare WS_FILENAME stem would never find the cached results.
    with open(FILENAME, "rb") as file:
        result = pickle.load(file)

# Recompute when there is no usable cache or its shape no longer matches the
# current TTC / speed-difference settings.
if result is None or np.shape(result) != expected_shape:
    result = np.zeros(expected_shape)
    np.random.seed(0)  # Make the Monte Carlo results reproducible
    for i, ttc in enumerate(WS_TTCS):
        for j, speed_diff in enumerate(WS_SPEED_DIFFS):
            result[i, j, 0] = ws.prob_collision(ttc, speed_diff)
            result[i, j, 1] = ws_with_our_method(ttc, speed_diff)
    os.makedirs(os.path.dirname(FILENAME), exist_ok=True)
    with open(FILENAME, "wb") as file:
        pickle.dump(result, file)

In [None]:
# One pair of curves per speed difference: method 0 is the W&S metric and
# method 1 is our method (last axis of `result`).
for i, speed_diff in enumerate(WS_SPEED_DIFFS):
    for method in (0, 1):
        plt.plot(WS_TTCS, result[:, i, method], ls=WS_LINESTYLES[i],
                 color=WS_COLORS[method], lw=WS_LINEWIDTHS[method])
plt.xlim(WS_TTCS[0], WS_TTCS[-1])
plt.ylim(0, 1)
plt.xlabel("TTC [s]")
plt.ylabel("$P(C|x)$ according to the WS metric")
plt.grid()

## Apply method on NGSIM data set to create a risk metric

In [None]:
# Pickle file with the NGSIM interactions (input data).
NGSIM_DATA = os.path.join("data", "8_interactions_v3", 
                          "dNGSIM_iLongitudinal_mFull_Reconstruction_e100_wi1_w1_hs32_bs2",
                          "interactions.pkl")
# Cache file for the KDE that is fitted on the leader parameters.
NGSIM_KDE = os.path.join("data", "6_kde", "NGSIM.p")
# CSV cache with the simulated collision probability for every grid point.
NGSIM_PROB_COLLISION = os.path.join("data", "7_simulation_results", 
                                    "prob_collision_NGSIM.csv")
# Grid resolution per dimension, in the order [v0_lead, a0_lead, v0_host, loggap]:
# parameters are rounded to multiples of these values.
SCALING_GRID = [2, .5, 2, .25]

In [None]:
# Load data.
# `all_interactions` is indexed by location; each entry offers `.values()` to
# iterate over its interactions, which hold a 'leader' and a 'follower' table.
# NOTE: unpickling executes code from the file, so only load trusted data.
with open(NGSIM_DATA, 'rb') as file:
    all_interactions = pickle.load(file)
locations = sorted(all_interactions.keys())

In [None]:
# Filter for speed and acceleration
def filter_signal(signal):
    """Smooth `signal` with a Savitzky-Golay filter (window of 15 samples, first-order polynomial)."""
    smoothed = scipy.signal.savgol_filter(signal, window_length=15, polyorder=1)
    return smoothed

In [None]:
def get_pars(vel_acc):
    """
    Extract the parameters that describe how a vehicle slows down.

    The speed and acceleration are smoothed, after which every local minimum
    of the smoothed speed (prominence of at least 1) is treated as the end of
    a deceleration. Each sample before such a minimum is described by its
    smoothed speed and acceleration, the speed difference towards the next
    minimum and the mean acceleration needed to get there.

    Parameters:
    - vel_acc: table with the columns 'Velocity_X' and 'Acceleration_X'; the
      index is used as time when computing durations.

    Returns an array with the columns [vx_savgol, ax_savgol, vdiff, amean],
    containing every 10th remaining sample. An empty (0, 4) array is returned
    if there are fewer than 15 samples (the filter window) or if the speed
    jumps by more than 1.5 between two consecutive samples.
    """
    empty = np.zeros((0, 4))
    if len(vel_acc) < 15:
        return empty
    if np.max(np.abs(np.diff(vel_acc['Velocity_X']))) > 1.5:
        return empty

    data = vel_acc.copy()
    data['ax_savgol'] = filter_signal(data["Acceleration_X"])
    data['vx_savgol'] = filter_signal(data["Velocity_X"])

    # Local minima of the smoothed speed mark the end of a deceleration.
    i = data.index[scipy.signal.find_peaks(-data['vx_savgol'], prominence=1)[0]]
    data['endspeed'] = np.nan
    data['endtime'] = np.nan
    data.loc[i, 'endspeed'] = data.loc[i, 'vx_savgol']
    data.loc[i, 'endtime'] = i
    # Give every sample the end speed/time of the next minimum. bfill() replaces
    # the deprecated fillna(method='backfill'). Samples after the last minimum
    # have nothing to fill from and are dropped.
    data = data.bfill()
    data = data.dropna()

    data['duration'] = data['endtime'] - data.index
    data['vdiff'] = data['endspeed'] - data['vx_savgol']
    data['amean'] = data['vdiff'] / data['duration']
    # At the minima themselves the duration is zero, so amean is undefined there.
    data = data.drop(i)
    return data[['vx_savgol', 'ax_savgol', 'vdiff', 'amean']].values[::10]

In [None]:
if not OVERWRITE and os.path.exists(NGSIM_KDE):
    # A fitted KDE is cached on disk: reuse it.
    kde = kde_from_file(NGSIM_KDE)
else:
    # Gather the leader parameters of every interaction at every location,
    # fit the KDE on them and store it for later runs.
    parameters = [get_pars(interaction['leader'])
                  for location in locations
                  for interaction in all_interactions[location].values()]
    kde = KDE(np.concatenate(parameters), scaling=True)
    kde.clustering(kde._maxdist()*5)
    kde.compute_bandwidth()
    print("Bandwidth: {:.4f}".format(kde.get_bandwidth()))
    kde.pickle(NGSIM_KDE)

In [None]:
def leader_parameters(**kwargs):
    """
    Build the leader's parameter object from the scenario parameters.

    Reads the keys 'gap', 'v0_lead', 'a0_lead', 'dv' and 'duration' from
    `kwargs`; other keys are ignored. A missing key raises a KeyError.
    """
    # Name of the LeaderInteractionParameters field -> key in kwargs.
    field_sources = {
        "init_position": "gap",
        "init_speed": "v0_lead",
        "init_acceleration": "a0_lead",
        "speed_difference": "dv",
        "duration": "duration",
    }
    fields = {field: kwargs[source] for field, source in field_sources.items()}
    return LeaderInteractionParameters(**fields)

def follower_parameters(**kwargs):
    """
    Build the follower's (host's) IDM parameter object from the scenario parameters.

    Reads the keys 'amin', 'v0_host' and 'tr' from `kwargs`; other keys are
    ignored. The host speed is used both as the `speed` and the `init_speed`
    argument, and the host starts at position 0.
    """
    braking_limit = kwargs["amin"]
    host_speed = kwargs["v0_host"]
    # The reaction time is converted to a whole number of steps (100 per unit of tr).
    reaction_steps = int(kwargs["tr"]*100)
    return IDMParameters(
        amin=braking_limit,
        speed=host_speed,
        n_reaction=reaction_steps,
        init_speed=host_speed,
        init_position=0,
    )

def get_other_pars(**kwargs):
    """
    Complete a scenario by sampling the parameters that are not given.

    Requires 'v0_lead' and 'a0_lead'. Always (re)draws 'dv' and 'amean' and
    derives 'duration' from them. 'tr' and 'amin' are only drawn when the
    caller did not provide them.

    Returns the completed parameter dictionary.
    """
    # Draw the speed difference and mean acceleration from the KDE, conditional
    # on the leader's speed and acceleration. Draws in which the two have
    # opposite signs are rejected, because they would give a negative duration.
    given = [kwargs["v0_lead"], kwargs["a0_lead"]]
    while True:
        (dv, amean), = kde.conditional_sample([0, 1], given)
        if np.sign(dv) == np.sign(amean):
            break
    kwargs["dv"] = dv
    kwargs["amean"] = amean
    kwargs["duration"] = dv / amean

    # Reaction time: lognormal distribution with mean=.92 and std=0.28,
    # expressed through the parameters of the underlying normal distribution.
    if "tr" not in kwargs:
        mean, std = .92, .28
        mu = np.log(mean**2 / np.sqrt(mean**2 + std**2))
        sigma = np.sqrt(np.log(1 + std**2/mean**2))
        kwargs["tr"] = np.random.lognormal(mu, sigma)

    # Braking capacity: normal distribution truncated to (-12.68, -4.23),
    # sampled by rejection.
    if "amin" not in kwargs:
        candidate = np.random.normal(-8.45, 1.4)
        while not (-12.68 < candidate < -4.23):
            candidate = np.random.normal(-8.45, 1.4)
        kwargs["amin"] = candidate

    return kwargs

# Car-following simulation: the leader follows LeaderInteraction and the
# follower (host) is controlled by IDMPlus; the two functions above translate
# the scenario parameters into the parameter objects of both vehicles.
# Note that this rebinds `simulation`, which held the SimulationApproaching
# instance of the W&S comparison before.
simulation = SimulationLongitudinal(LeaderInteraction(), leader_parameters,
                                    IDMPlus(), follower_parameters)
# NOTE(review): presumably the minimum simulated time in seconds - confirm in the simulation module.
simulation.min_simulation_time = 2

def get_probability(**kwargs):
    """
    Estimate the probability of a collision with Monte Carlo simulations.

    Parameters to provide:
    - v0_lead
    - a0_lead
    - v0_host
    - gap
    Optionally 'tr' and 'amin' can be fixed as well; otherwise they are
    sampled anew for every simulation run (see get_other_pars).

    Between 10 and 100 simulations are performed. After each run (from the
    10th onwards) a KDE is fitted on the simulation results and its CDF at
    zero is used as the probability estimate; the loop stops once
    sqrt(p*(1-p)/n) drops below 0.01.

    Returns the estimated probability (0.0 if the host speed is not positive).
    Raises a ValueError if the estimate turns out to be NaN.
    """
    # If the host speed is zero (or negative), always return 0.0
    if "v0_host" in kwargs and kwargs["v0_host"] <= 0.0:
        return 0.0

    min_sim = 10
    max_sim = 100
    results = np.zeros(max_sim)
    cdf_zero = np.nan
    for i in range(max_sim):
        parameters = get_other_pars(**kwargs)
        results[i] = simulation.simulation(parameters)

        if i+1 >= min_sim:
            # If results are all the same, a KDE cannot be fitted: return
            # either 0.0 or 1.0.
            # NOTE(review): a positive result presumably means that no
            # collision occurred - confirm against simulation.simulation.
            if np.std(results[:i+1]) < 1e-8:
                if results[0] > 0.0:
                    return 0.0
                return 1.0

            kde_result = KDE(results[:i+1], scaling=True)
            kde_result.compute_bandwidth()
            cdf_zero = kde_result.cdf(np.array([0.0]))[0]
            if np.sqrt(cdf_zero*(1-cdf_zero)/(i+1)) < 0.01:
                break

    if np.isnan(cdf_zero):
        # Fail loudly with a useful message instead of tripping over an
        # undefined name.
        raise ValueError("Collision probability is NaN for parameters {}".format(kwargs))
    return cdf_zero

In [None]:
# Create grid
def grid_pars(interaction):
    """
    Compute the grid parameters for every sample of an interaction.

    Parameters:
    - interaction: mapping with a 'leader' and a 'follower' table. Both need
      the columns 'Velocity_X', 'Position_X' and 'Length'; the leader also
      needs 'Acceleration_X'.

    Returns a DataFrame (indexed like the leader) with the columns
    'v0_lead', 'a0_lead', 'v0_host' and 'loggap', or an empty (0, 4) array if
    the leader has fewer than 15 samples.

    Side effect: the smoothed signals are stored in the tables of the
    interaction itself ('ax_savgol'/'vx_savgol' for the leader, 'vx_savgol'
    for the follower).
    """
    leader = interaction['leader']
    if len(leader) < 15:
        return np.zeros((0, 4))
    follower = interaction['follower']

    leader['ax_savgol'] = filter_signal(leader["Acceleration_X"])
    leader['vx_savgol'] = filter_signal(leader["Velocity_X"])
    follower['vx_savgol'] = filter_signal(follower["Velocity_X"])

    pars = pd.DataFrame({"v0_lead": leader["vx_savgol"].values,
                         "a0_lead": leader["ax_savgol"].values},
                        index=leader.index)
    pars["v0_host"] = follower["vx_savgol"]

    # Net gap: distance between the positions minus half of each vehicle's length.
    gap = (leader["Position_X"] - follower["Position_X"] -
           leader["Length"]/2 - follower["Length"]/2)
    # Negative gaps get a small positive value, so that the logarithm exists.
    gap = gap.mask(gap < 0, np.exp(-5))
    pars["loggap"] = np.log(gap)
    return pars

In [None]:
if not OVERWRITE and os.path.exists(NGSIM_PROB_COLLISION):
    # Reuse the grid that was stored together with the collision probabilities.
    df = pd.read_csv(NGSIM_PROB_COLLISION, index_col=0)
    grid = df[["v0_lead", "a0_lead", "v0_host", "loggap"]].values
else:
    # Collect the parameters of all samples of all interactions.
    parameters = np.concatenate([grid_pars(interaction)
                                 for location in locations
                                 for interaction in all_interactions[location].values()])

    grid = parameters.copy()
    # Columns: 0 = v0_lead, 1 = a0_lead, 2 = v0_host, 3 = loggap.
    # The acceleration of the leader (column 1) is not clipped.
    for column, (lower, upper) in ((0, (0, 100)), (2, (0, 100)), (3, (-5, 5))):
        grid[:, column] = np.clip(grid[:, column], lower, upper)
    # Snap to the grid resolution and keep every grid point only once.
    grid = np.unique(np.round(grid / SCALING_GRID), axis=0) * SCALING_GRID

In [None]:
# Evaluate the collision probability for the grid
def get_probability_grid_pars(row):
    """
    Collision probability for one grid row [v0_lead, a0_lead, v0_host, loggap].

    The grid stores the logarithm of the gap, so it is converted back to a gap
    before calling get_probability.
    """
    scenario = {
        "v0_lead": row[0],
        "a0_lead": row[1],
        "v0_host": row[2],
        "gap": np.exp(row[3]),
    }
    return get_probability(**scenario)

In [None]:
if OVERWRITE or not os.path.exists(NGSIM_PROB_COLLISION):
    # Simulate every grid point and store the result next to the grid.
    # The grid is iterated directly: the progress-bar helper `tqdm` is not
    # imported in this notebook, so wrapping the grid in it raises a NameError.
    df = pd.DataFrame(grid, columns=("v0_lead", "a0_lead", "v0_host", "loggap"))
    df["prob_collision"] = [get_probability_grid_pars(row) for row in grid]
    df.to_csv(NGSIM_PROB_COLLISION)
# Otherwise `df` has been read from the CSV file when the grid was loaded.
# Either way, prob_collision is an array that is aligned with the rows of `grid`.
prob_collision = df["prob_collision"].values

In [None]:
# Define function for the interpolation
# Every grid dimension is divided by its standard deviation, so that distances
# in the scaled grid weigh all four dimensions comparably.
# NOTE(review): a dimension without any variation has a standard deviation of
# zero, which makes this a division by zero.
scaling = np.std(grid, axis=0)
grid_scaled = grid / scaling
def prob_ngsim(v0_lead, a0_lead, v0_host, gap):
    """
    Interpolate the collision probability from the simulated grid.

    The result is a weighted average of the collision probabilities of all
    grid points, with Gaussian weights (bandwidth .3) based on the distance
    between the query point and the grid points in the scaled parameter space.

    Returns an array with a single element: the interpolated probability.
    """
    bandwidth = 0.3
    query = np.array([[v0_lead, a0_lead, v0_host, np.log(gap)]]) / scaling
    sq_distance = dist.cdist(grid_scaled, query, metric='sqeuclidean')
    weights = np.exp(-sq_distance / 2 / (bandwidth**2))
    weighted_sum = np.dot(prob_collision, weights)
    return weighted_sum / np.sum(weights, axis=0)

## Method for adding data to grid

In [None]:
def add_data_to_grid(pars):
    """
    Make sure that the grid contains the grid points belonging to `pars`.

    Use this function in the following manner:

    add_data_to_grid(<parameters>)

    `pars` is an array with one row per sample and the columns
    [v0_lead, a0_lead, v0_host, loggap]. The rows are rounded to the grid
    resolution (SCALING_GRID). For every resulting grid point that is not in
    the grid yet, the collision probability is simulated, after which the
    point is appended to the CSV file and to the global variables `df`,
    `grid`, `scaling`, `grid_scaled` and `prob_collision`.

    Returns the (possibly extended) grid and the matching collision
    probabilities, regardless of whether grid points have been added.
    """
    global grid, scaling, grid_scaled, prob_collision, df
    pars = np.round(pars / SCALING_GRID)
    pars = np.unique(pars, axis=0)
    pars = pars * SCALING_GRID

    # A grid point is new if no existing row of the grid equals it.
    new = [not np.any((grid == row).all(axis=1)) for row in pars]
    if not any(new):
        return grid, prob_collision
    print("{:d}/{:d} values added".format(sum(new), len(new)))

    pars = pars[new, :]
    df_new = pd.DataFrame(pars, columns=("v0_lead", "a0_lead", "v0_host", "loggap"))
    df_new["prob_collision"] = [get_probability_grid_pars(row) for row in pars]
    df = pd.concat((df, df_new), ignore_index=True)
    df.to_csv(NGSIM_PROB_COLLISION)

    # Refresh everything that is derived from the grid.
    grid = df[["v0_lead", "a0_lead", "v0_host", "loggap"]].values
    scaling = np.std(grid, axis=0)
    grid_scaled = grid / scaling
    prob_collision = df["prob_collision"].values
    return grid, prob_collision

## Try method for scenario 1: No risk

In [None]:
def make_plots(time, v_lead, a_lead, v_host, distance):
    """
    Plot a scenario and the collision probability of both metrics over time.

    Two figures are created:
    1. The speed of the host and the leader, with the distance between them on
       a second y-axis.
    2. The collision probability according to the W&S metric and according to
       the NGSIM-based method (prob_ngsim).

    Parameters (arrays with one entry per time step; expected to have equal lengths):
    - time: time stamps, used as x-axis of both figures
    - v_lead: speed of the leader
    - a_lead: acceleration of the leader
    - v_host: speed of the host
    - distance: distance between the host and the leader (its logarithm is taken)

    Side effect: add_data_to_grid is called, which may run new simulations and
    update the stored grid.
    """
    # Calculate WS prob
    # The probability stays zero whenever the host is not faster than the
    # leader, because the TTC is not defined in that case.
    prob_ws = np.zeros_like(time)
    for i, (vh, vl, gap) in enumerate(zip(v_host, v_lead, distance)):
        if vh > vl:
            ttc = gap / (vh - vl)
            prob_ws[i] = ws.prob_collision(ttc, vh-vl)
            
    # Calculate prob with new method
    # First make sure that the grid contains the points along this scenario.
    add_data_to_grid(np.array([v_lead, a_lead, v_host, np.log(distance)]).T)
    prob_new = np.zeros_like(time)
    for i, (vh, vl, al, gap) in enumerate(zip(v_host, v_lead, a_lead, distance)):
        prob_new[i] = prob_ngsim(vl, al, vh, gap)
        
    # Figure 1: speeds on the left axis, distance on the right axis.
    _, ax1 = plt.subplots()
    ax1.plot(time, v_host, c=WS_COLORS[0], lw=WS_LINEWIDTHS[0], ls=WS_LINESTYLES[0])
    ax1.plot(time, v_lead, c=WS_COLORS[0], lw=WS_LINEWIDTHS[0], ls=WS_LINESTYLES[1])
    ax1.set_xlabel("Time [s]")
    ax1.set_ylabel("Speed [m/s]", color=WS_COLORS[0])
    ax1.grid()
    ax1.set_xlim(time[0], time[-1])
    ax2 = ax1.twinx()
    ax2.plot(time, distance, c=WS_COLORS[1], lw=WS_LINEWIDTHS[0], ls=WS_LINESTYLES[2])
    ax2.set_ylabel("Distance[m]", color=WS_COLORS[1])
    
    # Align the ticks of the distance axis with the grid lines of the speed
    # axis. The distance axis gets as many ticks as the speed axis has interior
    # ticks (the first and last tick are left out).
    ax1_ticks = ax1.get_yticks()[1:-1]
    nticks = len(ax1_ticks)
    # Find the smallest integer tick spacing n for which `nticks` ticks,
    # starting at the multiple of n at or below the smallest distance (for
    # non-negative distances), reach beyond the largest distance.
    n = 1
    while int(np.max(distance)/n)+1 - int(np.min(distance/n)) >= nticks:
        n += 1
    ax2_ticks = (np.arange(nticks)+int(np.min(distance)/n))*n
    ax2.set_yticks(ax2_ticks)
    # Choose the limits of the distance axis such that its ticks end up at the
    # same heights as the ticks of the speed axis: the margins outside the
    # outer ticks are converted from speed units to distance units.
    ax1_ylim = ax1.get_ylim()
    aspect_ratio = (ax2_ticks[-1] - ax2_ticks[0]) / (ax1_ticks[-1] - ax1_ticks[0])
    ax2.set_ylim(ax2_ticks[0] - aspect_ratio * (ax1_ticks[0] - ax1_ylim[0]),
                 ax2_ticks[-1] + aspect_ratio * (ax1_ylim[1] - ax1_ticks[-1]))
    
    # Figure 2: collision probability of both metrics.
    plt.subplots()
    plt.plot(time, prob_ws, c=WS_COLORS[0], lw=WS_LINEWIDTHS[0])
    plt.plot(time, prob_new, c=WS_COLORS[1], lw=WS_LINEWIDTHS[1])
    plt.ylim(0, 1)
    plt.xlim(time[0], time[-1])
    plt.xlabel("Time [s]")
    plt.ylabel("Probability of collision")
    plt.grid()

In [None]:
# Scenario 1 (speeds in m/s, times in s and distances in m, as labelled in the plots).
# Leader: drives at V0_LEADER and, from T_LEADER onwards, reduces its speed by
# DV_LEADER. The speed drop takes DV_LEADER/A_LEADER seconds, so A_LEADER is
# the mean deceleration.
V0_LEADER = 20
DV_LEADER = 10
A_LEADER = 3
T_LEADER = 3

# Host: drives at V0_HOST and, from T_HOST onwards, reduces its speed by
# DV1_HOST (mean deceleration A1_HOST), directly followed by a speed increase
# of DV2_HOST (mean acceleration A2_HOST).
V0_HOST = 24
DV1_HOST = 16
DV2_HOST = 2
A1_HOST = 4
A2_HOST = 0.5
T_HOST = 2

# Initial distance between the vehicles and the duration of the scenario.
D_INIT = 40
TMAX = 12

In [None]:
# Time axis with steps of 0.01 s.
time = np.arange(0, TMAX+0.01, 0.01)

# Leader: constant speed, followed by a smooth (half-cosine) speed drop of
# DV_LEADER that starts at T_LEADER, after which the speed stays constant.
t_lead_end = DV_LEADER/A_LEADER+T_LEADER
v_lead = np.full_like(time, V0_LEADER)
a_lead = np.zeros_like(time)
i = (time > T_LEADER) & (time <= t_lead_end)
phase = np.pi*(time[i]-T_LEADER)*A_LEADER/DV_LEADER
v_lead[i] = DV_LEADER/2*(np.cos(phase)-1)+V0_LEADER
a_lead[i] = -DV_LEADER/2*np.sin(phase)*np.pi*A_LEADER/DV_LEADER
v_lead[time > t_lead_end] = V0_LEADER - DV_LEADER

# Host: constant speed, a half-cosine speed drop of DV1_HOST between T_HOST and
# t1, a half-cosine speed increase of DV2_HOST between t1 and t2, and a
# constant speed afterwards. a_host is not used for the plots.
t1 = DV1_HOST / A1_HOST + T_HOST
t2 = t1 + DV2_HOST / A2_HOST
v_host = np.full_like(time, V0_HOST)
a_host = np.zeros_like(time)
i = (time > T_HOST) & (time <= t1)
phase = np.pi*(time[i]-T_HOST)*A1_HOST/DV1_HOST
v_host[i] = DV1_HOST/2*(np.cos(phase)-1)+V0_HOST
a_host[i] = -DV1_HOST/2*np.sin(phase)*np.pi*A1_HOST/DV1_HOST
i = (time > t1) & (time <= t2)
phase = np.pi*(time[i]-t1)*A2_HOST/DV2_HOST
v_host[i] = -DV2_HOST/2*(np.cos(phase)-1)+V0_HOST-DV1_HOST
a_host[i] = DV2_HOST/2*np.sin(phase)*np.pi*A2_HOST/DV2_HOST
v_host[time > t2] = V0_HOST - DV1_HOST + DV2_HOST

# The distance follows from integrating the speed difference over time.
distance = D_INIT + np.cumsum(v_lead - v_host)*0.01

In [None]:
# Plot scenario 1 and the collision probability of both metrics over time.
make_plots(time, v_lead, a_lead, v_host, distance)

## Try method for scenario 2: Risky

In [None]:
# Scenario 2 (speeds in m/s, times in s and distances in m, as labelled in the plots).
# Same as scenario 1, except that the host starts braking at T_HOST = 4
# instead of T_HOST = 2.
# Leader: drives at V0_LEADER and, from T_LEADER onwards, reduces its speed by
# DV_LEADER in DV_LEADER/A_LEADER seconds.
V0_LEADER = 20
DV_LEADER = 10
A_LEADER = 3
T_LEADER = 3

# Host: drives at V0_HOST and, from T_HOST onwards, reduces its speed by
# DV1_HOST (mean deceleration A1_HOST), directly followed by a speed increase
# of DV2_HOST (mean acceleration A2_HOST).
V0_HOST = 24
DV1_HOST = 16
DV2_HOST = 2
A1_HOST = 4
A2_HOST = 0.5
T_HOST = 4

# Initial distance between the vehicles and the duration of the scenario.
D_INIT = 40
TMAX = 12

In [None]:
# Time axis with steps of 0.01 s.
time = np.arange(0, TMAX+0.01, 0.01)

# Leader: constant speed, followed by a smooth (half-cosine) speed drop of
# DV_LEADER that starts at T_LEADER, after which the speed stays constant.
t_lead_end = DV_LEADER/A_LEADER+T_LEADER
v_lead = np.full_like(time, V0_LEADER)
a_lead = np.zeros_like(time)
i = (time > T_LEADER) & (time <= t_lead_end)
phase = np.pi*(time[i]-T_LEADER)*A_LEADER/DV_LEADER
v_lead[i] = DV_LEADER/2*(np.cos(phase)-1)+V0_LEADER
a_lead[i] = -DV_LEADER/2*np.sin(phase)*np.pi*A_LEADER/DV_LEADER
v_lead[time > t_lead_end] = V0_LEADER - DV_LEADER

# Host: constant speed, a half-cosine speed drop of DV1_HOST between T_HOST and
# t1, a half-cosine speed increase of DV2_HOST between t1 and t2, and a
# constant speed afterwards. a_host is not used for the plots.
t1 = DV1_HOST / A1_HOST + T_HOST
t2 = t1 + DV2_HOST / A2_HOST
v_host = np.full_like(time, V0_HOST)
a_host = np.zeros_like(time)
i = (time > T_HOST) & (time <= t1)
phase = np.pi*(time[i]-T_HOST)*A1_HOST/DV1_HOST
v_host[i] = DV1_HOST/2*(np.cos(phase)-1)+V0_HOST
a_host[i] = -DV1_HOST/2*np.sin(phase)*np.pi*A1_HOST/DV1_HOST
i = (time > t1) & (time <= t2)
phase = np.pi*(time[i]-t1)*A2_HOST/DV2_HOST
v_host[i] = -DV2_HOST/2*(np.cos(phase)-1)+V0_HOST-DV1_HOST
a_host[i] = DV2_HOST/2*np.sin(phase)*np.pi*A2_HOST/DV2_HOST
v_host[time > t2] = V0_HOST - DV1_HOST + DV2_HOST

# The distance follows from integrating the speed difference over time.
distance = D_INIT + np.cumsum(v_lead - v_host)*0.01

In [None]:
# Plot scenario 2 and the collision probability of both metrics over time.
make_plots(time, v_lead, a_lead, v_host, distance)

## Try method for scenario 3: Collision

In [None]:
# Scenario 3 (speeds in m/s, times in s and distances in m, as labelled in the plots).
# Compared to scenario 2, the host drives faster (V0_HOST = 25), brakes harder
# (A1_HOST = 5) and starts closer to the leader; the scenario ends at TMAX = 6.
# Leader: drives at V0_LEADER and, from T_LEADER onwards, reduces its speed by
# DV_LEADER in DV_LEADER/A_LEADER seconds.
V0_LEADER = 20
DV_LEADER = 10
A_LEADER = 3
T_LEADER = 3

# Host: drives at V0_HOST and, from T_HOST onwards, reduces its speed by
# DV1_HOST (mean deceleration A1_HOST), directly followed by a speed increase
# of DV2_HOST (mean acceleration A2_HOST).
V0_HOST = 25
DV1_HOST = 16
DV2_HOST = 2
A1_HOST = 5
A2_HOST = 0.5
T_HOST = 4

# Initial distance between the vehicles and the duration of the scenario.
# NOTE(review): D_INIT looks tuned such that the distance reaches (about) zero
# at the end of the scenario - confirm.
D_INIT = 34.933
TMAX = 6

In [None]:
# Time axis with steps of 0.01 s.
time = np.arange(0, TMAX+0.01, 0.01)

# Leader: constant speed, followed by a smooth (half-cosine) speed drop of
# DV_LEADER that starts at T_LEADER, after which the speed stays constant.
t_lead_end = DV_LEADER/A_LEADER+T_LEADER
v_lead = np.full_like(time, V0_LEADER)
a_lead = np.zeros_like(time)
i = (time > T_LEADER) & (time <= t_lead_end)
phase = np.pi*(time[i]-T_LEADER)*A_LEADER/DV_LEADER
v_lead[i] = DV_LEADER/2*(np.cos(phase)-1)+V0_LEADER
a_lead[i] = -DV_LEADER/2*np.sin(phase)*np.pi*A_LEADER/DV_LEADER
v_lead[time > t_lead_end] = V0_LEADER - DV_LEADER

# Host: constant speed, a half-cosine speed drop of DV1_HOST between T_HOST and
# t1, a half-cosine speed increase of DV2_HOST between t1 and t2, and a
# constant speed afterwards. a_host is not used for the plots.
t1 = DV1_HOST / A1_HOST + T_HOST
t2 = t1 + DV2_HOST / A2_HOST
v_host = np.full_like(time, V0_HOST)
a_host = np.zeros_like(time)
i = (time > T_HOST) & (time <= t1)
phase = np.pi*(time[i]-T_HOST)*A1_HOST/DV1_HOST
v_host[i] = DV1_HOST/2*(np.cos(phase)-1)+V0_HOST
a_host[i] = -DV1_HOST/2*np.sin(phase)*np.pi*A1_HOST/DV1_HOST
i = (time > t1) & (time <= t2)
phase = np.pi*(time[i]-t1)*A2_HOST/DV2_HOST
v_host[i] = -DV2_HOST/2*(np.cos(phase)-1)+V0_HOST-DV1_HOST
a_host[i] = DV2_HOST/2*np.sin(phase)*np.pi*A2_HOST/DV2_HOST
v_host[time > t2] = V0_HOST - DV1_HOST + DV2_HOST

# The distance follows from integrating the speed difference over time.
distance = D_INIT + np.cumsum(v_lead - v_host)*0.01

In [None]:
# Plot scenario 3 and the collision probability of both metrics over time.
make_plots(time, v_lead, a_lead, v_host, distance)

# Calculate partial derivatives of our method

In [None]:
# Settings that are assigned to the simulation object before the partial
# derivatives are computed.
TOLERANCE = 0.02
MIN_SIMULATIONS = 100
MAX_SIMULATIONS = 500
# Parameters that keep the same value in all evaluations.
FIXED_PARAMETERS = dict(a0_lead=0)
# Parameters for which a partial derivative is computed. Each tuple contains
# (first perturbed value, default value, second perturbed value): the default
# is used for the baseline and the other two for the finite difference.
VARIABLE_PARAMETERS = dict(v0_lead=(9.8, 10, 10.2),
                           v0_host=(19.8, 20, 20.2),
                           gap=(19.5, 20, 20.5),
                           amin=(-4.8, -5, -5.2),
                           tr=(.90, .95, 1.0))

In [None]:
# Store the settings on the (longitudinal) simulation object.
# NOTE(review): get_probability uses its own fixed number of simulations
# (min_sim = 10, max_sim = 100) and calls simulation.simulation directly, so
# these attributes may not influence the results below - confirm.
simulation.tolerance = TOLERANCE
simulation.min_simulations = MIN_SIMULATIONS
simulation.max_simulations = MAX_SIMULATIONS

In [None]:
np.random.seed(0)

# Baseline: all variable parameters at their default (middle) value.
parameters = dict(FIXED_PARAMETERS)
for key, values in VARIABLE_PARAMETERS.items():
    parameters[key] = values[1]
print("Default simulation result: {:.3f}".format(get_probability(**parameters)))

# Change one parameter at a time to its two perturbed values and estimate the
# partial derivative with a finite difference.
for key, values in VARIABLE_PARAMETERS.items():
    print()
    results = []
    for value in (values[0], values[2]):
        parameters[key] = value
        results.append(get_probability(**parameters))
        print("Change '{:s}' to {}, result: {:.3f}".format(key, value, results[-1]))
    parameters[key] = values[1]  # Change back to default value

    derivative = (results[1]-results[0])/(values[2]-values[0])
    print("Partial derivative for {:s}: {:.3f}".format(key, derivative))

In [None]:
# Difference between the two perturbed results of the last parameter of the loop above.
results[1] - results[0]

In [None]:
# Show the last parameter of the loop together with its finite-difference
# derivative. Both terms must come from `results`: `result` (singular) is the
# array of the W&S comparison from an earlier cell.
key, (results[1]-results[0])/(values[2]-values[0])