# Experiments for paper on surrogate metric

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
import pandas as pd
import pickle
import scipy.signal
import scipy.spatial.distance as dist
from tqdm import tqdm_notebook as tqdm
from simulation import WangStamatiadis, WSDriver, SimulationApproaching, ws_approaching_pars, \
    LeaderInteraction, IDMPlus, SimulationLongitudinal, IDMParameters, LeaderInteractionParameters
from stats import KDE, kde_from_file

### Parameters

In [None]:
# When True, results cached on disk are recomputed and the cache files rewritten.
OVERWRITE = False

## Show that our metric is a generalisation of W&S' metric

In [None]:
# NOTE(review): not referenced in any of the visible cells.
N_MONTE_CARLO_WS = 500
# TTC values at which both metrics are evaluated (x-axis of the comparison plot).
WS_TTCS = np.linspace(.5, 4, 36)
# Speed differences for which a curve is drawn; one line style per entry.
WS_SPEED_DIFFS = [10, 20, 30]
WS_LINESTYLES = ['-', '--', ':']
# Index 0 is used for the W&S metric, index 1 for our method.
WS_COLORS = [(0, 0, 0), (.5, .5, .5)]
WS_LINEWIDTHS = [3, 3]
# Prefix of the pickle file in which the comparison results are cached.
WS_FILENAME = os.path.join("data", "7_simulation_results", "ws_comparison")

In [None]:
# Driver model and W&S metric instance used by ws_comparison and make_plots.
driver = WSDriver()
ws = WangStamatiadis()

In [None]:
def ws_comparison(tolerance=0.2):
    """Plot the W&S collision probability next to the estimate of our method.

    Both probabilities are evaluated for every combination of ``WS_TTCS`` and
    ``WS_SPEED_DIFFS``. The results are cached in a pickle file whose name
    contains the tolerance; the cache is reused unless ``OVERWRITE`` is set or
    its shape no longer matches the evaluation grid.

    :param tolerance: Tolerance that is passed on to ``SimulationApproaching``.
    """
    sim = SimulationApproaching([driver], [ws_approaching_pars], tolerance=tolerance,
                                max_simulation=500)
    sim.min_simulations = 20

    def ws_with_our_method(ttc: float, speed_diff: float):
        """Return the probability estimated by the simulation-based method."""
        scenario = dict(vego=speed_diff, ratio_vtar_vego=0,
                        init_position=ttc*speed_diff)
        return sim.get_probability(scenario)

    cache_path = WS_FILENAME + "_tol{:.0f}.p".format(100*tolerance)
    expected_shape = (len(WS_TTCS), len(WS_SPEED_DIFFS), 2)

    # Try the cache first; it is loaded even if it turns out to be unusable.
    cache_found = os.path.exists(cache_path)
    if cache_found:
        with open(cache_path, "rb") as file:
            result = pickle.load(file)

    if not cache_found or OVERWRITE or result.shape != expected_shape:
        result = np.zeros(expected_shape)
        np.random.seed(0)  # Reproducible simulation results.
        for row, ttc in enumerate(WS_TTCS):
            for col, speed_diff in enumerate(WS_SPEED_DIFFS):
                # Last axis: 0 = W&S metric, 1 = our method.
                result[row, col, 0] = ws.prob_collision(ttc, speed_diff)
                result[row, col, 1] = ws_with_our_method(ttc, speed_diff)
        with open(cache_path, "wb") as file:
            pickle.dump(result, file)

    # One line style per speed difference, one colour per method.
    for col, _ in enumerate(WS_SPEED_DIFFS):
        for method in (0, 1):
            plt.plot(WS_TTCS, result[:, col, method], ls=WS_LINESTYLES[col],
                     color=WS_COLORS[method], lw=WS_LINEWIDTHS[method])
    plt.xlabel("TTC [s]")
    plt.ylabel("$P(C|x)$ according to the WS metric")
    plt.xlim(WS_TTCS[0], WS_TTCS[-1])
    plt.ylim(0, 1)
    plt.grid()

In [None]:
# Comparison with a tolerance of 0.2 (the default of ws_comparison).
ws_comparison(0.2)

In [None]:
# Same comparison with a ten times smaller tolerance.
ws_comparison(0.02)

## Apply method on NGSIM data set to create a risk metric

In [None]:
# Pickle file with the interactions that are loaded below.
NGSIM_DATA = os.path.join("data", "8_interactions_v3", 
                          "dNGSIM_iLongitudinal_mFull_Reconstruction_e100_wi1_w1_hs32_bs2",
                          "interactions.pkl")
# File to which the KDE of the leader's speed profiles is pickled.
NGSIM_KDE = os.path.join("data", "6_kde", "NGSIM2.p")
# CSV cache with the grid points and their estimated collision probabilities.
NGSIM_PROB_COLLISION = os.path.join("data", "7_simulation_results", 
                                    "prob_collision_NGSIM2.csv")
# Rounding step per grid column, in the order v0_lead, a0_lead, v0_host, loggap.
SCALING_GRID = [2, .5, 2, .25]

# Time step and number of steps of the speed profile extracted by get_pars.
DELTA_T = 0.1
NHORIZON = 50
# Number of singular vectors kept after the SVD of the speed profiles.
D_SVD = 4

In [None]:
# Load the interactions; the pickle is a dictionary that is keyed by location.
# NOTE(review): pickle.load can execute arbitrary code, so only load trusted files.
with open(NGSIM_DATA, 'rb') as file:
    all_interactions = pickle.load(file)
locations = sorted(all_interactions.keys())

In [None]:
# Filter for speed and acceleration
def filter_signal(signal):
    """Smooth a signal with a Savitzky-Golay filter (window 15, polynomial order 1)."""
    window_length, polyorder = 15, 1
    return scipy.signal.savgol_filter(signal, window_length, polyorder)

In [None]:
def get_pars(vel_acc):
    """Extract rows of (initial acceleration, speed profile) from one trajectory.

    Every 10th sample that still has a full horizon of ``NHORIZON*DELTA_T``
    ahead of it gives one row: the filtered acceleration at that instant,
    followed by the filtered speed at ``NHORIZON+1`` instants spaced by
    ``DELTA_T``.

    :param vel_acc: Time-indexed data with the columns 'Velocity_X' and
        'Acceleration_X'.
    :return: Array with ``NHORIZON+2`` columns. It has zero rows if the
        trajectory has fewer than 15 samples, contains a speed jump larger
        than 1.5 between consecutive samples, or has no usable start instant.
    """
    n_columns = NHORIZON + 2
    if len(vel_acc) < 15:
        return np.zeros((0, n_columns))
    raw_speed = vel_acc['Velocity_X']
    if np.max(np.abs(np.diff(raw_speed))) > 1.5:
        return np.zeros((0, n_columns))

    acc_smooth = filter_signal(vel_acc["Acceleration_X"])
    speed_smooth = filter_signal(raw_speed)
    stamps = vel_acc.index

    # A start instant needs a full horizon ahead; only every 10th sample is used.
    has_full_horizon = stamps < stamps[-1] - DELTA_T*NHORIZON
    is_tenth_sample = np.mod(np.arange(len(vel_acc)), 10) == 0
    is_start = np.logical_and(has_full_horizon, is_tenth_sample)
    if not np.any(is_start):
        return np.zeros((0, n_columns))

    # Row k holds the instants start_k, start_k + DELTA_T, ..., start_k + horizon.
    offsets = np.arange(NHORIZON+1)*DELTA_T
    starts = np.asarray(stamps[is_start])
    query_times = starts[:, np.newaxis] + offsets[np.newaxis, :]
    speeds = np.interp(query_times, stamps, speed_smooth)
    start_accelerations = np.asarray(acc_smooth)[is_start][:, np.newaxis]
    return np.concatenate((start_accelerations, speeds), axis=1)

In [None]:
# Collect the rows of get_pars (initial acceleration followed by the speed
# profile) for the leader of every interaction at every location.
parameters = []
for location in locations:
    parameters += [get_pars(interaction['leader']) for interaction in 
                   all_interactions[location].values()]
# Transpose such that every column is one sample and every row one feature.
pars = np.concatenate(parameters).T
mean_pars = np.mean(pars, axis=1)

# SVD of the mean-centred data; only the first D_SVD components are kept.
u, s, vt = np.linalg.svd((pars.T-mean_pars).T, full_matrices=False)
v1t = vt[:D_SVD]
s1 = s[:D_SVD]
u1 = u[:, :D_SVD]
# First two rows of u, i.e., the rows that belong to the initial acceleration
# and the initial speed (the first two features returned by get_pars).
u11 = u[:2, :D_SVD]

# Fit the KDE on the reduced coordinates of the samples.
kde = KDE(v1t.T, scaling=True)
kde.set_bandwidth(kde.silverman())
# NOTE(review): the return value is discarded; presumably this call stores the
# constraint matrix for the later constrained_sample calls that only pass
# `vector` -- confirm against stats.KDE.
kde.constrained_sample(matrix=u11*s1, vector=[0.0, 10.0])
kde.pickle(NGSIM_KDE)

In [None]:
def plot_speed_profiles(init_acc, init_vel, n_plots=50):
    """Plot speed profiles sampled from the KDE for a given initial state.

    The random seed is reset to 0, so repeated calls give the same profiles.

    :param init_acc: Initial acceleration the profiles are conditioned on.
    :param init_vel: Initial speed the profiles are conditioned on.
    :param n_plots: Number of speed profiles to draw.
    """
    np.random.seed(0)
    # The KDE describes mean-centred data, so the condition is centred as well.
    condition = [init_acc-mean_pars[0], init_vel-mean_pars[1]]
    samples = kde.constrained_sample(n_samples=n_plots, vector=condition)
    # Map back to the original space; the first column (acceleration) is dropped.
    vprofiles = np.dot(samples*s1, u1.T)[:, 1:] + mean_pars[1:]

    time_axis = np.arange(NHORIZON+1)*DELTA_T
    line_color = (.4, .4, .4)
    for vprofile in vprofiles:
        plt.plot(time_axis, vprofile, c=line_color)
    plt.xlabel("Time [s]")
    plt.ylabel("Speed [m/s]")
    title = "Initial acceleration: {:.0f} m/s$^2$, initial speed: {:.0f} m/s"
    plt.title(title.format(init_acc, init_vel))


plot_speed_profiles(1.0, 15.0)

In [None]:
# Same initial speed as in the previous cell, but with a negative initial acceleration.
plot_speed_profiles(-1, 15.0)

In [None]:
def leader_parameters(**kwargs):
    """Build the parameter object of the leader from the scenario parameters.

    Reads the keys "gap", "v0_lead", "velocities", and "times" from ``kwargs``.
    """
    key_of_field = (("init_position", "gap"),
                    ("init_speed", "v0_lead"),
                    ("velocities", "velocities"),
                    ("times", "times"))
    fields = {field: kwargs[key] for field, key in key_of_field}
    return LeaderInteractionParameters(**fields)

def follower_parameters(**kwargs):
    """Build the IDM parameter object of the follower (host) vehicle.

    Reads the keys "amin", "v0_host", and "tr" from ``kwargs``. The initial
    speed of the host is used both as initial speed and as the ``speed``
    parameter, and the host starts at position 0.
    """
    braking_limit = kwargs["amin"]
    host_speed = kwargs["v0_host"]
    # NOTE(review): the factor 100 presumably converts the reaction time to a
    # number of simulation steps of 0.01 s -- confirm against the simulator.
    reaction_steps = int(kwargs["tr"]*100)
    return IDMParameters(amin=braking_limit,
                         speed=host_speed,
                         n_reaction=reaction_steps,
                         init_speed=host_speed,
                         init_position=0)

def get_other_pars(**kwargs):
    """Complete the scenario parameters with randomly sampled quantities.

    Always added: "velocities" and "times", a leader speed profile sampled
    from the KDE given the initial acceleration ("a0_lead") and the initial
    speed ("v0_lead"). Added only if not provided: the reaction time "tr" and
    the braking capacity "amin".

    :return: The keyword arguments, extended with the sampled values.
    """
    # Sample the leader's speed profile given its initial state (mean-centred).
    condition = [kwargs["a0_lead"]-mean_pars[0],
                 kwargs["v0_lead"]-mean_pars[1]]
    sample = kde.constrained_sample(n_samples=1, vector=condition)
    profiles = np.dot(sample*s1, u1.T)[:, 1:] + mean_pars[1:]
    kwargs["velocities"] = profiles[0]
    kwargs["times"] = np.arange(NHORIZON+1)*DELTA_T

    # Reaction time: lognormal distribution with mean=.92 and std=0.28.
    if "tr" not in kwargs:
        log_mean = np.log(.92**2 / np.sqrt(.92**2 + .28**2))
        log_std = np.sqrt(np.log(1 + .28**2/.92**2))
        kwargs["tr"] = np.random.lognormal(log_mean, log_std)

    # Braking capacity: normal distribution, truncated by redrawing until the
    # value is inside the allowed interval.
    if "amin" not in kwargs:
        candidate = np.random.normal(-8.45, 1.4)
        while not -12.68 < candidate < -4.23:
            candidate = np.random.normal(-8.45, 1.4)
        kwargs["amin"] = candidate

    return kwargs

# Simulation of a leader that follows a given speed profile and a follower that
# is controlled by the IDM+ model; the two functions above build their parameters.
simulation = SimulationLongitudinal(LeaderInteraction(), leader_parameters,
                                    IDMPlus(), follower_parameters)
simulation.min_simulation_time = 5

def get_probability(plot=False, **kwargs):
    """
    Estimate the probability of a collision by repeated simulation.

    The simulation is repeated with newly sampled parameters (see
    ``get_other_pars``) until the standard error of the estimate is below 0.1.
    At least 10 and at most 100 simulations are performed. The estimate is the
    value at zero of the CDF of a KDE that is fitted to the simulation results.
    NOTE(review): this treats a non-positive simulation result as a collision;
    presumably the result is a minimum-gap-like quantity -- confirm against
    SimulationLongitudinal.

    Parameters to provide:
    - v0_lead
    - a0_lead
    - v0_host
    - gap

    If "tr" and/or "amin" are provided as well, ``get_other_pars`` uses them
    instead of sampling them.

    :param plot: Whether to plot the estimated CDF together with the results.
        No plot is made if the function returns before a KDE is fitted.
    :return: The estimated probability; 0.0 if the host speed is not positive;
        exactly 0.0 or 1.0 if all simulation results are (nearly) identical.
    :raises ValueError: If the estimated probability is NaN.
    """
    # If the host speed is zero (or negative), always return 0.0.
    if "v0_host" in kwargs and kwargs["v0_host"] <= 0.0:
        return 0.0

    min_sim = 10
    max_sim = 100
    max_std_error = 0.1
    results = np.zeros(max_sim)
    for i in range(max_sim):
        parameters = get_other_pars(**kwargs)
        results[i] = simulation.simulation(parameters)

        n_sim = i + 1  # Number of simulations performed so far.
        if n_sim < min_sim:
            continue

        # A KDE cannot be fitted if all results are the same: the outcome is
        # then either always a collision or never a collision.
        if np.std(results[:n_sim]) < 1e-8:
            return 0.0 if results[0] > 0.0 else 1.0

        kde_result = KDE(results[:n_sim], scaling=True)
        kde_result.compute_bandwidth()
        cdf_zero = kde_result.cdf(np.array([0.0]))[0]
        std_error = np.sqrt(cdf_zero*(1-cdf_zero)/n_sim)
        if std_error < max_std_error:
            break

    if np.isnan(cdf_zero):
        raise ValueError("Estimated collision probability is NaN for parameters {}"
                         .format(kwargs))

    if plot:
        _, axes = plt.subplots(1, 1)
        margin = 2*kde_result.get_bandwidth()
        minx = min(np.min(results[:n_sim]), 0) - margin
        maxx = max(np.max(results[:n_sim]), 0) + margin
        x_cdf = np.linspace(minx, maxx)
        y_cdf = kde_result.cdf(x_cdf)
        axes.plot(x_cdf, y_cdf)
        axes.set_xlim(minx, maxx)
        axes.plot(results[:n_sim], np.zeros(n_sim), '|')
        # N is the number of simulations on which the estimate is based.
        axes.set_title("N={:d}, F(0)={:.3f} +/- {:.3f}".format(n_sim, cdf_zero, std_error))

    return cdf_zero

In [None]:
# Show the result of a single simulation (fixed seed for reproducibility).
np.random.seed(0)
simulation.simulation(get_other_pars(v0_lead=15, a0_lead=-1, v0_host=20, gap=10), plot=True)

In [None]:
# Evaluate the interpolated probability at the second-to-last sample.
# NOTE(review): prob_ngsim is defined in a later cell, and v_lead, a_lead, v_host
# and distance are not assigned at top level in the visible cells; presumably
# this is a leftover of an interactive debugging session.
i = -2
prob_ngsim(v_lead[i], a_lead[i], v_host[i], distance[i])

In [None]:
%debug

In [None]:
# Show the result of calculating the probability, including the plot of the
# estimated CDF (fixed seed for reproducibility).
np.random.seed(1)
get_probability(v0_lead=10, a0_lead=-1, 
                v0_host=12, gap=np.exp(-3.5), plot=True)

In [None]:
# Create grid
def grid_pars(interaction):
    """Compute the four grid parameters for every sample of an interaction.

    Note that the filtered signals are stored as new columns ('ax_savgol' and
    'vx_savgol') in the data of the leader and the follower, so the provided
    interaction is modified.

    :param interaction: Dictionary with the data of the 'leader' and the
        'follower'.
    :return: Data frame with the columns "v0_lead", "a0_lead", "v0_host", and
        "loggap", indexed like the leader data. An empty array with 4 columns
        is returned if the leader has fewer than 15 samples.
    """
    leader = interaction['leader']
    if len(leader) < 15:
        return np.zeros((0, 4))
    follower = interaction['follower']

    leader['ax_savgol'] = filter_signal(leader["Acceleration_X"])
    leader['vx_savgol'] = filter_signal(leader["Velocity_X"])
    pars = pd.DataFrame(leader[["vx_savgol", "ax_savgol"]].values,
                        columns=["v0_lead", "a0_lead"], index=leader.index)
    follower['vx_savgol'] = filter_signal(follower["Velocity_X"])
    pars["v0_host"] = follower["vx_savgol"]

    # Net gap: distance between the positions minus half of both lengths.
    gap = (leader["Position_X"] - follower["Position_X"] -
           leader["Length"]/2 - follower["Length"]/2)
    # Lower limit. A gap of exactly zero is replaced as well, because the
    # logarithm of zero is minus infinity.
    gap[gap <= 0] = np.exp(-5)
    pars["loggap"] = np.log(gap)
    return pars

In [None]:
# Build the grid from the data, unless a cached result is available.
if OVERWRITE or not os.path.exists(NGSIM_PROB_COLLISION):
    # Stack the parameters of all interactions at all locations.
    parameters = []
    for location in locations:
        parameters += [grid_pars(interaction) for interaction in 
                       all_interactions[location].values()]
    parameters = np.concatenate(parameters)
    
    # Columns: v0_lead, a0_lead, v0_host, loggap. All columns except the
    # acceleration are clipped.
    grid = parameters.copy()
    grid[:, 0] = np.clip(grid[:, 0], 0, 100)
    # grid[:, 1] = np.clip(grid[:, 1], -5, 5)
    grid[:, 2] = np.clip(grid[:, 2], 0, 100)
    grid[:, 3] = np.clip(grid[:, 3], -5, 5)
    # Snap to multiples of SCALING_GRID and keep every grid point only once.
    grid = np.round(grid / SCALING_GRID)
    grid = np.unique(grid, axis=0)
    grid = grid * SCALING_GRID
else:
    # Reuse the grid (and the probabilities) that are stored in the CSV file.
    df = pd.read_csv(NGSIM_PROB_COLLISION, index_col=0)
    grid = df[["v0_lead", "a0_lead", "v0_host", "loggap"]].values

In [None]:
# Evaluate the collision probability for the grid
def get_probability_grid_pars(row):
    """Estimate the collision probability for one grid point.

    :param row: Sequence with, in this order, v0_lead, a0_lead, v0_host, and
        the logarithm of the gap.
    :return: The result of ``get_probability`` for this grid point.
    """
    v0_lead, a0_lead, v0_host, loggap = row[0], row[1], row[2], row[3]
    # The grid stores the logarithm of the gap, so convert it back.
    return get_probability(v0_lead=v0_lead, a0_lead=a0_lead,
                           v0_host=v0_host, gap=np.exp(loggap))

In [None]:
# Estimate the probability at every grid point and store the result, or reuse
# the probabilities of the CSV file that was read when the grid was created.
if OVERWRITE or not os.path.exists(NGSIM_PROB_COLLISION):
    prob_collision = [get_probability_grid_pars(row) for row in tqdm(grid)]
    df = pd.DataFrame(grid, columns=("v0_lead", "a0_lead", "v0_host", "loggap"))
    df["prob_collision"] = prob_collision
    df.to_csv(NGSIM_PROB_COLLISION)
else:
    prob_collision = df["prob_collision"].values

In [None]:
# Quick look at the estimated probabilities of all grid points.
plt.plot(prob_collision, '.')

In [None]:
# Define function for the interpolation
# Every grid dimension is scaled with its standard deviation, such that one
# bandwidth can be used for all dimensions.
scaling = np.std(grid, axis=0)
bandwidth = 0.30  # Very close to the Silverman's rule of thumb
grid_scaled = grid / scaling


def prob_ngsim(v0_lead, a0_lead, v0_host, gap):
    """Interpolate the collision probability from the values at the grid.

    The result is a weighted average of the probabilities at all grid points,
    with Gaussian weights based on the scaled distance to the queried point.

    :param v0_lead: Initial speed of the leader.
    :param a0_lead: Initial acceleration of the leader.
    :param v0_host: Initial speed of the host.
    :param gap: Gap between the vehicles (the grid itself uses its logarithm).
    :return: Array with one element: the interpolated probability.
    """
    query = np.array([[v0_lead, a0_lead, v0_host, np.log(gap)]]) / scaling
    sq_distance = dist.cdist(grid_scaled, query, metric='sqeuclidean')
    weights = np.exp(-sq_distance / 2 / (bandwidth**2))
    weighted_sum = np.dot(prob_collision, weights)
    return weighted_sum / np.sum(weights, axis=0)


print("Bandwidth matrix has at diagonal:")
print((bandwidth / scaling)**2)

## Method for adding data to grid

In [None]:
def add_data_to_grid(pars):
    """ 
    Add the grid points that are needed for the provided data to the grid.

    The data is snapped to multiples of SCALING_GRID. For every resulting point
    that is not yet part of the grid, the collision probability is estimated.
    The globals grid, scaling, grid_scaled, prob_collision, and df are updated
    and the extended data frame is written to NGSIM_PROB_COLLISION.
    NOTE(review): unlike the initial construction of the grid, the data is not
    clipped here -- confirm that this is intended.

    Use this function in the following manner:
    
    add_data_to_grid(<parameters>)

    :param pars: Array with the columns v0_lead, a0_lead, v0_host, loggap.
    :return: The (possibly extended) grid and the corresponding probabilities.
    """
    global grid, scaling, grid_scaled, prob_collision, df
    pars = np.round(pars / SCALING_GRID)
    pars = np.unique(pars, axis=0)
    pars = pars * SCALING_GRID

    # A point is new if no row of the grid equals it in all four columns.
    is_new = np.array([not np.any((grid == point).all(axis=1)) for point in pars],
                      dtype=bool)
    if not is_new.any():
        return grid, prob_collision
    print("{:d}/{:d} values added".format(int(is_new.sum()), len(is_new)))

    pars = pars[is_new, :]
    df_new = pd.DataFrame(pars, columns=("v0_lead", "a0_lead", "v0_host", "loggap"))
    df_new["prob_collision"] = [get_probability_grid_pars(row) for row in pars]
    df = pd.concat((df, df_new), ignore_index=True)
    df.to_csv(NGSIM_PROB_COLLISION)

    # Refresh everything that is derived from the grid.
    grid = df[["v0_lead", "a0_lead", "v0_host", "loggap"]].values
    scaling = np.std(grid, axis=0)
    grid_scaled = grid / scaling
    prob_collision = df["prob_collision"].values
    # Return the same as when nothing is added, so callers get a consistent result.
    return grid, prob_collision

## Try method for scenario 1: No risk

In [None]:
def make_plots(time, v_lead, a_lead, v_host, distance):
    """Plot a scenario and the collision probability according to both metrics.

    The first figure shows the speeds of the host and the leader (left axis)
    and the distance between them (right axis). The second figure shows the
    probability according to the W&S metric and according to our method.
    Grid points that are needed for our method are added to the grid first.

    :param time: Time instants of the scenario.
    :param v_lead: Speed of the leader at each time instant.
    :param a_lead: Acceleration of the leader at each time instant.
    :param v_host: Speed of the host at each time instant.
    :param distance: Distance between the vehicles at each time instant.
    """
    # W&S metric: stays zero as long as the host is not faster than the leader.
    prob_ws = np.zeros_like(time)
    for k, (speed_host, speed_lead, gap) in enumerate(zip(v_host, v_lead, distance)):
        if speed_host > speed_lead:
            closing_speed = speed_host - speed_lead
            prob_ws[k] = ws.prob_collision(gap / closing_speed, closing_speed)

    # Our method: make sure the grid covers the scenario and then interpolate.
    samples = np.array([v_lead, a_lead, v_host, np.log(distance)]).T
    add_data_to_grid(samples)
    prob_new = np.zeros_like(time)
    for k, state in enumerate(zip(v_lead, a_lead, v_host, distance)):
        prob_new[k] = prob_ngsim(*state)

    # First figure: speeds on the left axis, distance on the right axis.
    color_main, color_alt = WS_COLORS[0], WS_COLORS[1]
    width = WS_LINEWIDTHS[0]
    ax1 = plt.subplots()[1]
    ax1.plot(time, v_host, c=color_main, lw=width, ls=WS_LINESTYLES[0])
    ax1.plot(time, v_lead, c=color_main, lw=width, ls=WS_LINESTYLES[1])
    ax1.set_xlabel("Time [s]")
    ax1.set_ylabel("Speed [m/s]", color=color_main)
    ax1.grid()
    ax1.set_xlim(time[0], time[-1])
    ax2 = ax1.twinx()
    ax2.plot(time, distance, c=color_alt, lw=width, ls=WS_LINESTYLES[2])
    ax2.set_ylabel("Distance[m]", color=color_alt)

    # Give the right axis as many ticks as the left axis: increase the tick
    # spacing until the range of the distance fits in the available ticks.
    speed_ticks = ax1.get_yticks()[1:-1]
    nticks = len(speed_ticks)
    spacing = 1
    while int(np.max(distance)/spacing)+1 - int(np.min(distance/spacing)) >= nticks:
        spacing += 1
    gap_ticks = (np.arange(nticks)+int(np.min(distance)/spacing))*spacing
    ax2.set_yticks(gap_ticks)

    # Scale the limits of the right axis such that its ticks are at the same
    # height as the ticks of the left axis.
    speed_limits = ax1.get_ylim()
    aspect_ratio = (gap_ticks[-1] - gap_ticks[0]) / (speed_ticks[-1] - speed_ticks[0])
    margin_below = speed_ticks[0] - speed_limits[0]
    margin_above = speed_limits[1] - speed_ticks[-1]
    ax2.set_ylim(gap_ticks[0] - aspect_ratio * margin_below,
                 gap_ticks[-1] + aspect_ratio * margin_above)

    # Second figure: the probabilities according to both metrics.
    ax_prob = plt.subplots()[1]
    ax_prob.plot(time, prob_ws, c=WS_COLORS[0], lw=WS_LINEWIDTHS[0])
    ax_prob.plot(time, prob_new, c=WS_COLORS[1], lw=WS_LINEWIDTHS[1])
    ax_prob.set_ylim(0, 1)
    ax_prob.set_xlim(time[0], time[-1])
    ax_prob.set_xlabel("Time [s]")
    ax_prob.set_ylabel("Probability of collision")
    ax_prob.grid()

In [None]:
# Scenario parameters (the keyword arguments of create_data). Leader: initial
# speed, speed reduction, mean deceleration, and start time of the braking.
# Host: initial speed, speed reduction, subsequent speed increase, the two
# corresponding mean accelerations, and start time of the braking. Finally, the
# initial distance and the duration of the scenario.
parms = dict(v0_leader=20, dv_leader=10, a_leader=3, t_leader=3,
             v0_host=24, dv1_host=16, dv2_host=2, a1_host=4, a2_host=0.5, t_host=2,
             d_init=40, tmax=12)

In [None]:
def create_data(v0_leader, dv_leader, a_leader, t_leader,
                v0_host, dv1_host, dv2_host, a1_host, a2_host, t_host,
                d_init, tmax, dt=0.01):
    """Create a synthetic scenario of a host that follows a braking leader.

    All speed changes follow half a period of a cosine, such that a speed
    change ``dv`` with mean acceleration ``a`` takes ``dv/a``.

    :param v0_leader: Initial speed of the leader.
    :param dv_leader: Speed reduction of the leader.
    :param a_leader: Mean deceleration of the leader while braking.
    :param t_leader: Time at which the leader starts braking.
    :param v0_host: Initial speed of the host.
    :param dv1_host: Speed reduction of the host.
    :param dv2_host: Speed increase of the host directly after its braking.
    :param a1_host: Mean deceleration of the host while braking.
    :param a2_host: Mean acceleration of the host while accelerating.
    :param t_host: Time at which the host starts braking.
    :param d_init: Initial distance between the two vehicles.
    :param tmax: Duration of the scenario.
    :param dt: Time step of the returned signals (default 0.01).
    :return: time, v_lead, a_lead, v_host, distance (arrays of equal length).
    """
    time = np.arange(0, tmax+dt, dt)

    # Leader: constant speed, cosine-shaped braking, constant speed.
    v_lead = v0_leader * np.ones_like(time)
    a_lead = np.zeros_like(time)
    t_leader_end = dv_leader/a_leader+t_leader
    v_lead[time > t_leader_end] = v0_leader - dv_leader
    braking = np.logical_and(time > t_leader, time <= t_leader_end)
    phase = np.pi*(time[braking]-t_leader)*a_leader/dv_leader
    v_lead[braking] = dv_leader/2*(np.cos(phase)-1)+v0_leader
    a_lead[braking] = -dv_leader/2*np.sin(phase) * np.pi*a_leader/dv_leader

    # Host: constant speed, cosine-shaped braking, cosine-shaped acceleration,
    # constant speed.
    v_host = v0_host * np.ones_like(time)
    t1 = dv1_host / a1_host + t_host
    braking = np.logical_and(time > t_host, time <= t1)
    phase = np.pi*(time[braking]-t_host)*a1_host/dv1_host
    v_host[braking] = dv1_host/2*(np.cos(phase)-1)+v0_host
    t2 = t1 + dv2_host / a2_host
    accelerating = np.logical_and(time > t1, time <= t2)
    phase = np.pi*(time[accelerating]-t1)*a2_host/dv2_host
    v_host[accelerating] = -dv2_host/2*(np.cos(phase)-1)+v0_host-dv1_host
    v_host[time > t2] = v0_host - dv1_host + dv2_host

    # The distance follows from integrating the speed difference.
    distance = d_init + np.cumsum(v_lead - v_host)*dt

    return time, v_lead, a_lead, v_host, distance

In [None]:
# Scenario 1: create the data with the parameters defined above and plot it.
make_plots(*create_data(**parms))

## Try method for scenario 2: Risky

In [None]:
# Scenario 2: the host starts braking at 4 instead of 2.
parms["t_host"] = 4
make_plots(*create_data(**parms))

## Try method for scenario 3: Collision

In [None]:
# Scenario 3: on top of the changes of scenario 2, use a higher initial speed
# and a stronger braking of the host, a smaller initial distance, and a shorter
# duration of the scenario.
parms["v0_host"] = 25
parms["a1_host"] = 5
parms["d_init"] = 34.933
parms["tmax"] = 6
make_plots(*create_data(**parms))

In [None]:
# NOTE(review): time, v_lead, a_lead, v_host and distance are not assigned at
# top level in the visible cells; presumably they were defined interactively.
make_plots(time, v_lead, a_lead, v_host, distance)

# Calculate partial derivatives of our method

In [None]:
# Settings that are assigned to the simulation object in the next cell.
TOLERANCE = 0.02
MIN_SIMULATIONS = 100
MAX_SIMULATIONS = 500
# Parameters that keep the same value for all evaluations.
FIXED_PARAMETERS = dict(a0_lead=0)
# For each parameter: (first perturbed value, default value, second perturbed value).
VARIABLE_PARAMETERS = dict(v0_lead=(9.8, 10, 10.2),
                           v0_host=(19.8, 20, 20.2),
                           gap=(19.5, 20, 20.5),
                           amin=(-4.8, -5, -5.2),
                           tr=(.90, .95, 1.0))

In [None]:
# NOTE(review): get_probability uses its own hard-coded limits (10 to 100
# simulations, standard error below 0.1) and does not read these attributes;
# whether SimulationLongitudinal.simulation uses them is not visible here.
simulation.tolerance = TOLERANCE
simulation.min_simulations = MIN_SIMULATIONS
simulation.max_simulations = MAX_SIMULATIONS

In [None]:
np.random.seed(0)
# Start with all variable parameters at their default (middle) value.
parameters = FIXED_PARAMETERS.copy()
for key, values in VARIABLE_PARAMETERS.items():
    parameters[key] = values[1]
print("Default simulation result: {:.3f}".format(get_probability(**parameters)))
# Vary one parameter at a time and estimate the partial derivative with a
# finite difference between the two perturbed values.
for key, values in VARIABLE_PARAMETERS.items():
    print()
    results = [0, 0]
    for i, value in enumerate([values[0], values[2]]):
        parameters[key] = value
        results[i] = get_probability(**parameters)
        print("Change '{:s}' to {}, result: {:.3f}".format(key, value, results[i]))
    parameters[key] = values[1]  # Change back to default value
    
    print("Partial derivative for {:s}: {:.3f}"
          .format(key, (results[1]-results[0])/(values[2]-values[0])))