In [None]:
import os
import matplotlib.pyplot as plt
import multiprocessing as mp
import numpy as np
import pandas as pd
import pickle
import scipy.signal
import scipy.spatial.distance as dist
from tqdm import tqdm
from simulation import SimulationLongitudinal, LeaderInteraction, LeaderInteractionParameters, \
    IDMPlus, IDMParameters, WangStamatiadis
from load_interactions import load_interaction, save_interaction
from stats import KDE, kde_from_file
%matplotlib inline

# Estimate probability density function

In [None]:
# Load data.
# The pickle holds a mapping keyed by location; each value maps an interaction key
# to a dict with at least a 'leader' entry (see how it is indexed further down).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
filename = os.path.join("data", "8_interactions_v3", "interactions_wi10.pkl")
with open(filename, 'rb') as file:
    all_interactions = pickle.load(file)
# Sorted so that locations[0] refers to the same location on every run.
locations = sorted(all_interactions.keys())

In [None]:
# Filter for speed and acceleration
def filter_signal(signal, window_length=15, polyorder=1):
    """Smooth a 1-D signal with a Savitzky-Golay filter.

    :param signal: Array-like signal to smooth; it must contain at least
        `window_length` samples.
    :param window_length: Number of samples in the filter window (default 15,
        the value used throughout this notebook).
    :param polyorder: Order of the polynomial fitted in each window (default 1).
    :return: NumPy array with the filtered signal, same length as `signal`.
    """
    return scipy.signal.savgol_filter(signal, window_length, polyorder)

In [None]:
# Example: smooth the leader speed of one interaction and mark the local minima
# (found as peaks of the negated speed with a prominence of at least 1).
first_location = all_interactions[locations[0]]
example_key = sorted(first_location.keys())[3]
y = first_location[example_key]['leader']
# NOTE: this adds the 'vx_savgol' column to the frame stored in all_interactions.
y['vx_savgol'] = filter_signal(y["Velocity_X"])
minima_positions = scipy.signal.find_peaks(-y['vx_savgol'], prominence=1)[0]
i = y.index[minima_positions]
fig, ax = plt.subplots()
ax.plot(y['vx_savgol'])
ax.plot(y.loc[i, 'vx_savgol'], 'r.', ms=20)
ax.set_xlabel("Time [s]")
ax.set_ylabel("Speed [m/s]")

In [None]:
def get_pars(vel_acc):
    """Extract the samples used for the KDE from one leader trajectory.

    For every sample that precedes a local minimum of the filtered speed, the
    row (filtered speed, filtered acceleration, speed difference towards that
    minimum, mean acceleration towards that minimum) is computed.

    :param vel_acc: DataFrame with the columns "Velocity_X" and
        "Acceleration_X". The index is used as the time stamp (the duration is
        the difference of index labels; presumably seconds - confirm).
    :return: Array of shape (k, 4) containing every 10th row. An array of
        shape (0, 4) is returned if the trajectory has fewer than 15 samples
        or if the raw speed changes more than 1.5 between consecutive samples.
    """
    # The Savitzky-Golay window needs at least 15 samples.
    if len(vel_acc) < 15:
        return np.zeros((0, 4))
    # Skip trajectories that contain a jump in the raw speed.
    if np.max(np.abs(np.diff(vel_acc['Velocity_X']))) > 1.5:
        return np.zeros((0, 4))

    data = vel_acc.copy()
    data['ax_savgol'] = filter_signal(data["Acceleration_X"])
    data['vx_savgol'] = filter_signal(data["Velocity_X"])

    # Local minima of the filtered speed are peaks of the negated signal.
    i = data.index[scipy.signal.find_peaks(-data['vx_savgol'], prominence=1)[0]]
    data['endspeed'] = np.nan
    data['endtime'] = np.nan
    data.loc[i, 'endspeed'] = data.loc[i, 'vx_savgol']
    data.loc[i, 'endtime'] = i

    # Give every sample the speed/time of the next minimum. `bfill()` replaces
    # the deprecated `fillna(method='backfill')`. Samples after the last
    # minimum remain NaN and are dropped.
    data = data.bfill()
    data = data.dropna()

    data['duration'] = data['endtime'] - data.index
    data['vdiff'] = data['endspeed'] - data['vx_savgol']
    data['amean'] = data['vdiff'] / data['duration']
    # The minima themselves have zero duration (0/0), so they are removed.
    data = data.drop(i)
    # Keep every 10th sample to thin out the strongly correlated rows.
    return data[['vx_savgol', 'ax_savgol', 'vdiff', 'amean']].values[::10]

In [None]:
# Build the KDE of the leader parameters, or load a previously pickled one.
filename_kde = os.path.join("data", "6_kde", "NGSIM_lead_interaction2.p")
overwrite = False  # Set to True to recompute even if the file exists.
if not overwrite and os.path.exists(filename_kde):
    kde = kde_from_file(filename_kde)
else:
    # One (k, 4) array per interaction, over all locations.
    parameters = [
        get_pars(interaction['leader'])
        for location in locations
        for interaction in all_interactions[location].values()
    ]
    kde = KDE(np.concatenate(parameters), scaling=True)
    kde.clustering(kde._maxdist()*5)
    kde.compute_bandwidth()
    print("Bandwidth: {:.4f}".format(kde.bandwidth))
    kde.pickle(filename_kde)

In [None]:
def plot_data(i, j):
    """Scatter two dimensions of the KDE data, coloured by log of the summed weights.

    :param i: Column of `kde.data` shown on the horizontal axis.
    :param j: Column of `kde.data` shown on the vertical axis.
    """
    axis_labels = ('Lead speed [m/s]', 'Lead acceleration [m/s$^2$]',
                   'Speed difference [m/s]', 'Mean acceleration [m/s$^2$]')
    pairs = kde.data[:, [i, j]]
    unique_pairs = np.unique(pairs, axis=0)

    # Sum the weights of all data points that coincide with each unique pair.
    counts = []
    for pair in unique_pairs:
        matches = np.where(np.all(pairs == pair, axis=1))
        counts.append(np.sum(kde.data_helpers.weights[matches]))

    # Multiply by the stored per-dimension std before plotting
    # (presumably this undoes the KDE's scaling - confirm).
    unique_pairs[:, 0] = unique_pairs[:, 0]*kde.data_helpers.std[i]
    unique_pairs[:, 1] = unique_pairs[:, 1]*kde.data_helpers.std[j]

    fig, ax = plt.subplots(figsize=(8, 6))
    points = ax.scatter(unique_pairs[:, 0], unique_pairs[:, 1], c=np.log(counts))
    ax.set_xlabel(axis_labels[i])
    ax.set_ylabel(axis_labels[j])
    colorbar = fig.colorbar(points)
    colorbar.set_label("log $n$")
plot_data(0, 1)

In [None]:
# Columns 2 and 3 of the KDE data: speed difference versus mean acceleration.
plot_data(2, 3)

# Simulation

In [None]:
def leader_parameters(**kwargs):
    """Translate scenario keywords into the parameter object of the leader.

    Required keys: "gap", "v0_lead", "a0_lead", "dv" and "duration".

    :return: A LeaderInteractionParameters object.
    """
    settings = {
        "init_position": kwargs["gap"],
        "init_speed": kwargs["v0_lead"],
        "init_acceleration": kwargs["a0_lead"],
        "speed_difference": kwargs["dv"],
        "duration": kwargs["duration"],
    }
    return LeaderInteractionParameters(**settings)

In [None]:
def follower_parameters(**kwargs):
    """Translate scenario keywords into the parameter object of the follower.

    Required keys: "amin", "v0_host" and "tr".

    :return: An IDMParameters object.
    """
    braking = kwargs["amin"]
    host_speed = kwargs["v0_host"]
    # Reaction time expressed as a whole number of steps (truncated);
    # the factor 100 presumably is the simulation rate in Hz - confirm.
    reaction_steps = int(kwargs["tr"]*100)
    return IDMParameters(amin=braking,
                         speed=host_speed,
                         n_reaction=reaction_steps,
                         init_speed=host_speed,
                         init_position=0)

In [None]:
# Simulator that pairs LeaderInteraction and IDMPlus with the functions that
# build their parameter objects from the scenario keywords.
s = SimulationLongitudinal(LeaderInteraction(), leader_parameters, IDMPlus(), follower_parameters)
# Presumably the minimum simulated time in seconds - confirm in `simulation`.
s.min_simulation_time = 2

In [None]:
# Run one hand-picked scenario with plotting enabled as a sanity check.
s.simulation(dict(gap=30, v0_lead=20, a0_lead=-1, dv=-10, duration=5, v0_host=25, amin=-8, tr=1),
             plot=True)

# Multiple simulations

In [None]:
def get_other_pars(**kwargs):
    """Draw the remaining scenario parameters at random.

    Required keys: "v0_lead" and "a0_lead". The keys "dv", "amean",
    "duration", "tr" and "amin" are added to (or overwritten in) the keywords.

    :return: The keyword dictionary including the sampled parameters.
    """
    # Get the speed difference and the mean acceleration from the KDE,
    # conditioned on the lead speed and lead acceleration (dimensions 0 and 1).
    while True:
        (kwargs["dv"], kwargs["amean"]), = kde.conditional_sample(
            [0, 1], [kwargs["v0_lead"], kwargs["a0_lead"]])
        # Both must have the same sign so that the duration is positive. A zero
        # mean acceleration is rejected as well: it would pass the sign test
        # together with dv == 0 and then give 0/0 for the duration.
        if kwargs["amean"] != 0 and np.sign(kwargs["dv"]) == np.sign(kwargs["amean"]):
            break
    kwargs["duration"] = kwargs["dv"] / kwargs["amean"]

    # Get the reaction time from a lognormal distribution. The arguments are the
    # mean and standard deviation of the underlying normal distribution, so
    # 0.92 is the median (not the mean) of the reaction time and 0.28 is the
    # standard deviation of its logarithm.
    kwargs["tr"] = np.random.lognormal(np.log(.92), .28)

    # Get the braking capacity from a normal distribution (mean -9.7, std 1.3)
    # truncated to the interval (-12.7, -4.2) by rejection sampling.
    while True:
        kwargs["amin"] = np.random.normal(-9.7, 1.3)
        if -12.7 < kwargs["amin"] < -4.2:
            break

    return kwargs

In [None]:
def get_probability(**kwargs):
    """Estimate the collision probability of a scenario by repeated simulation.

    The keywords are passed to `get_other_pars`, and its result to
    `s.simulation`. Between 10 and 100 simulations are run. The estimate is the
    KDE-based CDF of the simulation results evaluated at zero.

    :return: 0.0 if "v0_host" is given and not positive; 0.0 or 1.0 if all
        results so far are identical (positive or not, respectively);
        otherwise the CDF at zero.
    """
    # A host that does not move gets probability 0.0.
    if "v0_host" in kwargs and kwargs["v0_host"] <= 0.0:
        return 0.0

    min_sim = 10
    max_sim = 100
    outcomes = np.zeros(max_sim)
    for n_done in range(1, max_sim + 1):
        outcomes[n_done - 1] = s.simulation(get_other_pars(**kwargs))
        if n_done < min_sim:
            continue

        so_far = outcomes[:n_done]
        # A KDE cannot be built from identical results; decide from the first one.
        if np.std(so_far) < 1e-8:
            return 0.0 if outcomes[0] > 0.0 else 1.0

        kde_result = KDE(so_far, scaling=True)
        kde_result.compute_bandwidth()
        cdf_zero = kde_result.cdf(np.array([0.0]))[0]
        # Stop once the binomial standard error of the estimate is below 0.01.
        standard_error = np.sqrt(cdf_zero*(1-cdf_zero)/n_done)
        if standard_error < 0.01:
            break
    return cdf_zero

# Show probabilities depending on 1 variable

In [None]:
# Collision probability as a function of the initial gap, for several lead
# speeds; the lead acceleration and the host speed are kept fixed.
pars = dict(a0_lead=0, v0_host=20)
v0_leads = [10, 15, 20]
gaps = np.linspace(1, 25, 25)
for v0_lead in v0_leads:
    pars["v0_lead"] = v0_lead
    results = [get_probability(gap=gap, **pars) for gap in gaps]
    # Raw strings: "\m" is not a valid escape sequence in a normal string.
    # Speeds are converted from m/s to km/h for the labels.
    plt.plot(gaps, results,
             label=r"$v_{0,\mathrm{lead}}$=" + "{:.0f} km/h".format(pars["v0_lead"]*3.6))
plt.xlabel("Initial gap [m]")
plt.ylabel("Collision probability")
plt.title(r"$a_{0,\mathrm{lead}}$=" + "{:.0f} m/s$^2$, ".format(pars["a0_lead"]) +
          r"$v_{0,\mathrm{host}}$=" + "{:.0f} km/h".format(pars["v0_host"]*3.6))
plt.legend()

# Create grid to evaluate the collision probability

The parameters are:

- `v0_lead`: The lead speed at $t=0$;
- `a0_lead`: The lead acceleration at $t=0$;
- `v0_host`: The host speed at $t=0$;
- `loggap`: The natural logarithm of the initial gap, i.e., the leader position minus the host position at $t=0$.

In [None]:
def grid_pars(interaction):
    """Compute the four grid parameters for every sample of an interaction.

    Side effect: the columns 'ax_savgol' and 'vx_savgol' are added to
    `interaction['leader']` and 'vx_savgol' to `interaction['follower']`.

    :param interaction: Dict with the DataFrames 'leader' (columns
        "Velocity_X", "Acceleration_X", "Position_X") and 'follower' (columns
        "Velocity_X", "Position_X").
    :return: DataFrame indexed like the leader with the columns "v0_lead",
        "a0_lead", "v0_host" and "loggap", or an empty array of shape (0, 4)
        if the leader has fewer than 15 samples.
    """
    leader = interaction['leader']
    if len(leader) < 15:
        return np.zeros((0, 4))

    # Smoothed signals are stored on the interaction itself.
    leader['ax_savgol'] = filter_signal(leader["Acceleration_X"])
    leader['vx_savgol'] = filter_signal(leader["Velocity_X"])
    lead_state = leader[["vx_savgol", "ax_savgol"]].values
    pars = pd.DataFrame(lead_state, columns=["v0_lead", "a0_lead"], index=leader.index)

    follower = interaction['follower']
    follower['vx_savgol'] = filter_signal(follower["Velocity_X"])
    pars["v0_host"] = follower["vx_savgol"]

    # The gap is the leader position minus the follower position.
    gap = leader["Position_X"] - follower["Position_X"]
    pars["loggap"] = np.log(gap)
    return pars

In [None]:
# Build the grid on which the collision probability is evaluated, or load the
# grid of a previous run from the CSV file.
filename = os.path.join("data", "7_simulation_results", "prob_collision_v2.csv")
overwrite = True  # NOTE(review): True forces recomputation of all grid simulations.
if not overwrite and os.path.exists(filename):
    df = pd.read_csv(filename)
    grid = df[["v0_lead", "a0_lead", "v0_host", "loggap"]].values
else:
    parameters = np.concatenate([
        grid_pars(interaction)
        for location in locations
        for interaction in all_interactions[location].values()
    ])

    # Step size and clipping range per column (v0_lead, a0_lead, v0_host, loggap).
    scaling = [2, .5, 2, .25]
    grid_lower = [0, -5, 0, 0]
    grid_upper = [26, 5, 26, 5]
    grid = np.clip(parameters, grid_lower, grid_upper)
    # Snap to the grid and keep each grid point once.
    # NOTE(review): rows containing NaN are not removed here - confirm that
    # the data cannot produce them (e.g., the log of a non-positive gap).
    grid = np.unique(np.round(grid / scaling), axis=0) * scaling

# Evaluate collision probability for grid

In [None]:
def get_probability_grid_pars(row):
    """Evaluate `get_probability` for one grid row.

    :param row: Sequence (v0_lead, a0_lead, v0_host, loggap).
    :return: The result of `get_probability` for that grid point.
    """
    scenario = dict(v0_lead=row[0], a0_lead=row[1], v0_host=row[2])
    # The grid stores the logarithm of the gap.
    scenario["gap"] = np.exp(row[3])
    return get_probability(**scenario)

In [None]:
# Simulate every grid point and store the result, or reuse the values that
# were loaded from the CSV file together with the grid.
if not overwrite and os.path.exists(filename):
    prob_collision = df["prob_collision"].values
else:
    prob_collision = []
    for row in tqdm(grid):
        prob_collision.append(get_probability_grid_pars(row))
    df = pd.DataFrame(grid, columns=("v0_lead", "a0_lead", "v0_host", "loggap"))
    df["prob_collision"] = prob_collision
    df.to_csv(filename)

# Interpolate collision probability for an interaction

In [None]:
# Divide each grid column by its standard deviation so that the distances used
# for the interpolation weigh all four parameters comparably.
# NOTE: this rebinds `scaling`, a name the grid-construction cell also uses for
# the grid step sizes.
scaling = np.std(grid, axis=0)
grid_scaled = grid / scaling

In [None]:
def prob_interaction(interaction):
    """Interpolate the grid collision probabilities for one interaction.

    A Gaussian kernel on the squared Euclidean distance in the scaled
    parameter space weights the grid points.

    :param interaction: Interaction dict; `interaction['scores'].index` selects
        the samples that are evaluated.
    :return: Array with one probability per selected sample, or None if
        `grid_pars` returns no parameters.
    """
    query = grid_pars(interaction) / scaling
    if query.size == 0:
        return None

    query = query.loc[interaction['scores'].index]
    # Rows: grid points; columns: samples of the interaction.
    sq_distance = dist.cdist(grid_scaled, query, metric='sqeuclidean')
    bandwidth = 0.3
    kernel = np.exp(-sq_distance / 2 / (bandwidth**2))
    # Weighted mean of the grid probabilities for every sample.
    return np.dot(prob_collision, kernel) / np.sum(kernel, axis=0)

In [None]:
# Store the interpolated collision probability with the scores of every
# interaction that has scores and enough samples.
for location in locations:
    for interaction in all_interactions[location].values():
        if "scores" in interaction:
            probs = prob_interaction(interaction)
            if probs is not None:
                interaction['scores']["prob_collision"] = probs

In [None]:
# List the interactions of the first location whose maximum collision
# probability exceeds 0.3.
i = 0
keys = sorted(all_interactions[locations[i]].keys())
maxx = 0
for j, key in enumerate(keys):
    interaction = all_interactions[locations[i]][key]
    if "scores" in interaction and "prob_collision" in interaction["scores"]:
        peak = np.max(interaction["scores"]["prob_collision"])
        if peak > .3:
            maxx = peak
            print(j, maxx, len(interaction["scores"]))

In [None]:
# Compare the collision probability with the anomaly score for one
# hand-picked interaction (index j in the sorted keys of location i).
i = 0
j = 1199
keys = sorted(all_interactions[locations[i]].keys())
interaction = all_interactions[locations[i]][keys[j]]
fig, ax = plt.subplots()
for column, label in (("prob_collision", "$P(C|x)$"),
                      ("Anomaly_Reconstruction", "Anomaly")):
    ax.plot(interaction["scores"][column], label=label)
ax.set_xlabel("Time [s]")
ax.set_ylabel("Score")
ax.legend()

# Recalculate THW and TTC

In [None]:
# Inspect which columns the leader frame of the last selected interaction has.
interaction["leader"].keys()

In [None]:
# Recompute TTC and THW from the raw signals and, when available, from the
# filtered speeds. TTC values below 0 or above 20 are set to 20.
# NOTE(review): the TTC denominator is lead speed minus follower speed, so the
# value is negative (and replaced by 20) while the follower is faster than the
# leader. Confirm that this matches the sign convention of the data.
# NOTE(review): THW divides the gap by the leader speed, not the follower
# speed - confirm that this is intended.
for location in locations:
    for interaction in all_interactions[location].values():
        if "scores" not in interaction:
            continue
        score_frame = interaction["scores"]
        leader, follower = interaction["leader"], interaction["follower"]
        times = score_frame.index
        gap = leader.loc[times, "Position_X"] - follower.loc[times, "Position_X"]

        variants = [("Velocity_X", "TTC2", "THW2"),
                    ("vx_savgol", "TTC2_filtered", "THW2_filtered")]
        for speed_column, ttc_name, thw_name in variants:
            # The filtered variant only exists after grid_pars processed the interaction.
            if speed_column == "vx_savgol" and "vx_savgol" not in follower:
                break
            lead_speed = leader.loc[times, speed_column]
            follow_speed = follower.loc[times, speed_column]
            ttc = gap / (lead_speed - follow_speed)
            ttc = ttc.mask((ttc < 0) | (ttc > 20), 20)
            score_frame[ttc_name] = ttc
            score_frame[thw_name] = np.abs(gap / lead_speed)

# Add Wang-Stamatiadis score

In [None]:
# Scorer used below through its `groupa` and `prob_collision` methods.
WS = WangStamatiadis()

In [None]:
# Add the Wang-Stamatiadis scores for every TTC variant.
for location in locations:
    for interaction in all_interactions[location].values():
        if "scores" not in interaction:
            continue
        score_frame = interaction['scores']
        score_frame["CPM_TTC"] = score_frame['TTC'].apply(WS.groupa)
        score_frame["CPM_TTC2"] = score_frame['TTC2'].apply(WS.groupa)
        # The remaining scores are only computed if the filtered speeds exist.
        if "vx_savgol" not in interaction["follower"]:
            continue
        score_frame["CPM_TTC2_filtered"] = score_frame['TTC2_filtered'].apply(WS.groupa)

        times = score_frame.index
        # (target column, speed column, TTC column)
        variants = (('CPM_WS', 'vx_savgol', 'TTC'),
                    ('CPM_WS2', 'Velocity_X', 'TTC2'),
                    ('CPM_WS2_filtered', 'vx_savgol', 'TTC2_filtered'))
        for target, speed_column, ttc_column in variants:
            # Second argument of prob_collision: follower speed minus leader speed.
            speed_excess = (interaction['follower'].loc[times, speed_column].values -
                            interaction['leader'].loc[times, speed_column].values)
            score_frame[target] = [
                WS.prob_collision(ttc_value, excess)
                for excess, ttc_value in zip(speed_excess, score_frame[ttc_column].values)
            ]

In [None]:
# List the interactions of the first location whose maximum CPM_TTC2 exceeds
# 0.3, together with their maximum collision probability.
i = 0
keys = sorted(all_interactions[locations[i]].keys())
maxx = 0
for j, key in enumerate(keys):
    interaction = all_interactions[locations[i]][key]
    if "scores" in interaction and "prob_collision" in interaction["scores"]:
        peak = np.max(interaction["scores"]["CPM_TTC2"])
        if peak > .3:
            maxx = peak
            print(j, maxx, len(interaction["scores"]),
                  np.max(interaction["scores"]["prob_collision"]))

In [None]:
# Compare the collision probability with the CPM_TTC2 score for one
# hand-picked interaction (index j in the sorted keys of location i).
i = 0
j = 899
keys = sorted(all_interactions[locations[i]].keys())
interaction = all_interactions[locations[i]][keys[j]]
fig, ax = plt.subplots()
for column, label in (("prob_collision", "$P(C|x)$"), ("CPM_TTC2", "WS")):
    ax.plot(interaction["scores"][column], label=label)
ax.set_xlabel("Time [s]")
ax.set_ylabel("Score")
ax.legend()

In [None]:
# Show all scores at the sample where CPM_TTC2 is largest.
interaction["scores"].loc[interaction["scores"]["CPM_TTC2"].idxmax()]

# Save data

In [None]:
# NOTE: this is the same path the interactions were loaded from, so the input
# file is overwritten with the frames that now hold the added columns.
# It also rebinds `filename`, which the grid cells use for the CSV file.
filename = os.path.join("data", "8_interactions_v3", "interactions_wi10.pkl")
with open(filename, 'wb') as file:
    pickle.dump(all_interactions, file)

# Aggregate all data for comparison

In [None]:
# Collect the selected score columns of every interaction that has a
# collision probability.
columns = ['TTC', 'CPM_TTC2', 'CPM_WS2', 'prob_collision', 'Anomaly_Reconstruction']
scores = [
    interaction['scores'][columns]
    for location in locations
    for interaction in all_interactions[location].values()
    if "scores" in interaction and "prob_collision" in interaction['scores']
]

In [None]:
# Stack the per-interaction tables row-wise; np.concatenate discards their
# index, so the result gets a fresh default index.
df_scores = pd.DataFrame(np.concatenate(scores), columns=columns)

In [None]:
# Plot the percentiles
quantiles = df_scores.quantile(np.linspace(.99, 1, 50))
quantiles[["CPM_TTC2", "CPM_WS2", "prob_collision"]].plot()

In [None]:
# Distribution of the anomaly score over all collected samples.
plt.hist(df_scores["Anomaly_Reconstruction"])