# Test scenario generation

In [None]:
import os
from typing import NamedTuple
import pickle
import matplotlib.pyplot as plt
from tikzplotlib import save
import numpy as np
import ot
from tqdm import tqdm_notebook as tqdm
from scipy.spatial import distance
from scipy.stats import ttest_ind
from sklearn.model_selection import train_test_split
from domain_model import DocumentManagement, StateVariable
from stats import KDE

In [None]:
# When True, the evaluation functions below recompute their results even if a
# cached pickle file already exists on disk.
OVERWRITE = False

# Lead vehicle decelerating

## Load data

In [None]:
# Load the recorded lead-vehicle-braking scenarios.
leaddec = DocumentManagement(os.path.join("data", "5_scenarios", "lead_braking2.json"))

# Restrict every scenario to the span between the maximum and the minimum of
# the target vehicle's sampled LON_TARGET signal.  Scenarios for which that
# span is shorter than 0.5 (or has the minimum before the maximum) are dropped.
key_delete = []
for scenario_key in leaddec.collections["scenario"]:
    item = leaddec.get_item("scenario", scenario_key)
    grid = np.linspace(item.get_tstart(), item.get_tend(), 1000)
    target = item.get_actor_by_name("target vehicle")
    profile = item.get_state(target, StateVariable.LON_TARGET, grid)[:, 0]
    t_highest = grid[np.argmax(profile)]
    t_lowest = grid[np.argmin(profile)]
    if t_lowest < t_highest + 0.5:
        key_delete.append(scenario_key)
    else:
        item.start.conditions["time"] = t_highest
        item.end.conditions["time"] = t_lowest

# Delete only after the loop so the collection is not modified while iterating.
for scenario_key in key_delete:
    leaddec.delete_item("scenario", scenario_key)
print("Number of scenarios: {:d}".format(len(leaddec.collections["scenario"])))

In [None]:
# Number of samples used to describe the speed profile of one scenario.
n = 50

# One row per scenario: columns [0, n) hold the sampled LON_TARGET signal of the
# target vehicle, column n holds the scenario duration.
data1 = np.zeros((len(leaddec.collections["scenario"]), n+1))
for i, key in enumerate(leaddec.collections["scenario"]):
    scenario = leaddec.get_item("scenario", key)
    data1[i, n] = scenario.get_duration()
    time = np.linspace(0, data1[i, n], n) + scenario.get_tstart()
    data1[i, :n] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                      StateVariable.LON_TARGET, time)[:, 0]
    
# Do the weighting
weights1 = np.zeros(data1.shape[1])
weights1[:n] = 2 / (np.mean(np.std(data1[:, :n], axis=0))*n)
# Normalise the duration by the spread of the duration column itself.  The
# previous code used np.std(data1[:, :n]), i.e. the spread of the speed samples,
# which is inconsistent with how the scalar columns of the cut-in data are
# weighted further down.
# NOTE: results cached on disk were computed with the old weighting; rerun with
# OVERWRITE = True to refresh them.
weights1[n] = 1 / np.std(data1[:, n])
data1 = data1 * weights1
weights1

In [None]:
# Sizes used by the evaluation functions below (read as module-level globals):
# Nz = number of held-out scenarios, N = number of remaining scenarios,
# Nw = number of scenarios generated for every evaluation.
Nz = len(data1) // 4
N = len(data1) - Nz
Nw = 10000
print(N, Nz, Nw, N+Nz)

## Define method

In [None]:
def score(generated, data):
    """Return the optimal-transport cost between two sets of samples.

    The rows of `generated` and `data` are the samples.  The ground cost is the
    pairwise distance computed by `scipy.spatial.distance.cdist` with its
    default metric.  Both weight vectors are passed to `ot.emd2` as empty
    lists, which POT treats as uniform weights.
    """
    pairwise_cost = distance.cdist(generated, data)
    source_weights, target_weights = [], []
    return ot.emd2(source_weights, target_weights, pairwise_cost)

In [None]:
def naive_approach(data, n=None):
    """Resample rows of `data` uniformly at random, with replacement.

    Parameters
    ----------
    data : 2-D array whose rows are the available samples.
    n : number of rows to draw; defaults to the number of rows in `data`.

    Returns
    -------
    Array with the drawn rows (uses the global NumPy random state).
    """
    count = len(data) if n is None else n
    drawn = np.random.choice(np.arange(len(data)), count)
    return data[drawn, :]

def new_approach(data, d=3, n=None):
    """Generate samples from a KDE fitted in a reduced SVD basis.

    The column mean is removed from `data`, the result is decomposed with a
    thin SVD, and a `KDE` is built on the first `d` columns of the left
    singular vectors.  Its bandwidth is determined by `compute_bandwidth`, with
    the value returned by `silverman()` passed as `max_bw`.  Every KDE sample
    is scaled by the first `d` singular values, mapped back with the first `d`
    right singular vectors, and shifted by the mean.

    Parameters
    ----------
    data : 2-D array whose rows are the observed samples.
    d : number of singular components to keep.
    n : number of samples to generate; defaults to the number of rows.

    Returns
    -------
    Array of shape (n, data.shape[1]) with the generated samples.
    """
    count = len(data) if n is None else n
    center = np.mean(data, axis=0)
    left, singular, right = np.linalg.svd(data - center, full_matrices=False)

    kde = KDE(left[:, :d], scaling=True)
    kde.compute_bandwidth(max_bw=kde.silverman())

    generated = np.zeros((count, data.shape[1]))
    for row in range(count):
        coefficients = kde.sample()[0]
        generated[row] = np.dot(coefficients*singular[:d], right[:d]) + center
    return generated

## Test for different values of d

In [None]:
def test_method(name, data, dmin, dmax, beta=1, seed=0, overwrite=False):
    """Score the generators on one train/test split and print a table.

    The data is split once with `train_test_split` (test size is the
    module-level `Nz`).  `Nw` samples are drawn with `naive_approach` and with
    `new_approach` for every d in dmin..dmax, and each set is scored against
    the held-out part (`scores1`) and against the training part (`scores2`).
    The two score vectors are cached in
    ``data/3_test/<name>_d<dmin>-<dmax>_seed<seed>.p`` and reloaded from there
    unless `overwrite` is true.

    One line is printed per generator (row 0 is the naive approach) with both
    scores and the combined score ``(1+beta)*scores1 - beta*scores2``; the row
    with the lowest combined score is marked with ``*``.  Returns None.
    """
    filename = os.path.join("data", "3_test",
                            "{:s}_d{:d}-{:d}_seed{:d}.p".format(name, dmin, dmax, seed))
    use_cache = os.path.exists(filename) and not overwrite
    if use_cache:
        # NOTE: pickle.load can execute arbitrary code; only load trusted cache files.
        with open(filename, "rb") as file:
            scores1, scores2 = pickle.load(file)
    else:
        scores1 = np.zeros(dmax-dmin+2)
        scores2 = np.zeros_like(scores1)

        np.random.seed(seed)
        train_idx, test_idx = train_test_split(np.arange(len(data)), test_size=Nz,
                                               random_state=seed)
        train, held_out = data[train_idx, :], data[test_idx, :]

        def evaluate(samples):
            # Same order as before: held-out score first, then training score.
            return score(samples, held_out), score(samples, train)

        scores1[0], scores2[0] = evaluate(naive_approach(train, n=Nw))
        for slot, d in enumerate(range(dmin, dmax+1), start=1):
            scores1[slot], scores2[slot] = evaluate(new_approach(train, d=d, n=Nw))

        with open(filename, "wb") as file:
            pickle.dump((scores1, scores2), file)

    combined_score = (1+beta)*scores1-beta*scores2
    best = np.min(combined_score) if len(scores1) > 0 else None
    for i, test_score in enumerate(scores1):
        row = "q={:2d}: {:7.4f} {:7.4f} {:7.4f}".format(i, test_score, scores2[i],
                                                       combined_score[i])
        marker = "  *" if combined_score[i] == best else ""
        print(row + marker)

In [None]:
# Show singular values
def plot_show_svd(data):
    """Plot the singular values of the centred data and print explained variance.

    The column mean is removed from `data` before the SVD.  The singular values
    are drawn on a logarithmic y-axis, with the x-axis limited to the first
    `s_max` indices.  For each of the first `s_max` components the cumulative
    explained variance (sum of squared singular values up to that component
    divided by the total) is printed as a percentage.

    Data with fewer than `s_max + 2` singular values is supported: the lower
    y-limit falls back to the smallest available singular value and the table
    stops at the last available component.
    """
    s_max = 8
    mean = np.mean(data, axis=0)
    _, s, _ = np.linalg.svd(data-mean, full_matrices=False)
    plt.semilogy(np.arange(len(s))+1, s, '.')
    plt.xlim(0, s_max+.5)
    # Clamp the index so that small data sets do not raise an IndexError.
    plt.ylim(s[min(s_max+1, len(s)-1)], s[0]*2)
    plt.xlabel("Index")
    plt.ylabel("Singular value")
    
    print(" #  Explained variance [%]")
    total = np.sum(s**2)
    for i in range(1, min(s_max, len(s))+1):
        print("{:2d} {:.1f} %".format(i, np.sum(s[:i]**2) / total * 100))
plot_show_svd(data1)

In [None]:
# Single split (seed 0) on the lead-vehicle-decelerating data: score the naive
# approach and d = 1..8, combining the scores with beta = 0.48.
test_method("lead_vehicle_decelerating_loocv2", data1, 1, 8, beta=.48, seed=0, overwrite=OVERWRITE)

## Test multiple times

In [None]:
def test_multiple_times(name, data, dmin, dmax, seed=0, overwrite=False):
    """Repeat the train/test evaluation on 50 different random splits.

    For every repetition the rows of `data` are split with `train_test_split`
    (test size is the module-level `Nz`, random state ``50*seed + i``).  `Nw`
    samples are then generated with `naive_approach` (column 0) and with
    `new_approach` for each d in dmin..dmax (columns 1 onwards), and each set
    is scored against the held-out part and against the training part.

    Results are cached in ``data/3_test/<name>_rep_d<dmin>-<dmax>_seed<seed>``
    and loaded from there unless `overwrite` is true.

    Returns
    -------
    (scores1, scores2) : two arrays of shape (50, dmax-dmin+2) with the scores
    against the held-out data and against the training data, respectively.
    """
    filename = os.path.join("data", "3_test", 
                            "{:s}_rep_d{:d}-{:d}_seed{:d}".format(name, dmin, dmax, seed))
    if os.path.exists(filename) and not overwrite:
        # NOTE: pickle.load can execute arbitrary code; only load trusted cache files.
        with open(filename, "rb") as file:
            return pickle.load(file)
        
    nrepeat = 50
    np.random.seed(seed)
    scores1 = np.zeros((nrepeat, dmax-dmin+2))
    scores2 = np.zeros_like(scores1)
    
    for i in tqdm(range(nrepeat)):
        # Index the rows of the data actually passed in (as test_method does)
        # rather than relying on the module-level N + Nz, which only matches
        # while those globals belong to the same data set.
        indexa, indexb = train_test_split(np.arange(len(data)), test_size=Nz,
                                          random_state=nrepeat*seed+i)
        dataa, datab = data[indexa, :], data[indexb, :]

        # Using the default.
        generated = naive_approach(dataa, n=Nw)
        scores1[i, 0] = score(generated, datab)
        scores2[i, 0] = score(generated, dataa)

        # Using approach with different d values.
        for d in range(dmin, dmax+1):
            generated = new_approach(dataa, d=d, n=Nw)
            scores1[i, d-dmin+1] = score(generated, datab)
            scores2[i, d-dmin+1] = score(generated, dataa)
    
    with open(filename, "wb") as file:
        pickle.dump((scores1, scores2), file)
    
    return scores1, scores2

In [None]:
def boxplots(scores, ylabel="metric", ax=None):
    """Draw one box per column of `scores` and label it with its median.

    The first column is labelled "Training set"; the remaining columns are
    labelled d=dmin..dmax using the module-level `dmin` and `dmax`.

    Parameters
    ----------
    scores : 2-D array, one column per generator.
    ylabel : key selecting the y-axis label ("metric", "many", "self",
        "penalty" or "wasserstein"); any other value leaves the label unset.
    ax : axes to draw on; a new figure is created when omitted.
    """
    if ax is None:
        _, ax = plt.subplots()
    ax.boxplot(scores)
    ax.set_xticks(np.arange(1, dmax-dmin+3))

    tick_labels = ["Training set\n{:.3f}".format(np.median(scores[:, 0]))]
    for column, d in enumerate(range(dmin, dmax+1), start=1):
        tick_labels.append("d={:d}\n{:.3f}".format(d, np.median(scores[:, column])))
    ax.set_xticklabels(tick_labels)

    axis_labels = (
        ("metric", r"$M(\mathcal{W},\mathcal{Z},\mathcal{X})$"),
        ("many", r"$W(\hat{f},f)$"),
        ("self", r"$W(\mathcal{W}, \mathcal{X})$"),
        ("penalty", r"$W(\mathcal{W}, \mathcal{Z}) - W(\mathcal{W}, \mathcal{X})$"),
        ("wasserstein", r"$W(\mathcal{W}, \mathcal{Z})$"),
    )
    for key, text in axis_labels:
        if ylabel == key:
            ax.set_ylabel(text)
            break

In [None]:
# Range of d evaluated for the lead-vehicle-decelerating data; boxplots() reads
# dmin and dmax as module-level globals.
dmin, dmax = 2, 5
# Weight of the (test - train) penalty that is added to the test score below.
beta1 = .25
wasser_test1, wasser_train1 = test_multiple_times("lead_vehicle_decelerating_loocv2", data1, 
                             dmin, dmax, overwrite=OVERWRITE)
boxplots(wasser_test1 + beta1*(wasser_test1-wasser_train1))

## Validation of scoring measure

In [None]:
def validation_scoring(name, data, dreal, dmin, dmax, overwrite=False):
    """Evaluate the scoring measure on synthetic data with a known generator.

    In each of 50 repetitions, `new_approach(data, d=dreal)` is used to create
    three synthetic sets of sizes `N`, `Nz` and `Nw` (module-level globals).
    The set of size `N` is then treated as the observed data: `Nw` samples are
    generated from it with `naive_approach` (column 0) and with `new_approach`
    for every d in dmin..dmax (columns 1 onwards), and each generated set is
    scored against all three synthetic sets.

    Results are cached in
    ``data/3_test/<name>_val_d<dmin>-<dmax>_dreal<dreal>`` and loaded from
    there unless `overwrite` is true.

    Returns
    -------
    (old_few, old_many, self_few) : arrays of shape (50, dmax-dmin+2) with the
    scores against the `Nz`-sized, the `Nw`-sized and the `N`-sized set.
    """
    filename = os.path.join("data", "3_test",
                            "{:s}_val_d{:d}-{:d}_dreal{:d}".format(name, dmin, dmax, dreal))
    use_cache = os.path.exists(filename) and not overwrite
    if use_cache:
        # NOTE: pickle.load can execute arbitrary code; only load trusted cache files.
        with open(filename, "rb") as file:
            return pickle.load(file)

    nrepeat = 50
    np.random.seed(0)

    old_few = np.zeros((nrepeat, dmax-dmin+2))
    old_many = np.zeros_like(old_few)
    self_few = np.zeros_like(old_few)

    for rep in tqdm(range(nrepeat)):
        data_new = new_approach(data, d=dreal, n=N)
        data_few = new_approach(data, d=dreal, n=Nz)
        data_many = new_approach(data, d=dreal, n=Nw)
        targets = ((old_few, data_few), (old_many, data_many), (self_few, data_new))

        def record(column, samples):
            # Score one generated set against all three synthetic reference sets.
            for table, reference in targets:
                table[rep, column] = score(samples, reference)

        # Column 0: plain resampling of the "observed" synthetic data.
        record(0, naive_approach(data_new, n=Nw))

        # Remaining columns: the new approach for every candidate d.
        for column, d in enumerate(range(dmin, dmax+1), start=1):
            record(column, new_approach(data_new, d=d, n=Nw))

    with open(filename, "wb") as file:
        pickle.dump((old_few, old_many, self_few), file)

    return old_few, old_many, self_few

In [None]:
def plot_correlation(old_many, old_few, self_few):
    """Show the validation scores and how well the metric tracks the reference.

    For 100 values of beta in [0, 1], the correlation coefficient is computed
    between the column medians of `old_many` and the column medians of
    ``old_few + beta*(old_few - self_few)``.  A 2x2 figure is created with box
    plots of `old_many`, `old_few` and ``old_few - self_few`` and a curve of
    the correlation against beta; its title reports the maximum correlation
    and the beta at which it occurs.
    """
    beta = np.linspace(0, 1, 100)
    reference = np.median(old_many, axis=0)
    correlation = np.array([
        np.corrcoef(reference,
                    np.median(old_few + weight*(old_few - self_few), axis=0))[0, 1]
        for weight in beta
    ])

    _, axes = plt.subplots(2, 2, figsize=(12, 8))
    (ax11, ax12), (ax21, ax22) = axes
    boxplots(old_many, "many", ax=ax11)
    boxplots(old_few, "wasserstein", ax=ax12)
    boxplots(old_few - self_few, "penalty", ax=ax21)

    ax22.plot(beta, correlation)
    ax22.set_xlabel(r"$\beta$")
    ax22.set_ylabel(r"Correlation medians of $W(\hat{f},f)$ and "+
                    r"$M(\mathcal{W}, \mathcal{Z}, \mathcal{X})$")
    best = np.argmax(correlation)
    ax22.set_title("Max correlation: {:.3f} at beta={:.2f}".format(np.max(correlation),
                                                                   beta[best]))
    plt.tight_layout()

In [None]:
# Validate the scoring measure on synthetic lead-vehicle-decelerating data that
# is generated with d = dreal.
dreal = 2
old_few1, old_many1, self_few1 = validation_scoring("lead_vehicle_decelerating_loocv2", data1, dreal, 
                                                    dmin, dmax, overwrite=OVERWRITE)
plot_correlation(old_many1, old_few1, self_few1)

# Cut-in

## Load data

$y(t)$ – (1) speed of the other car, (2) lateral position $y$ of the other car

$\theta$ – (1) initial speed of the ego vehicle, (2) initial longitudinal position of the other car

In [None]:
# Load the recorded cut-in scenarios and report how many there are before filtering.
cutins = DocumentManagement(os.path.join("data", "5_scenarios", "cut_in_scenarios2.json"))
print("Number of cut-in scenarios: {:d}".format(len(cutins.collections["scenario"])))

In [None]:
def _is_usable_cut_in(candidate):
    """Return True when a cut-in scenario passes both filters below.

    The target vehicle's LON_TARGET signal (first column) must be strictly
    positive at all `n` sample times, the first lateral sample must have an
    absolute value of at least 1.2 and the last one of at most 2.
    """
    grid = np.linspace(candidate.get_tstart(), candidate.get_tend(), n)
    target_speed = candidate.get_state(candidate.get_actor_by_name("target vehicle"),
                                       StateVariable.LON_TARGET, grid)[:, 0]
    if np.any(target_speed <= 0.0):
        return False
    lateral = candidate.get_state(candidate.get_actor_by_name("target vehicle"),
                                  StateVariable.LAT_TARGET, grid)
    return not (np.abs(lateral[0]) < 1.2 or np.abs(lateral[-1]) > 2)


# Collect the keys first and delete afterwards, so the collection is not
# modified while it is being iterated.
key_delete = [key for key in cutins.collections["scenario"]
              if not _is_usable_cut_in(cutins.get_item("scenario", key))]
for key in key_delete:
    cutins.delete_item("scenario", key)
print("Number of cut-in scenarios: {:d}".format(len(cutins.collections["scenario"])))

In [None]:
# Number of samples per time series.
n = 50
# One row per cut-in scenario with the following column layout:
#   [0, n)    target vehicle LON_TARGET (first column of the returned state)
#   [n, 2n)   target vehicle LAT_TARGET
#   2n        scenario duration
#   2n+1      ego vehicle SPEED at the start of the scenario
#   2n+2      element [1] of the target vehicle's LON_TARGET state at the start
#             (presumably the initial longitudinal position - TODO confirm)
data2 = np.zeros((len(cutins.collections["scenario"]), 2*n+3))
for i, key in enumerate(cutins.collections["scenario"]):
    scenario = cutins.get_item("scenario", key)
    data2[i, 2*n] = scenario.get_duration()
    # n equally spaced sample times covering the whole scenario.
    time = np.linspace(0,  data2[i, 2*n], n) + scenario.get_tstart()
    data2[i, :n] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                      StateVariable.LON_TARGET, time)[:, 0]
    data2[i, n:2*n] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                         StateVariable.LAT_TARGET, time)
    data2[i, 2*n+1] = scenario.get_state(scenario.get_actor_by_name("ego vehicle"),
                                         StateVariable.SPEED, scenario.get_tstart())
    data2[i, 2*n+2] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                         StateVariable.LON_TARGET, scenario.get_tstart())[1]

# Do the weighting
# Each of the two time series shares one weight across its n columns (inverse of
# the mean per-column standard deviation, divided by n); each of the three
# scalar columns is divided by its own standard deviation.
weights2 = np.zeros(data2.shape[1])
weights2[:n] = 1 / (np.mean(np.std(data2[:, :n], axis=0))*n)
weights2[n:2*n] = 1 / (np.mean(np.std(data2[:, n:2*n], axis=0))*n)
weights2[2*n:] = 1 / np.std(data2[:, 2*n:], axis=0)
data2 = data2 * weights2
weights2

In [None]:
# Redefine the module-level sizes for the cut-in data: Nz held-out scenarios,
# N remaining scenarios and Nw generated scenarios per evaluation.
Nz = len(data2) // 4
N = len(data2) - Nz
Nw = 5000
print(N, Nz, Nw, N+Nz)

In [None]:
# Singular values and cumulative explained variance of the weighted cut-in data.
plot_show_svd(data2)

## Test for different values of d

In [None]:
# `beta2` is otherwise first assigned in the "Test multiple times" cell further
# down, so running the notebook top to bottom raised a NameError here.  Define
# it (with the same value) before its first use.
beta2 = 0.25
# Single split (seed 0) on the cut-in data: score the naive approach and d = 1..8.
test_method("cutin_loocv2", data2, 1, 8, beta=beta2, seed=0, overwrite=OVERWRITE)

## Test multiple times

In [None]:
# Range of d evaluated for the cut-in data; boxplots() reads dmin and dmax as
# module-level globals.
dmin, dmax = 2, 7
# Weight of the (test - train) penalty that is added to the test score below.
beta2 = 0.25
wasser_test2, wasser_train2 = test_multiple_times("cutin_loocv3", data2, dmin, dmax, 
                                                  overwrite=OVERWRITE)
boxplots(wasser_test2 + beta2*(wasser_test2-wasser_train2))

## Validation of scoring measure

In [None]:
# Validate the scoring measure on synthetic cut-in data generated with d = 3.
dreal = 3
old_few2, old_many2, self_few2 = validation_scoring("cutin_loocv2", data2, dreal, dmin, 
                                                 dmax, overwrite=OVERWRITE)
plot_correlation(old_many2, old_few2, self_few2)

In [None]:
# Same validation, with synthetic cut-in data generated with d = 7.
dreal = 7
old_few3, old_many3, self_few3 = validation_scoring("cutin_loocv2", data2, dreal, dmin, 
                                                    dmax, overwrite=OVERWRITE)
plot_correlation(old_many3, old_few3, self_few3)

# Plots for papers

In [None]:
# Directory that receives the exported TikZ figures.
FOLDER = os.path.join("..", "..", "20210301 Scenario generation", "figs")


def tikz_save(name):
    """Export the current matplotlib figure to ``<FOLDER>/<name>.tikz``.

    The axis size is delegated to the LaTeX lengths ``\\figurewidth`` and
    ``\\figureheight``.  Extra pgfplots axis options centre the x tick labels
    and print y tick labels as fixed-point numbers with a precision of 3,
    without a common scaling factor.
    """
    destination = os.path.join(FOLDER, "{:s}.tikz".format(name))
    y_tick_style = ("yticklabel style={/pgf/number format/fixed,"+
                    "/pgf/number format/precision=3}")
    axis_options = ["xticklabel style={align=center}",
                    y_tick_style,
                    "scaled y ticks=false"]
    save(destination,
         axis_width='\\figurewidth', axis_height='\\figureheight',
         extra_axis_parameters=axis_options)

# Results grouped per scenario category for the loops below:
# index 0 = lead vehicle decelerating, index 1 = cut-in.
wasser_tests = [wasser_test1, wasser_test2]
wasser_trains = [wasser_train1, wasser_train2]
# For the cut-in category the validation run with dreal = 3 is used.
old_manys = [old_many1, old_many2]
old_fews = [old_few1, old_few2]
self_fews = [self_few1, self_few2]
betas = [beta1, beta2]
# Line style per category in the correlation plot.
linestyles = ["-", "--"]
# Number of scenarios drawn in the recorded/generated scenario plots.
nplots = 100

### Plot scores for both methods

In [None]:
# Shape of the validation scores: (repetitions, number of evaluated generators).
old_few1.shape

In [None]:
# One figure per scenario category with the median of the combined metric
# (filled circles), of the test score (open squares) and of the test-minus-train
# penalty (filled squares) for every generator.
for i, (wasser_test, wasser_train, beta) in enumerate(zip(wasser_tests, wasser_trains, betas)):
    f, ax = plt.subplots()
    s = np.median(wasser_test + beta*(wasser_test - wasser_train), axis=0)
    # The score arrays have one column for the training-set baseline plus one per
    # d in dmin..dmax, i.e. shape[1] == dmax - dmin + 2.  Solving for dmax gives
    # shape[1] - 2 + dmin.  The previous expression (shape[1] + 2 - dmin) only
    # produced the right value because dmin happens to be 2.
    dmax = wasser_test.shape[1] - 2 + dmin
    ax.plot(s, "o", c=(0, 0, 0))
    ax.plot(np.median(wasser_test, axis=0), "s", mfc=(1, 1, 1), mec=(0, 0, 0))
    ax.plot(np.median(wasser_test - wasser_train, axis=0), "s", c=(0, 0, 0))
    ax.set_xticks(np.arange(0, dmax-dmin+2))
    ax.set_xticklabels([r"Training\\set"] + 
                       [r"\$\dimension={:d}\$".format(d) for d in range(dmin, dmax+1)])
    ax.set_ylabel(r"Metric")
    tikz_save("scores_{:d}".format(i+1))

### Plot "true" Wasserstein

In [None]:
# One figure per scenario category with the medians of the validation scores:
# combined metric (filled circles), score against the small set (open squares),
# penalty (filled squares) and score against the large set (open circles).
for i, (old_many, old_few, self_few, beta) in enumerate(zip(old_manys, old_fews, self_fews, betas)):
    f, ax = plt.subplots()
    # The score arrays have dmax - dmin + 2 columns (training-set baseline plus
    # one per d), so dmax = shape[1] - 2 + dmin.  The previous expression
    # (shape[1] + 2 - dmin) was only correct because dmin happens to be 2.
    dmax = old_few.shape[1] - 2 + dmin
    ax.plot(np.median(old_few + beta*(old_few-self_few), axis=0), "o", c=(0, 0, 0))
    ax.plot(np.median(old_few, axis=0), "s", mfc=(1, 1, 1), mec=(0, 0, 0))
    ax.plot(np.median(old_few - self_few, axis=0), "s", c=(0, 0, 0))
    ax.plot(np.median(old_many, axis=0), marker="o", mfc=(1, 1, 1), mec=(0, 0, 0), linestyle="")
    ax.set_xticks(np.arange(0, dmax-dmin+2))
    ax.set_xticklabels([r"Training\\set"] + 
                       [r"\$\dimension={:d}\$".format(d) for d in range(dmin, dmax+1)])
    ax.set_ylabel("Wasserstein metric")
    tikz_save("wasserstein_{:d}".format(i+1))

### Correlation

In [None]:
f, ax = plt.subplots()
# solid = lead vehicle decelerating
# dashed = cut-in
penalty_weights = np.linspace(0, 1, 100)
for old_many, old_few, self_few, ls in zip(old_manys, old_fews, self_fews, linestyles):
    reference = np.median(old_many, axis=0)
    # Correlation between the reference medians and the medians of the metric,
    # evaluated for every candidate penalty weight.
    correlation = np.array([
        np.corrcoef(reference,
                    np.median(old_few + weight*(old_few - self_few), axis=0))[0, 1]
        for weight in penalty_weights
    ])
    ax.plot(penalty_weights, correlation, c=(0, 0, 0), linestyle=ls)
    # Report the penalty weight with the highest correlation.
    print(penalty_weights[np.argmax(correlation)])
ax.set_xlim(0, 1)
ax.set_xlabel(r"\$\penaltyweight\$")
ax.set_ylabel("Correlation")
tikz_save("correlation")

### Generated scenarios vs recorded scenarios

In [None]:
# Plot nplots randomly chosen recorded lead-vehicle-decelerating scenarios.
np.random.seed(0)
naive = naive_approach(data1, n=nplots)
f, ax = plt.subplots()
for row in naive:
    # Undo the weighting: the last column is the duration, the others the speed.
    duration = row[-1]/weights1[-1]
    ax.plot(np.linspace(0, duration, n), 3.6*row[:-1]/weights1[:-1], c=(.2, .2, .2))
ax.set_xlabel("Time [s]")
ax.set_ylabel("Speed [km/h]")
tikz_save("lvd_recorded")

In [None]:
# Plot up to nplots generated lead-vehicle-decelerating scenarios (d = 2).
np.random.seed(0)
generated = new_approach(data1, n=nplots*2, d=2)
# Reject samples whose (un-weighted) duration is at most 0.2 and keep the first
# nplots remaining ones, in their original order.  This selects the same rows
# as the previous skip-ahead loop, but cannot index past the end of `generated`
# when too many samples are rejected.
durations = generated[:, -1]/weights1[-1]
accepted = generated[~(durations <= 0.2)][:nplots]
if len(accepted) < nplots:
    print("Only {:d} of the {:d} requested scenarios have a duration above 0.2".format(
        len(accepted), nplots))
f, ax = plt.subplots()
for row in accepted:
    time = np.linspace(0, row[-1]/weights1[-1], n)
    ax.plot(time, 3.6*row[:-1]/weights1[:-1], c=(.2, .2, .2))
ax.set_xlabel("Time [s]")
ax.set_ylabel("Speed [km/h]")
tikz_save("lvd_generated")

In [None]:
# Plot the lateral position of nplots randomly chosen recorded cut-in scenarios.
np.random.seed(0)
naive = naive_approach(data2, n=nplots)
f, ax = plt.subplots()
for i in range(nplots):
    # Column 2*n holds the weighted duration, so it must be divided by its own
    # weight.  The previous code divided by weights2[-1], which is the weight of
    # column 2*n+2, and therefore produced a wrongly scaled time axis.
    time = np.linspace(0, naive[i, 2*n]/weights2[2*n], n)
    ax.plot(time, naive[i, n:2*n]/weights2[n:2*n], c=(.2, .2, .2))
ax.set_xlabel("Time [s]")
ax.set_ylabel("Lateral position [m]")
# tikz_save("ci_recorded")

In [None]:
# Plot the lateral position of up to nplots generated cut-in scenarios (d = 3).
np.random.seed(1)
generated = new_approach(data2, n=nplots*3, d=3)
# Reject samples whose (un-weighted) duration is at most 0.2 and keep the first
# nplots remaining ones, in their original order.  This selects the same rows
# as the previous skip-ahead loop, but cannot index past the end of `generated`
# when too many samples are rejected.
durations = generated[:, 2*n]/weights2[2*n]
accepted = generated[~(durations <= 0.2)][:nplots]
if len(accepted) < nplots:
    print("Only {:d} of the {:d} requested scenarios have a duration above 0.2".format(
        len(accepted), nplots))
f, ax = plt.subplots()
for row in accepted:
    time = np.linspace(0, row[2*n]/weights2[2*n], n)
    ax.plot(time, row[n:2*n]/weights2[n:2*n], c=(.2, .2, .2))
ax.set_xlabel("Time [s]")
ax.set_ylabel("Lateral position [m]")
# tikz_save("ci_generated")