# Test scenario generation

In [None]:
import os
from typing import NamedTuple
import pickle
import matplotlib.pyplot as plt
import numpy as np
import ot
from tqdm import tqdm_notebook as tqdm
from scipy.spatial import distance
from scipy.stats import ttest_ind
from sklearn.model_selection import train_test_split
from domain_model import DocumentManagement, StateVariable
from stats import KDE

In [None]:
# Passed as `overwrite=` to the experiment functions below: when False they
# load previously pickled results if the cache file exists, when True they
# recompute and rewrite the cache.
OVERWRITE = False

# Lead vehicle decelerating

## Load data

In [None]:
# Open the stored lead-vehicle-decelerating scenarios and report how many
# entries the "scenario" collection holds.
leaddec = DocumentManagement(os.path.join("data", "5_scenarios", "lead_braking2.json"))
print("Number of lead vehicle decelerating scenarios: {:d}"
      .format(len(leaddec.collections["scenario"])))

In [None]:
# Build the feature matrix for the lead-vehicle-decelerating scenarios.
# Each row holds n samples of the target vehicle's LON_TARGET state followed
# by the scenario duration (column n).
n = 50
data = np.zeros((len(leaddec.collections["scenario"]), n+1))
for i, key in enumerate(leaddec.collections["scenario"]):
    scenario = leaddec.get_item("scenario", key)
    data[i, n] = scenario.get_duration()
    # n equally spaced time instants from the start to the end of the scenario.
    time = np.linspace(0, data[i, n], n) + scenario.get_tstart()
    # NOTE(review): column 0 of the LON_TARGET state is presumably the
    # speed of the target vehicle - confirm against the domain model.
    data[i, :n] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                     StateVariable.LON_TARGET, time)[:, 0]

# Do the weighting
weights = np.zeros(data.shape[1])
# All n profile columns share one weight based on their mean per-column std.
weights[:n] = 2 / (np.mean(np.std(data[:, :n], axis=0))*n)
# Scale the duration by the spread of the duration column itself. The previous
# code used np.std(data[:, :n]), i.e. the pooled std of the profile columns,
# which is unrelated to the duration (the cut-in data scale their scalar
# columns by their own std as well). Results cached with the old weighting
# have to be regenerated (OVERWRITE = True).
weights[n] = 1 / np.std(data[:, n])
data = data * weights

In [None]:
# Sizes used by the experiments below.
Nz = len(data) // 4  # held-out part; passed as test_size to train_test_split
N = len(data) - Nz  # remaining part of the data
Nw = 10000  # number of scenarios drawn from each generator (n=Nw)
print(N, Nz, Nw)

## Define method

In [None]:
def score(generated, data):
    """Return the optimal-transport cost between two sets of samples.

    Parameters
    ----------
    generated, data : 2-D arrays with one sample per row.

    Returns
    -------
    The value of ``ot.emd2`` for the pairwise ``cdist`` cost matrix.
    """
    # Pairwise distances (cdist's default metric) serve as transport costs.
    cost_matrix = distance.cdist(generated, data)
    # Empty weight lists: POT then weights the samples of each set uniformly.
    return ot.emd2([], [], cost_matrix)

In [None]:
def naive_approach(data, n=None):
    """Generate scenarios by resampling rows of ``data`` with replacement.

    Parameters
    ----------
    data : 2-D array with one scenario per row.
    n : number of rows to draw; defaults to ``len(data)``.

    Returns
    -------
    Array with the drawn rows (uses the global NumPy random state).
    """
    sample_size = len(data) if n is None else n
    row_ids = np.random.choice(np.arange(len(data)), sample_size)
    return data[row_ids, :]

def new_approach(data, d=3, n=None):
    """Generate scenarios from a KDE fitted on the first ``d`` SVD components.

    The centred data are decomposed with an SVD, a KDE is fitted on the first
    ``d`` columns of the left singular vectors, and every KDE sample is mapped
    back through the corresponding singular values and right singular vectors.

    Parameters
    ----------
    data : 2-D array with one scenario per row.
    d : number of singular components that are kept.
    n : number of scenarios to generate; defaults to ``len(data)``.

    Returns
    -------
    Array of shape ``(n, data.shape[1])`` with the generated scenarios.
    """
    count = len(data) if n is None else n
    center = np.mean(data, axis=0)
    left, singular, right = np.linalg.svd(data-center, full_matrices=False)

    kde = KDE(left[:, :d], scaling=True)
    kde.set_bandwidth(kde.silverman())

    samples = np.zeros((count, data.shape[1]))
    for row in range(count):
        coefficients = kde.sample()[0]
        # Back-project: scale by the singular values, then undo the centring.
        samples[row] = np.dot(coefficients*singular[:d], right[:d]) + center
    return samples

## Test for different values of d

In [None]:
def test_method(name, dmin, dmax, beta=1, seed=0, overwrite=False):
    """Score both generators on one train/test split and print the result.

    Entry 0 of the score arrays belongs to ``naive_approach``; the following
    entries belong to ``new_approach`` with d = dmin, ..., dmax. Reads the
    module-level ``data``, ``Nz`` and ``Nw``.

    Parameters
    ----------
    name : prefix of the cache file in ``data/3_test``.
    dmin, dmax : inclusive range of ``d`` passed to ``new_approach``.
    beta : weight in the combined score ``(1+beta)*scores1 - beta*scores2``.
    seed : seed for NumPy's global random state and for the data split.
    overwrite : recompute even when a cache file exists.

    One line is printed per entry with the score against the held-out data,
    the score against the training data and the combined score; entries that
    attain the minimum combined score are marked with ``*``.
    """
    cache_dir = os.path.join("data", "3_test")
    filename = os.path.join(cache_dir,
                            "{:s}_d{:d}-{:d}_seed{:d}.p".format(name, dmin, dmax, seed))
    if os.path.exists(filename) and not overwrite:
        # NOTE: unpickling executes arbitrary code; only load cache files
        # that were produced locally.
        with open(filename, "rb") as file:
            scores1, scores2 = pickle.load(file)
    else:
        # Create the cache directory before the expensive computation so that
        # a missing directory cannot make the final write fail and throw the
        # scores away.
        os.makedirs(cache_dir, exist_ok=True)

        scores1 = np.zeros(dmax-dmin+2)
        scores2 = np.zeros_like(scores1)

        np.random.seed(seed)
        indexa, indexb = train_test_split(np.arange(len(data)), test_size=Nz,
                                          random_state=seed)
        dataa, datab = data[indexa, :], data[indexb, :]

        # Baseline: resample the training data.
        generated = naive_approach(dataa, n=Nw)
        scores1[0] = score(generated, datab)
        scores2[0] = score(generated, dataa)

        for d in range(dmin, dmax+1):
            generated = new_approach(dataa, d=d, n=Nw)
            scores1[d-dmin+1] = score(generated, datab)
            scores2[d-dmin+1] = score(generated, dataa)
        with open(filename, "wb") as file:
            pickle.dump((scores1, scores2), file)

    combined_score = (1+beta)*scores1-beta*scores2
    best = np.min(combined_score)  # loop-invariant, computed once
    for i, combined in enumerate(combined_score):
        line = "q={:2d}: {:7.4f} {:7.4f} {:7.4f}".format(i, scores1[i], scores2[i], combined)
        print((line + "  *") if combined == best else line)

In [None]:
# Single split (seed 0): print the scores of the naive generator and of
# new_approach for d = 1..8, combined with beta = 0.48.
test_method("lead_vehicle_decelerating2", 1, 8, beta=.48, seed=0, overwrite=OVERWRITE)

## Test multiple times

In [None]:
def test_multiple_times(name, dmin, dmax, seed=0, overwrite=False):
    """Repeat the train/test scoring for 50 different data splits.

    Column 0 of the returned arrays belongs to ``naive_approach``; the
    following columns belong to ``new_approach`` with d = dmin, ..., dmax.
    Reads the module-level ``data``, ``N``, ``Nz`` and ``Nw``.

    Parameters
    ----------
    name : prefix of the cache file in ``data/3_test``.
    dmin, dmax : inclusive range of ``d`` passed to ``new_approach``.
    seed : seed for NumPy's global random state; also offsets the
        ``random_state`` of every split.
    overwrite : recompute even when a cache file exists.

    Returns
    -------
    (scores1, scores2) : arrays of shape ``(50, dmax-dmin+2)`` with the scores
    against the held-out data and against the training data, respectively.
    """
    cache_dir = os.path.join("data", "3_test")
    filename = os.path.join(cache_dir,
                            "{:s}_rep_d{:d}-{:d}_seed{:d}".format(name, dmin, dmax, seed))
    if os.path.exists(filename) and not overwrite:
        # NOTE: unpickling executes arbitrary code; only load cache files
        # that were produced locally.
        with open(filename, "rb") as file:
            return pickle.load(file)

    # Create the cache directory before the expensive repetitions so that a
    # missing directory cannot make the final write fail and throw the scores
    # away.
    os.makedirs(cache_dir, exist_ok=True)

    nrepeat = 50
    np.random.seed(seed)
    scores1 = np.zeros((nrepeat, dmax-dmin+2))
    scores2 = np.zeros_like(scores1)

    for i in tqdm(range(nrepeat)):
        # Every repetition uses its own, reproducible split.
        indexa, indexb = train_test_split(np.arange(N+Nz), test_size=Nz,
                                          random_state=nrepeat*seed+i)
        dataa, datab = data[indexa, :], data[indexb, :]

        # Using the default.
        generated = naive_approach(dataa, n=Nw)
        scores1[i, 0] = score(generated, datab)
        scores2[i, 0] = score(generated, dataa)

        # Using approach with different d values.
        for d in range(dmin, dmax+1):
            generated = new_approach(dataa, d=d, n=Nw)
            scores1[i, d-dmin+1] = score(generated, datab)
            scores2[i, d-dmin+1] = score(generated, dataa)

    with open(filename, "wb") as file:
        pickle.dump((scores1, scores2), file)

    return scores1, scores2

In [None]:
def boxplots(scores, ylabel="metric", ax=None):
    """Draw one box per column of ``scores`` and label it with its median.

    The first column is labelled "Training set", the remaining columns are
    labelled with d = dmin, ..., dmax, where ``dmin`` and ``dmax`` are read
    from the module-level variables of the same name.

    Parameters
    ----------
    scores : 2-D array; one column per box.
    ylabel : one of "metric", "many", "self" or "wasserstein"; selects the
        y-axis label. Any other value leaves the y-axis label untouched.
    ax : axes to draw on; a new figure is created when omitted.
    """
    if ax is None:
        _, ax = plt.subplots()
    ax.boxplot(scores)

    tick_text = ["Training set\n{:.3f}".format(np.median(scores[:, 0]))]
    for d in range(dmin, dmax+1):
        tick_text.append("d={:d}\n{:.3f}".format(d, np.median(scores[:, d-dmin+1])))
    ax.set_xticks(np.arange(1, dmax-dmin+3))
    ax.set_xticklabels(tick_text)

    axis_labels = {
        "metric": r"$M(\mathcal{W},\mathcal{Z},\mathcal{X})$",
        "many": r"$W(\hat{f},f)$",
        "self": r"$W(\mathcal{W}, \mathcal{X})$",
        "wasserstein": r"$W(\mathcal{W}, \mathcal{Z})$",
    }
    if ylabel in axis_labels:
        ax.set_ylabel(axis_labels[ylabel])

In [None]:
# dmin and dmax are also read as module-level names by boxplots().
dmin, dmax = 2, 5
beta = .5
# Repeated train/test experiment; s1 holds the scores against the held-out
# data, s2 the scores against the training data.
s1, s2 = test_multiple_times("lead_vehicle_decelerating2", dmin, dmax, overwrite=OVERWRITE)
# Plot the combined metric s1 + beta*(s1 - s2).
boxplots(s1 + beta*(s1-s2))

## Validation of scoring measure

In [None]:
def validation_scoring(name, dreal, dmin, dmax, overwrite=False):
    """Evaluate the scoring on synthetic data with a known generator.

    In each of 10 repetitions three data sets are drawn with
    ``new_approach(data, d=dreal)``: ``N`` scenarios that act as the available
    data, ``Nz`` scenarios that act as held-out data and ``Nw`` scenarios that
    act as a large reference set. The naive generator (column 0) and
    ``new_approach`` with d = dmin, ..., dmax (following columns) are then
    fitted on the first set and scored against all three. Reads the
    module-level ``data``, ``N``, ``Nz`` and ``Nw``.

    Parameters
    ----------
    name : prefix of the cache file in ``data/3_test``.
    dreal : ``d`` used to create the synthetic data sets.
    dmin, dmax : inclusive range of ``d`` that is evaluated.
    overwrite : recompute even when a cache file exists.

    Returns
    -------
    (old_few, old_many, self_few) : arrays of shape ``(10, dmax-dmin+2)`` with
    the scores against the ``Nz`` set, the ``Nw`` set and the ``N`` set.
    """
    cache_dir = os.path.join("data", "3_test")
    filename = os.path.join(cache_dir,
                            "{:s}_val_d{:d}-{:d}_dreal{:d}".format(name, dmin, dmax, dreal))
    if os.path.exists(filename) and not overwrite:
        # NOTE: unpickling executes arbitrary code; only load cache files
        # that were produced locally.
        with open(filename, "rb") as file:
            return pickle.load(file)

    # Create the cache directory before the expensive repetitions so that a
    # missing directory cannot make the final write fail and throw the scores
    # away.
    os.makedirs(cache_dir, exist_ok=True)

    nrepeat = 10
    np.random.seed(0)

    old_few = np.zeros((nrepeat, dmax-dmin+2))
    old_many = np.zeros_like(old_few)
    self_few = np.zeros_like(old_few)

    for i in tqdm(range(nrepeat)):
        data_new = new_approach(data, d=dreal, n=N)
        data_few = new_approach(data, d=dreal, n=Nz)
        data_many = new_approach(data, d=dreal, n=Nw)

        # Using the default.
        generated = naive_approach(data_new, n=Nw)
        old_few[i, 0] = score(generated, data_few)
        old_many[i, 0] = score(generated, data_many)
        self_few[i, 0] = score(generated, data_new)

        # Using new approach with different d
        for d in range(dmin, dmax+1):
            generated = new_approach(data_new, d=d, n=Nw)
            old_few[i, d-dmin+1] = score(generated, data_few)
            old_many[i, d-dmin+1] = score(generated, data_many)
            self_few[i, d-dmin+1] = score(generated, data_new)

    with open(filename, "wb") as file:
        pickle.dump((old_few, old_many, self_few), file)

    return old_few, old_many, self_few

In [None]:
def plot_correlation(old_many, old_few, self_few):
    """Plot the three score sets and how well the combined metric tracks ``old_many``.

    For 100 values of beta in [0, 1] the combined score
    ``old_few + beta*(old_few - self_few)`` is formed and the correlation
    between its column medians and the column medians of ``old_many`` is
    computed. A 2x2 figure shows box plots of ``old_many``, ``old_few`` and
    ``self_few`` and the correlation as a function of beta.

    Parameters
    ----------
    old_many, old_few, self_few : 2-D score arrays of equal shape.
    """
    betas = np.linspace(0, 1, 100)
    reference_medians = np.median(old_many, axis=0)
    correlation = np.zeros(len(betas))
    for idx, weight in enumerate(betas):
        combined = old_few + weight*(old_few - self_few)
        combined_medians = np.median(combined, axis=0)
        correlation[idx] = np.corrcoef(reference_medians, combined_medians)[0, 1]

    _, axes = plt.subplots(2, 2, figsize=(12, 8))
    (ax_many, ax_few), (ax_self, ax_corr) = axes
    boxplots(old_many, "many", ax=ax_many)
    boxplots(old_few, "wasserstein", ax=ax_few)
    boxplots(self_few, "self", ax=ax_self)

    ax_corr.plot(betas, correlation)
    ax_corr.set_xlabel(r"$\beta$")
    ax_corr.set_ylabel(r"Correlation medians of $W(\hat{f},f)$ and "
                       r"$M(\mathcal{W}, \mathcal{Z}, \mathcal{X})$")
    title = "Max correlation: {:.3f} at beta={:.2f}".format(np.max(correlation),
                                                            betas[np.argmax(correlation)])
    ax_corr.set_title(title)
    plt.tight_layout()

In [None]:
# d used to create the synthetic data sets inside validation_scoring().
dreal = 2
# dmin and dmax are the module-level values set in an earlier cell.
old_few, old_many, self_few = validation_scoring("lead_vehicle_decelerating2", dreal, dmin, 
                                                 dmax, overwrite=OVERWRITE)
plot_correlation(old_many, old_few, self_few)

# Cut-in

## Load data

y(t) – time series: (1) speed of the other car, (2) lateral position y of the other car

theta – parameters: (1) initial speed of the ego vehicle, (2) initial longitudinal position of the other car

In [None]:
# Open the stored cut-in scenarios and report how many entries the
# "scenario" collection holds.
cutins = DocumentManagement(os.path.join("data", "5_scenarios", "cut_in_scenarios2.json"))
print("Number of cut-in scenarios: {:d}".format(len(cutins.collections["scenario"])))

In [None]:
# Build the feature matrix for the cut-in scenarios. Column layout per row:
#   [0, n)    n samples of the target vehicle's LON_TARGET state (column 0)
#   [n, 2n)   n samples of the target vehicle's LAT_TARGET state
#   2n        scenario duration
#   2n+1      ego vehicle SPEED at the start of the scenario
#   2n+2      element 1 of the target vehicle's LON_TARGET state at the start
n = 50
data = np.zeros((len(cutins.collections["scenario"]), 2*n+3))
for i, key in enumerate(cutins.collections["scenario"]):
    scenario = cutins.get_item("scenario", key)
    data[i, 2*n] = scenario.get_duration()
    # n equally spaced time instants from the start to the end of the scenario.
    time = np.linspace(0,  data[i, 2*n], n) + scenario.get_tstart()
    # NOTE(review): presumably column 0 of LON_TARGET is the speed and element
    # 1 (used below) the longitudinal position - confirm against the domain model.
    data[i, :n] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                     StateVariable.LON_TARGET, time)[:, 0]
    # NOTE(review): unlike LON_TARGET, the LAT_TARGET result is not indexed
    # with [:, 0]; this assumes get_state returns n values here - TODO confirm.
    data[i, n:2*n] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                        StateVariable.LAT_TARGET, time)
    data[i, 2*n+1] = scenario.get_state(scenario.get_actor_by_name("ego vehicle"),
                                        StateVariable.SPEED, scenario.get_tstart())
    data[i, 2*n+2] = scenario.get_state(scenario.get_actor_by_name("target vehicle"),
                                        StateVariable.LON_TARGET, scenario.get_tstart())[1]

# Do the weighting
weights = np.zeros(data.shape[1])
# Each block of n time-series columns shares one weight based on the mean of
# its per-column standard deviations.
weights[:n] = 1 / (np.mean(np.std(data[:, :n], axis=0))*n)
weights[n:2*n] = 1 / (np.mean(np.std(data[:, n:2*n], axis=0))*n)
# The three scalar columns are each scaled by their own standard deviation.
weights[2*n:] = 1 / np.std(data[:, 2*n:], axis=0)
data = data * weights
# Last expression of the cell: shows the weights in the notebook output.
weights

In [None]:
# Sizes used by the experiments below (now for the cut-in data).
Nz = len(data) // 4  # held-out part; passed as test_size to train_test_split
N = len(data) - Nz  # remaining part of the data
Nw = 5000  # number of scenarios drawn from each generator (n=Nw)
print(N, Nz, Nw)

## Test for different values of d

In [None]:
# Single split (seed 0): print the scores of the naive generator and of
# new_approach for d = 1..8, combined with beta = 0.2.
test_method("cutin2", 1, 8, beta=.2, seed=0, overwrite=OVERWRITE)

## Test multiple times

In [None]:
# dmin and dmax are also read as module-level names by boxplots().
dmin, dmax = 2, 7
beta = 0.2
# Repeated train/test experiment; s1 holds the scores against the held-out
# data, s2 the scores against the training data.
s1, s2 = test_multiple_times("cutin2", dmin, dmax, overwrite=OVERWRITE)
# Plot the combined metric s1 + beta*(s1 - s2).
boxplots(s1 + beta*(s1-s2))

## Validation of scoring measure

In [None]:
# d used to create the synthetic data sets inside validation_scoring().
dreal = 3
# dmin and dmax are the module-level values set in an earlier cell.
old_few, old_many, self_few = validation_scoring("cutin2", dreal, dmin, 
                                                 dmax, overwrite=OVERWRITE)
plot_correlation(old_many, old_few, self_few)