# Scenario similarity measures

In [None]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
from matplotlib2tikz import save as tikz_save
import os
import scipy
from DynamicTimeWarping import DynamicTimeWarping
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
tikz_path = 'figures'
def tikz(name, extras=None):
    """Export a figure to ``<tikz_path>/<name>.tikz`` via ``tikz_save``.

    name:   file name without extension; the '.tikz' suffix is appended here.
    extras: forwarded unchanged as ``extra_axis_parameters``.

    The figure size is left to the LaTeX document through the
    ``\\figureheight`` and ``\\figurewidth`` macros.
    NOTE(review): presumably tikz_save exports the current matplotlib figure;
    confirm against the matplotlib2tikz documentation.
    """
    target = os.path.join(tikz_path, '{:s}.tikz'.format(name))
    tikz_save(target,
              figureheight='\\figureheight',
              figurewidth='\\figurewidth',
              extra_axis_parameters=extras,
              show_info=False)

In [None]:
# Helper used by the table-generating cells
table_folder = 'tabular'

def write(f, text):
    """Write ``text`` to the open file ``f`` and echo it to stdout.

    The echo adds no trailing newline, so the console shows exactly what
    ends up in the file.
    """
    # File first, then console: if the file write fails nothing is echoed.
    f.write(text)
    print(text, end='')

In [None]:
# Print a square table with the pairwise scores of a set of profiles
def table_distance(V, func, **kwargs):
    """Print ``func(V[i], V[j], **kwargs)`` for every ordered pair i != j.

    V:      sequence of profiles; only ``len(V)`` and indexing are used here.
    func:   callable taking two profiles (plus ``kwargs``) and returning a number.
    kwargs: forwarded unchanged to every ``func`` call.

    Rows and columns are labelled 1..len(V); the diagonal is left blank.
    Nothing is returned.
    """
    count = len(V)

    # Header row: two blanks above the row labels, then the column labels.
    print("  ", end="")
    for label in range(1, count + 1):
        print(" {:7d}".format(label), end="")
    print("")

    for row in range(count):
        print("{:2d}".format(row + 1), end="")
        for col in range(count):
            if row != col:
                cell = " {:7.1e}".format(func(V[row], V[col], **kwargs))
            else:
                # A profile is not compared with itself.
                cell = " {:7s}".format("")
            print(cell, end="")
        print("")

## Create two example figures

In [None]:
# Load the pickled tuple (dfs, scaling) produced by the parametrization step.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load files from a trusted source.
with open(os.path.join('..', '20171126 Parametrization', 'df.p'), 'rb') as f:
    (dfs, scaling) = pickle.load(f)
# Only use first n profiles
n = 400
dfs = dfs[:n].copy()
# scaling is transposed before slicing, so afterwards its rows are indexed by
# profile (presumably it is stored with one column per profile; TODO confirm).
scaling = scaling.T[:n].copy()

In [None]:
# Show a profile
# 322: hardest braking
# 292: Looks most at 322 when only looking at start and end velocity
# 143: Looks very similar to 292
# 381: Acceleration similar to 292, but much shorter
ii = [322, 143, 292, 381]
# Sample time used for resampling (and by the distance functions defined in
# later cells); presumably in seconds, matching the 'Time [s]' axis label.
ts = 0.1
# V collects the resampled velocity profiles, A their numerical derivatives.
V = []
A = []
for j, i in enumerate(ii):
    # Undo the normalization: time is multiplied by scaling[i, 0]; velocity is
    # mapped back using scaling[i, 1] and scaling[i, 2] and multiplied by 3.6
    # (presumably m/s -> km/h, matching the axis label; TODO confirm).
    t = dfs[i]['time']*scaling[i, 0]
    v = (dfs[i]['vel']*(scaling[i, 1] - scaling[i, 2]) + scaling[i, 2])*3.6
    # Resample on a uniform grid with step ts.
    # NOTE(review): the arange start is t.values[0] (a time) while the stop is
    # t.values[-1] / ts (a sample count); this is only consistent when the
    # profile starts at t = 0 -- confirm.
    tnew = np.arange(t.values[0], np.floor(t.values[-1] / ts)) * ts
    vnew = np.interp(tnew, t, v)
    V.append(vnew)
    # Finite-difference derivative of the resampled velocity per unit of ts.
    A.append(np.gradient(vnew) / ts)
    plt.plot(tnew, vnew, '.', label="Profile {:d}".format(j+1), ms=1)
    # Velocity range divided by the time scaling factor.
    print("Average deceleration: {:.2f} m/s^2".format((scaling[i, 1] - scaling[i, 2]) / scaling[i, 0]))
plt.grid('on')
plt.xlabel('Time [s]')
plt.ylabel('Velocity [km/h]')
plt.legend()
tikz('velocity_profile_example')

## Euclidean distance

In [None]:
def lp(xx, yy, p):
    """Return the L^p distance between two sequences of possibly different length.

    ``yy`` is linearly resampled to ``len(xx)`` equally spaced points that span
    its full index range, after which the p-norm of the element-wise
    difference is computed.

    xx: 1-D sequence of numbers; it defines the number of samples compared.
    yy: 1-D sequence of numbers (at least one element).
    p:  order of the norm (e.g. 2 for the Euclidean distance).
    """
    xx = np.asarray(xx)
    yy = np.asarray(yy)

    # Resample yy to have the same number of elements. The last valid index of
    # yy is len(yy) - 1; sampling up to len(yy) would run past the data, where
    # np.interp clamps to the final value, so that even lp(x, x, p) would be
    # non-zero.
    positions = np.linspace(0, len(yy) - 1, len(xx))
    yy = np.interp(positions, np.arange(len(yy)), yy)

    # Compute the norm
    return np.sum(np.abs(xx - yy)**p) ** (1/p)

def euclidean(xx, yy):
    """Return the Euclidean distance between ``xx`` and ``yy``: ``lp`` with p = 2."""
    order = 2
    return lp(xx, yy, order)

In [None]:
# Print the Euclidean distance for every ordered pair of distinct profiles.
n = len(V)
for i in range(n):
    first = V[i]
    for j in range(n):
        if i == j:
            # Skip the comparison of a profile with itself.
            continue
        print("Deuclidean({:d},{:d}) = {:.1f}".format(i+1, j+1, euclidean(first, V[j])))

In [None]:
# For selected pairs (i, j): plot profile i next to profile j resampled to the
# number of samples of profile i.
for i, j in zip([1, 1, 0], [0, 2, 2]):
    f, ax = plt.subplots(1, 1, figsize=(4, 3))
    ax.plot(V[i], '.', label='Profile {:d}'.format(i+1), ms=1)
    # The sample positions must end at the last valid index, len(V[j]) - 1;
    # ending at len(V[j]) would make np.interp clamp the tail of the curve to
    # the final value.
    ax.plot(np.interp(np.linspace(0, len(V[j]) - 1, len(V[i])),
                      np.arange(0, len(V[j])), V[j]), '.',
            label='Profile {:d}'.format(j+1), ms=1)
    ax.set_xlabel('Sample')
    ax.set_ylabel('Velocity [km/h]')
    # Pass a real boolean; the string 'on' only worked because it is truthy.
    ax.grid(True)
    tikz('velocity_profiles_resampled{:d}{:d}'.format(i+1, j+1))

## Feature-based

### Fourier coefficients

In [None]:
def fft(x):
    """Return ``(f, X)``: the frequency grid and the one-sided FFT of ``x``.

    ``X`` is ``np.fft.rfft(x)``. Bin k of ``f`` equals ``k / (len(x) * ts)``,
    where ``ts`` is the module-level sample time.
    """
    n_samples = len(x)
    spectrum = np.fft.rfft(x)
    # One frequency per returned bin: n_samples // 2 + 1 of them.
    grid = np.arange(n_samples // 2 + 1) / (n_samples * ts)
    return grid, spectrum
def fft_at_freq(x, freqs):
    """Return the spectrum of ``x`` linearly interpolated at ``freqs``.

    The spectrum and its frequency grid come from ``fft``.
    """
    grid, spectrum = fft(x)
    return np.interp(freqs, grid, spectrum)
def fourier_based_distance(x, y):
    """Return the Euclidean distance between the Fourier features of ``x`` and ``y``.

    The features are the spectrum values at the module-level frequencies
    ``freq``, each divided by the length of its own series.
    """
    features = [fft_at_freq(series, freq) / len(series) for series in (x, y)]
    difference = features[0] - features[1]
    return np.sqrt(np.sum(np.abs(difference)**2))

In [None]:
# Plot the length-normalized magnitude (left, log scale) and the phase in
# degrees (right) of the spectrum of every profile.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3))
for v in V:
    # Note: f is rebound here from the figure handle to the frequency grid.
    f, X = fft(v)
    ax1.semilogy(f, np.abs(X)/len(v), '.')
    ax2.plot(f, np.angle(X)/np.pi*180, '.')
ax1.grid('on')
ax2.grid('on')

In [None]:
# Frequencies at which the spectra are compared; fourier_based_distance reads
# this module-level name, so it must be defined before the call below.
freq = [1, 2, 3]
table_distance(V, fourier_based_distance)

In [None]:
# Write a LaTeX table with the length-normalized Fourier coefficients of every
# profile at the frequencies in freq (the header is hard-coded for 1, 2, 3 Hz).
with open(os.path.join(table_folder, 'fourier_coefficients.tex'), 'w') as f:
    write(f, '\\begin{tabular}{crrr}\n')
    write(f, '    \\toprule\n')
    write(f, '    Time    & \\multicolumn{3}{c}{Fourier coefficients} \\\\\n')
    write(f, '    series  & $\\unit[1]{Hz}$ & $\\unit[2]{Hz}$ & $\\unit[3]{Hz}$ \\\\\\otoprule\n')
    for i, v in enumerate(V):
        F = fft_at_freq(v, freq) / len(v)
        f_real = np.real(F)
        f_imag = np.imag(F)
        write(f, '    $\\profile{{{:d}}}$'.format(i+1))
        for real, imag in zip(f_real, f_imag):
            # The sign is written explicitly, followed by the magnitude of the
            # imaginary part, giving the form "a+b\imagi" / "a-b\imagi".
            write(f, ' & ${:.2f}{:s}{:.2f}\\imagi$'.format(real, '+' if imag >= 0 else '-', np.abs(imag)))
        write(f, ' \\\\\n')
    write(f, '    \\bottomrule\n')
    write(f, '\\end{tabular}\n')

### Parameters

In [None]:
# Features: time duration, velocity range, end velocity
def parameter_based_distance(x, y):
    """Return a weighted Euclidean distance between three features of ``x`` and ``y``.

    The features of a series are its duration ``(len - 1) * ts`` (``ts`` is the
    module-level sample time), its range ``max - min`` and its last value.
    The squared feature differences are weighted with 1, 0.2 and 0.2.
    """
    def features(series):
        return ((len(series) - 1) * ts,
                np.max(series) - np.min(series),
                series[-1])

    weights = (1, 0.2, 0.2)
    terms = [weight * (fx - fy)**2
             for weight, fx, fy in zip(weights, features(x), features(y))]

    return np.sqrt(terms[0] + terms[1] + terms[2])
# Print the pairwise parameter-based distances of the example profiles.
table_distance(V, parameter_based_distance)

### Splines

In [None]:
# Needs normalization of time

### Auto-regressive models

Problem: the duration of the time series is not taken into account.

In [None]:
def ar(x, n=3):
    """Fit ``x[k] = a[0] + a[1]*x[k-1] + ... + a[n-1]*x[k-n+1]`` by least squares.

    x: 1-D sequence of samples.
    n: number of coefficients (constant term plus ``n - 1`` lagged samples).

    Returns the coefficients ``a`` as computed by ``np.linalg.lstsq``.
    """
    total = len(x)
    # Column 0 stays at one (constant term); column `lag` holds x delayed by
    # `lag` samples, aligned with the targets x[n-1:].
    design = np.ones((total - n + 1, n))
    for lag in range(1, n):
        design[:, lag] = x[n - 1 - lag:total - lag]
    targets = x[n-1:]
    solution = np.linalg.lstsq(design, targets)
    return solution[0]
def ar_distance(x, y, n=3):
    """Return the Euclidean distance between the ``ar`` coefficients of ``x`` and ``y``.

    n: number of auto-regression coefficients fitted to each series.
    """
    delta = ar(x, n=n) - ar(y, n=n)
    return np.sqrt(np.sum(delta**2))

In [None]:
# Fit an auto-regressive model to every profile and plot the simulated model
# output next to the original samples, one subplot per profile.
f, axs = plt.subplots(1, len(V), figsize=(5*len(V), 4))
n = 3
# Note: A is rebound here; from now on it holds the AR coefficient vectors.
A = []
for j, (v, ax) in enumerate(zip(V, axs)):
    # The model is fitted on v / 3.6 (presumably km/h -> m/s; TODO confirm).
    a = ar(v/3.6, n=n)
    A.append(a)
    # Simulate the model: seed with the first n-1 samples, then feed the
    # model's own previous outputs back in.
    y = np.zeros_like(v)
    y[:n-1] = v[:n-1] / 3.6
    for i in range(n-1, len(y)):
        # a[1:] is ordered from the most recent lag to the oldest, while the
        # slice of y runs from oldest to most recent, hence the flip.
        y[i] = a[0] + np.sum(np.flipud(a[1:]) * y[i-n+1:i])
    # Scale back to the unit used for plotting.
    y *= 3.6
    ax.plot(v, '.', label='Original')
    ax.plot(y, '.', label='Auto-regression fit')
    ax.set_xlabel('Sample')
    ax.set_ylabel('Velocity [km/h]')
    ax.set_title('Time series {:d}'.format(j+1))
    ax.grid('on')
    ax.legend()

In [None]:
# Write a LaTeX table with one row per entry of A (the auto-regression
# coefficient vectors) and one column per coefficient. The text is also
# echoed to stdout by write().
with open(os.path.join(table_folder, 'ar_coefficients.tex'), 'w') as f:
    write(f, '\\begin{tabular}{crrr}\n')
    write(f, '    \\toprule\n')
    write(f, '    Time    & \\multicolumn{3}{c}{Auto-regression coefficients} \\\\\n')
    # The LaTeX thin space has to be spelled '\\,': a bare '\,' is an invalid
    # escape sequence in a normal string literal and triggers a
    # DeprecationWarning/SyntaxWarning. The text written is unchanged.
    write(f, '    series & $a_0\\,[\\unit{m/s}]$ & $a_1$ & $a_2$ \\\\ \\otoprule\n')
    for i, aa in enumerate(A):
        write(f, '    $\\profile{{{:d}}}$'.format(i+1))
        for a in aa:
            write(f, ' & {:.2f}'.format(a))
        write(f, ' \\\\\n')
    write(f, '    \\bottomrule\n')
    write(f, '\\end{tabular}\n')

In [None]:
# Print the pairwise distances between the AR coefficients (n=3 is forwarded
# to ar_distance through table_distance's keyword arguments).
table_distance(V, ar_distance, n=3)

### Compare all feature-based methods

In [None]:
# Write a LaTeX table comparing the three feature-based distances for every
# unordered pair of distinct profiles (j > i).
# NOTE(review): ar_distance is evaluated on V directly here, whereas the AR
# coefficients in the plotting cell are fitted on v / 3.6 -- confirm that the
# difference in scaling is intended.
with open(os.path.join(table_folder, 'feature_based.tex'), 'w') as f:
    write(f, '\\begin{tabular}{ccrrr}\n')
    write(f, '    \\toprule\n')
    write(f, '    \\multicolumn{2}{c}{Time series} & \\multicolumn{3}{c}{Features} \\\\\n')
    write(f, '    \\textbf{x} & \\textbf{y} & Fourier coefficients & Auto-regression coefficients & Custom features \\\\')
    write(f, ' \\otoprule\n')
    for i, x in enumerate(V):
        for j, y in enumerate(V):
            # Each pair is listed once; the diagonal is skipped.
            if j > i:
                write(f, '    $\\profile{{{:d}}}$ & $\\profile{{{:d}}}$'.format(i+1, j+1))
                write(f, ' & {:.1e}'.format(fourier_based_distance(x, y)))
                write(f, ' & {:.1e}'.format(ar_distance(x, y)))
                write(f, ' & {:.1e} \\\\\n'.format(parameter_based_distance(x, y)))
    write(f, '    \\bottomrule\n')
    write(f, '\\end{tabular}\n')

# Dynamic Time Warping

In [None]:
# Pairwise dynamic time warping scores without a band argument.
# NOTE(review): the meaning of scale_score is defined by the project's
# DynamicTimeWarping class, which is not visible here -- confirm.
d = DynamicTimeWarping(scale_score=True)
table_distance(V, d.dtw)

In [None]:
# Same table with sakoeband=20.
# NOTE(review): presumably a Sakoe-Chiba band limiting the warping path, with
# the width given in samples -- confirm against the DynamicTimeWarping class.
d = DynamicTimeWarping(scale_score=True, sakoeband=20)
table_distance(V, d.dtw)

In [None]:
# Same table with ikaturaband=3.
# NOTE(review): presumably the Itakura parallelogram constraint (note the
# spelling of the keyword) -- confirm against the DynamicTimeWarping class.
d = DynamicTimeWarping(scale_score=True, ikaturaband=3)
table_distance(V, d.dtw)