# Method for selecting the hyperparameters of the parametrization

In [None]:
import numpy as np
import pickle
from spline_inter1d import SplineInter1D, _Spline
import matplotlib.pyplot as plt
%matplotlib inline
from kde import KDE
from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import StandardScaler

## Simplified example

In [None]:
# Spline objects shared by the cells below.
# `spl_event` is the fitting object; its first argument is presumably the list
# of knot locations (0, 1/2, 1) -- confirm against SplineInter1D.
spl_event = SplineInter1D(
    [0, 1 / 2, 1],
    left_constraints=[1],
    right_constraints=[0],
)
# `spl` is the evaluator, constructed from spl_event's t, d and thetafixed.
spl = _Spline(spl_event.t, spl_event.d, spl_event.thetafixed)
def theta(thetatilde):
    """Expand reduced coefficients into a full spline parameter vector.

    Returns ``spl_event.thetafixed + dot(spl_event.v2, thetatilde)``.
    """
    offset = spl_event.thetafixed
    basis = spl_event.v2
    return offset + np.dot(basis, thetatilde)
def thetatilde(theta):
    """Project a full spline parameter vector onto the reduced coefficients.

    Returns ``dot(spl_event.v2.T, theta - spl_event.thetafixed)``.
    NOTE(review): this is the inverse of ``theta()`` only if the columns of
    ``spl_event.v2`` are orthonormal -- confirm.
    """
    shifted = theta - spl_event.thetafixed
    return np.dot(spl_event.v2.T, shifted)

In [None]:
# Score that measures how well the parameters 'z' reproduce the curve y
def error(y, z, sigma=0.1):
    """Score the spline defined by `z` against the curve `y`.

    y      test curve, sampled at the x-values the global `spl` was last
           evaluated at (spl() is called here without arguments)
    z      reduced parameter vector, e.g. a sample drawn from the kde
    sigma  scale of the tolerated deviation; must be non-zero

    Returns exp(-mean(|y - yz| / sigma)), where yz is the spline evaluated
    with theta(z). For sigma > 0 the value is 1 when both curves coincide and
    shrinks as they deviate, so larger means a better fit.

    Side effect: overwrites `spl.theta`.
    """
    # Load the parameters into the shared spline and evaluate it
    spl.theta = theta(z)
    fitted = spl()

    # Alternative scores tried earlier, kept for reference:
    # return np.mean(np.exp(-(y - yz)**2 / (2 * sigma**2))) / (sigma * np.sqrt(2*np.pi))
    # return np.mean(-np.abs(y - yz) / sigma) - np.log(2 * sigma)
    # return np.exp(-np.mean((y - yz)**2 / (2 * sigma * sigma)) / np.sqrt(2 * np.pi * sigma))
    scaled_deviation = np.abs(y - fitted) / sigma
    return np.exp(-np.mean(scaled_deviation))

In [None]:
# Toy problem: two training curves (v1, v2) and one test curve (v3) on [0, 1]
t = np.linspace(0, 1, 100)
spl(t)  # Called for its side effect: sets the x-values used by later spl() calls
v1 = -t**2 + 1
v2 = t**2 - 2*t + 1
v3 = -t + 1
# Fit each training curve and keep its reduced coefficients, one row per curve
coef_train = np.array([thetatilde(spl_event.fit(t, v).theta) for v in (v1, v2)])
plt.plot(t, v1, 'b', label='Training curve')
plt.plot(t, v2, 'b')
plt.plot(t, v3, 'r', label='Test curve')
plt.xlabel('t')
plt.ylabel('y')
plt.legend()
# Pass a real boolean: string flags such as 'on' are deprecated in Matplotlib
plt.grid(True)

In [None]:
# For a few bandwidths, draw 40 samples from the KDE fitted on the training
# coefficients and plot the resulting spline curves, one subplot per bandwidth.
H = [0.01, 0.1, 0.2, 0.5, 1, 2]
f, axs = plt.subplots(2, 3, figsize=(16, 8))
for h, ax in zip(H, np.ravel(axs)):
    kde = KDE(coef_train, bandwidth=h, scale_data=False)
    kde.compute_kde()
    for _ in range(40):
        # One sample at a time; [0] takes the single drawn parameter vector
        z = kde.draw_random_sample(1)[0]
        spl.theta = theta(z)
        ax.plot(t, spl(t), color=[.5, .5, 1])
    ax.set_title('h = {:.2f}'.format(h))
    ax.set_xlabel('t')
    ax.set_ylabel('y')
    # Pass a real boolean: string flags such as 'on' are deprecated in Matplotlib
    ax.grid(True)
plt.tight_layout()

In [None]:
# Monte-Carlo estimate of the mean score of the test curve v3 for every
# combination of sigma (rows of E) and bandwidth h (columns of E).
N = 1000  # number of KDE samples per (sigma, h) pair
H = np.logspace(-2, 1, 31)
Sigma = [0.005, 0.01, 0.05, 0.1, 0.5, 1]
E = np.zeros((len(Sigma), len(H)))
for isigma, sigma in enumerate(tqdm(Sigma)):
    for ih, h in enumerate(tqdm(H, leave=False)):
        kde = KDE(coef_train, bandwidth=h, scale_data=False)
        kde.compute_kde()
        # Draw one sample at a time and score it against the test curve
        scores = [
            error(v3, kde.draw_random_sample(1)[0], sigma=sigma)
            for _ in range(N)
        ]
        E[isigma, ih] = np.mean(scores)

In [None]:
# Plot the score against the bandwidth for every sigma: raw scores on the left,
# scores normalised by each curve's maximum on the right. A cross marks the
# best bandwidth of each curve.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))
for sigma, e in zip(Sigma, E):
    label = 'sigma = {:.3f}'.format(sigma)
    ibest = np.argmax(e)  # index of the best-scoring bandwidth for this sigma
    plthandle = ax1.semilogx(H, e, label=label)
    ax1.plot(H[ibest], e[ibest], 'x', color=plthandle[0].get_color(), ms=10, mew=3)
    plthandle = ax2.semilogx(H, e / e[ibest], label=label)
    ax2.plot(H[ibest], 1, 'x', color=plthandle[0].get_color(), ms=10, mew=3)
# Pass a real boolean: string flags such as 'on' are deprecated in Matplotlib
ax1.grid(True)
ax1.set_xlabel('Bandwidth')
ax1.set_ylabel('Score')
ax2.grid(True)
ax2.set_xlabel('Bandwidth')
ax2.set_ylabel('Score normalized per curve')
ax1.legend()

## Real example

In [None]:
# Load the pickled data; the file holds a pair that is unpacked into `dfs`
# and `scaling`.
# NOTE: pickle.load can execute arbitrary code while unpickling, so 'df.p'
# must come from a trusted source.
with open('df.p', 'rb') as f:
    (dfs, scaling) = pickle.load(f)

In [None]:
# Fit a spline to every data frame and store its reduced coefficients,
# one row per data frame
ncoefs = spl_event.v2.shape[1]
coefs = np.zeros((len(dfs), ncoefs))
for i, df in enumerate(tqdm(dfs)):
    fitted = spl_event.fit(df['time'], df['vel'])
    coefs[i] = thetatilde(fitted.theta)
# Fit the scaler on the rows of scaling.T stacked on top of the coefficients
stacked = np.concatenate((scaling.T, coefs))
scaler = StandardScaler().fit(stacked)

In [None]:
# NOTE(review): looks like a scratch cell -- it displays the correlation
# matrix of two rows of 5 standard-normal random samples and uses none of the
# data computed above.
np.corrcoef(np.random.randn(2, 5))