In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import ot
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from stats import KDE

In [None]:
# Load the cached (dfs, scaling) pair from disk.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
pickle_path = os.path.join("pickles", "df.p")
with open(pickle_path, "rb") as handle:
    dfs, scaling = pickle.load(handle)
# Transpose so that the columns are [time vstart vend]
scaling = scaling.T

In [None]:
# Resample every entry of `dfs` ("vel" as a function of "time") onto a common
# grid of n equally spaced points on [0, 1].
n = 50
grid = np.linspace(0, 1, n)
profiles = np.zeros((len(dfs), n))
for row, frame in enumerate(dfs):
    profiles[row] = np.interp(grid, frame["time"], frame["vel"])

# Row-wise affine map: value * (scaling[:, 1] - scaling[:, 2]) + scaling[:, 2].
# The per-row factors are lifted to column vectors so they broadcast over all
# n grid points at once.
span = (scaling[:, 1] - scaling[:, 2])[:, np.newaxis]
offset = scaling[:, 2][:, np.newaxis]
profiles_scaled = np.zeros_like(profiles)
profiles_scaled[:] = profiles * span + offset

# Without scaling

In [None]:
# Split the row indices 50/50 with a fixed seed; the index arrays are reused
# to slice every per-profile array consistently.
all_indices = np.arange(len(profiles))
indexa, indexb = train_test_split(all_indices, test_size=0.5, random_state=0)

In [None]:
# Slice every per-profile array with the same index partition so that rows of
# the "a" and "b" halves stay aligned across arrays.
scalinga, scalingb = scaling[indexa, :], scaling[indexb, :]
profilesa, profilesb = profiles[indexa, :], profiles[indexb, :]
# Both scaled halves must come from profiles_scaled. Taking pb from the
# unscaled `profiles` would compare scaled generated data against unscaled
# reference data in the "with scaling" scores.
pa, pb = profiles_scaled[indexa, :], profiles_scaled[indexb, :]
# Column 0 of the scaling table is the time entry ([time vstart vend]).
ta, tb = scalinga[:, 0], scalingb[:, 0]

In [None]:
def approach0(profilesa):
    """Baseline generator: hand back the input profiles unchanged.

    The very same object is returned (no copy is made).
    """
    generated = profilesa
    return generated

def approach1(profilesa):
    """Bootstrap generator: resample the rows of ``profilesa``.

    Draws ``len(profilesa)`` row indices with ``np.random.choice`` (sampling
    with replacement by default) and returns the selected rows as a new array.
    """
    count = len(profilesa)
    picked = np.random.choice(np.arange(count), count)
    return profilesa[picked, :]

def approach2(profilesa):
    """Deterministic generator: every row is the straight line from 1 to 0.

    Only the shape of ``profilesa`` is used: one ramp of ``shape[1]`` points
    is produced per input row.
    """
    n_rows, n_points = profilesa.shape[0], profilesa.shape[1]
    ramp = np.linspace(1, 0, n_points)
    return np.array([ramp] * n_rows)

def approach3(profilesa):
    """Random-parabola generator.

    Each row is ``a*x**2 + b*x + 1`` on ``x = linspace(0, 1, width)`` with
    ``b`` drawn uniformly from (-2, 0] and ``a = -1 - b``, so every curve
    starts at 1 (x = 0) and ends at 0 (x = 1).

    Parameters
    ----------
    profilesa : 2-D array
        Only its shape is used: one curve per row, ``shape[1]`` points each.

    Returns
    -------
    ndarray of shape ``profilesa.shape`` (float).

    Notes
    -----
    The grid width is taken from ``profilesa`` rather than from the notebook
    global ``n``, so the function works for any input width and does not
    depend on kernel state.  The random draws come from the global
    ``np.random`` stream in row order.
    """
    n_rows, n_points = profilesa.shape[0], profilesa.shape[1]
    x = np.linspace(0, 1, n_points)
    # One uniform draw per row, in row order.
    b = -np.random.rand(n_rows) * 2
    a = -1 - b
    # Column vectors broadcast the per-row coefficients over the grid.
    return a[:, np.newaxis] * x**2 + b[:, np.newaxis] * x + 1

def approach4(profilesa, q=3):
    """SVD-based generator using the first ``q`` right singular vectors.

    The rows of ``profilesa`` are centred on their column mean and decomposed
    with ``np.linalg.svd``.  Each generated row is the mean plus a combination
    of the first ``q`` rows of ``v``, weighted by standard-normal draws times
    the matching singular values divided by ``sqrt(number of rows)``.

    Returns an array with the shape and dtype of ``profilesa``.
    """
    n_rows = profilesa.shape[0]
    center = np.mean(profilesa, axis=0)
    _, sing, basis = np.linalg.svd(profilesa - center, full_matrices=False)
    root_n = np.sqrt(n_rows)

    generated = np.zeros_like(profilesa)
    for row in range(len(profilesa)):
        # q fresh normal draws per generated row.
        weights = np.random.randn(q) * sing[:q] / root_n
        generated[row] = np.dot(weights, basis[:q]) + center
    return generated

def approach5(profilesa):
    """Bootstrap generator with a small random parabolic perturbation.

    Rows of ``profilesa`` are resampled with ``np.random.choice`` (with
    replacement), then ``a*x**2 + b*x`` is added to each row, where
    ``x = linspace(0, 1, width)``, ``b`` is uniform in [-0.1, 0.1) and
    ``a = -b``.  The perturbation is zero at both ends of the grid.

    Parameters
    ----------
    profilesa : 2-D float array
        Source profiles, one per row.  It is not modified.

    Returns
    -------
    ndarray of shape ``profilesa.shape``.

    Notes
    -----
    The resampling is done inline and the grid width is taken from
    ``profilesa``.  The function therefore does not depend on the notebook
    global ``n`` or on whichever ``approach1`` is currently defined in the
    kernel.  Random draws are consumed in the same order as before: first the
    row indices, then one uniform value per row.
    """
    n_rows, n_points = profilesa.shape[0], profilesa.shape[1]
    picked = np.random.choice(np.arange(n_rows), n_rows)
    # Fancy indexing returns a copy, so the in-place add below leaves the
    # caller's array untouched.
    profiles = profilesa[picked, :]
    x = np.linspace(0, 1, n_points)
    b = (2 * np.random.rand(n_rows) - 1) * .1
    a = -b
    profiles += a[:, np.newaxis] * x**2 + b[:, np.newaxis] * x
    return profiles

def approach6(profilesa, q=3):
    """SVD-based generator (first ``q`` components) clipped to [0, 1].

    Rows are generated as the column mean of ``profilesa`` plus a combination
    of the first ``q`` right singular vectors of the centred data, weighted by
    standard-normal draws times the singular values over
    ``sqrt(number of rows)``.  Values above 1 are set to 1 and values below 0
    are set to 0 before returning.
    """
    n_rows = profilesa.shape[0]
    center = np.mean(profilesa, axis=0)
    _, sing, basis = np.linalg.svd(profilesa - center, full_matrices=False)
    root_n = np.sqrt(n_rows)

    generated = np.zeros_like(profilesa)
    for row in range(len(profilesa)):
        weights = np.random.randn(q) * sing[:q] / root_n
        generated[row] = np.dot(weights, basis[:q]) + center

    # Saturate in place at the bounds of the unit interval.
    np.clip(generated, 0, 1, out=generated)
    return generated

def approach7(profilesa, q=2):
    """SVD + KDE generator clipped to [0, 1].

    The centred profiles are decomposed with ``np.linalg.svd``; a ``KDE`` is
    built on the first ``q`` columns of ``u`` and sampled ``len(k.data)``
    times.  The samples are multiplied by the first ``q`` singular values,
    projected through the first ``q`` rows of ``v`` and shifted by the column
    mean.  Values outside [0, 1] are saturated.

    NOTE(review): ``KDE`` comes from the project-local ``stats`` module; its
    ``scaling`` option, ``compute_bandwidth`` and ``sample`` are used as-is —
    confirm their semantics there.
    """
    center = np.mean(profilesa, axis=0)
    left, sing, basis = np.linalg.svd(profilesa - center, full_matrices=False)

    density = KDE(left[:, :q], scaling=True)
    density.compute_bandwidth()
    drawn = density.sample(len(density.data))

    generated = np.dot(drawn * sing[:q], basis[:q]) + center
    return np.clip(generated, 0, 1)

In [None]:
def score(approach, test):
    """Optimal-transport cost between generated and reference profiles.

    Builds the pairwise distance matrix between the rows of ``test`` and the
    rows of ``approach`` with ``distance.cdist`` (default metric) and passes
    it to ``ot.emd2`` with empty weight lists for both sides.
    """
    pairwise = distance.cdist(test, approach)
    return ot.emd2([], [], pairwise)

In [None]:
# Evaluate every generator on the unscaled profiles.
# Columns printed: score against the held-out half (b), score against the
# half the generator was built from (a), and 2*score1 - score2.
np.random.seed(2)
methods = [approach0, approach1, approach2, approach3,
           approach4, approach5, approach6, approach7]
for i, method in enumerate(methods):
    # Keep the output under its own name so the interpolated `profiles`
    # array used by other cells is not overwritten.
    generated = method(profilesa)
    score1 = score(generated, profilesb)
    score2 = score(generated, profilesa)
    print("Method {:d}: {:.4f} {:.4f} {:.4f}".format(i, score1, score2, 2*score1-score2))

In [None]:
# Sweep the number of SVD components q and report the same three scores.
# NOTE(review): the original called approach3(profilesa, q=i), but approach3
# has no `q` parameter (TypeError).  approach4 is assumed to be the intended
# SVD-based generator; approach6 and approach7 also accept q — confirm.
for i in range(30):
    # Separate name so the interpolated `profiles` array is not overwritten.
    generated = approach4(profilesa, q=i)
    score1 = score(generated, profilesb)
    score2 = score(generated, profilesa)
    print("q={:2d}: {:.4f} {:.4f} {:.4f}".format(i, score1, score2, 2*score1-score2))

# With scaling

In [None]:
def score(p1, p2, t1, t2, time_weight=50):
    """Optimal-transport cost between two sets of (time, profile) samples.

    Each sample is turned into one feature row ``[t * time_weight, p...]``;
    the pairwise distances between the two sets (``distance.cdist``, default
    metric) are passed to ``ot.emd2`` with empty weight lists for both sides.

    Parameters
    ----------
    p1, p2 : 2-D arrays
        Profiles, one per row.
    t1, t2 : 1-D arrays
        One time value per row of ``p1`` / ``p2``.
    time_weight : float, optional
        Factor applied to the time column before computing distances.
        Defaults to 50, the value that used to be hard-coded.

    Returns
    -------
    The value returned by ``ot.emd2``.
    """
    s1 = np.hstack((t1[:, np.newaxis] * time_weight, p1))
    s2 = np.hstack((t2[:, np.newaxis] * time_weight, p2))
    return ot.emd2([], [], distance.cdist(s1, s2))

In [None]:
def approach0(pa, tt):
    """Baseline generator: return the given profiles and times unchanged.

    Parameters
    ----------
    pa : array
        Profiles, returned as-is (no copy).
    tt : array
        Times matching ``pa``, returned as-is (no copy).

    Returns
    -------
    tuple ``(pa, tt)``.

    Notes
    -----
    The second element is the ``tt`` argument itself; the previous version
    ignored the argument and returned the notebook global ``ta``.
    """
    return pa, tt

def approach1(pa, ta):
    """Bootstrap generator: resample (profile, time) pairs together.

    Draws ``len(pa)`` indices with ``np.random.choice`` (with replacement by
    default) and applies the same indices to both ``pa`` and ``ta`` so each
    profile keeps its own time.
    """
    count = len(pa)
    picked = np.random.choice(np.arange(count), count)
    return pa[picked, :], ta[picked]

def approach2(pa, ta):
    """Straight-line generator with KDE-sampled endpoints and time.

    A ``KDE`` is built on the triples (first value, last value, time) of the
    input profiles and sampled ``len(pa)`` times.  Each generated profile is
    the straight line between the sampled first and last values; the sampled
    third column is returned as the times.

    NOTE(review): ``KDE`` comes from the project-local ``stats`` module;
    ``sample(k)`` is assumed to return an array with one row per sample —
    confirm there.
    """
    features = np.array([pa[:, 0], pa[:, -1], ta]).T
    density = KDE(features, scaling=True)
    density.compute_bandwidth()
    drawn = density.sample(len(pa))

    n_points = pa.shape[1]
    lines = np.zeros(pa.shape)
    for row in range(len(pa)):
        first, last = drawn[row, 0], drawn[row, 1]
        lines[row, :] = np.linspace(first, last, n_points)
    return lines, drawn[:, 2]

def approach3(pa, ta):
    """Straight-line generator from KDE-sampled (drop, end value, drop/time).

    A ``KDE`` is built on, per profile: the drop ``first - last``, the last
    value, and the drop divided by the time.  For each output row one sample
    is drawn, redrawing while the sampled drop or the sampled rate is <= 0.
    The profile is the line falling from ``drop + end`` to ``end``, and the
    time is ``drop / rate``.

    NOTE(review): ``KDE`` comes from the project-local ``stats`` module;
    ``sample()`` is assumed to return an array whose first row is one sample —
    confirm there.
    """
    drop = pa[:, 0] - pa[:, -1]
    features = np.array([drop, pa[:, -1], drop / ta]).T
    density = KDE(features, scaling=True)
    density.compute_bandwidth()

    n_points = pa.shape[1]
    lines = np.zeros_like(pa)
    times = np.zeros_like(ta)
    for row in range(len(pa)):
        # Rejection sampling: keep drawing until neither the drop nor the
        # rate is <= 0.
        while True:
            drawn = density.sample()[0]
            if not (drawn[0] <= 0 or drawn[2] <= 0):
                break
        lines[row, :] = np.linspace(drawn[0], 0, n_points) + drawn[1]
        times[row] = drawn[0] / drawn[2]
    return lines, times

In [None]:
# Evaluate every scaled generator.  Columns printed: score against the
# held-out half (b), score against the source half (a), and 2*score1 - score2.
# `p` and `t` keep the output of the last method after the loop finishes.
np.random.seed(2)
scaled_methods = (approach0, approach1, approach2, approach3)
for i, method in enumerate(scaled_methods):
    p, t = method(pa, ta)
    score1 = score(p, pb, t, tb)
    score2 = score(p, pa, t, ta)
    report = "Method {:d}: {:.4f} {:.4f} {:.4f}".format(i, score1, score2, 2*score1-score2)
    print(report)

In [None]:
# Draw every row of `p` as its own line on the current axes.
for curve in p:
    plt.plot(curve)

In [None]:
# Interpolated profiles multiplied by their per-row range
# (scaling[:, 1] - scaling[:, 2]), i.e. the scaled profiles without the
# scaling[:, 2] offset.  Multiplying the (rows, n) profile array directly by
# the 1-D range vector aligns it with the columns instead of the rows, so the
# per-row offset is lifted to a column vector here.  The result is derived
# from profiles_scaled so it does not depend on whatever `profiles` currently
# holds in the kernel.
profiles_scaled - scaling[:, 2][:, np.newaxis]