## Load all the things

In [6]:
from model.environment import load_env
load_env()
%autoreload 2

ImportError: cannot import name 'load_env'

## Compute and plot empirical distributions and model priors

In [5]:
from model.plotting import plot_grid, default_color, FONT_SIZE
from scipy.stats import gaussian_kde
from model.environment import seg_ns, train_trajs
import numpy as np
from IPython.display import display, clear_output, set_matplotlib_formats

set_matplotlib_formats('pdf', quality=90)

def time_left(traj):
    return (traj.iloc[-1].timestamp - traj.iloc[0].timestamp).seconds

def seg_travel_times(seg):
    seg_traj_ids = seg.traj.unique()
    return np.array([time_left(seg[seg.traj == traj_id]) for traj_id in seg_traj_ids])

def grid_for(data, delta):
    sd = data.std()
    return np.linspace(data.min() - sd, data.max() + sd, delta)

h, w = 4, 3
_, axs = plot_grid(h, w)
for i, seg_n in enumerate(seg_ns):
    time_lefts = seg_travel_times(train_trajs[train_trajs.seg == seg_n])
    xx = grid_for(time_lefts, 10000)
    density = gaussian_kde(time_lefts)
    ax = axs[i // w][i % w]
    ax.hist(time_lefts, bins=50, density=True, color=default_color(0), label='Travel Time')
    ax.plot(xx, density(xx), color=default_color(4), label='Density estimate')
    ax.set_xlabel(r'Time (s)', fontsize=FONT_SIZE - 2)
    ax.set_ylabel('Density', fontsize=FONT_SIZE - 2)
    ax.set_title('Empirical Travel Time Distribution for Segment {}'.format(seg_n), fontsize=FONT_SIZE)
    ax.legend()
    
axs[3, 2].axis('off')

(0.0, 1.0, 0.0, 1.0)

<Figure size 1728x2304 with 12 Axes>

## Compute distance metrics

In [25]:
from model.evaluate import hellinger, js_divergence, kl_divergence
from scipy.stats import norm

def piecewise_hellinger2(q, p, xmin, xmax, delta):
    return np.hstack([hellinger(q, p, x, x + delta)
     for x in np.linspace(xmin, xmax, (xmax - xmin)/delta)
    ]).sum()

_, ax = plot_grid(1, 1)
xmin, xmax, delta = -5, 5, 100000
xx = np.linspace(xmin, xmax, delta)
p = lambda x: norm.pdf(x, 10, 1)
q = lambda x: norm.pdf(x, 0, 1)
ax.plot(xx, p(xx))
ax.plot(xx, q(xx))
print(hellinger(p(xx), q(xx)))
print(kl_divergence(p(xx), q(xx)))
print(js_divergence(p(xx), q(xx)))

0.9982585087919826
13.200182448519325
0.6925393163623104


<Figure size 576x576 with 1 Axes>

In [18]:
from model.trajectory_model import arrival_time_prior
from model.storage import load_models
from model.evaluate import hellinger
from model.plotting import plot_grid
from functools import reduce

def prior_density_from_params(x, model_prior_params):
    unnormed_density = reduce(np.add, [
        norm.pdf(x, mu, np.sqrt(var))
        for mu, var in model_prior_params
    ]).reshape(-1)
    return unnormed_density / unnormed_density.sum()

model_limit = 1
h, w = 4, 3
_, axs = plot_grid(h, w)
for i, seg_n in enumerate(seg_ns):
    models = load_models(route_n, seg_n, model_limit)
    model_prior_params = [arrival_time_prior(m) for m in models]
    prior_density = lambda x: prior_density_from_params(x, model_prior_params)
    time_lefts = seg_travel_times(train_trajs[train_trajs.seg == seg_n])
    empirical_density = gaussian_kde(time_lefts)
    delta = 100000
    xx = grid_for(time_lefts, delta)
    p, q = prior_density(xx), empirical_density(xx)
    print('hellinger distance', hellinger(p, q))
    print('js divergence', js_divergence(p, q))
    q = empirical_density(xx)
    q = q / q.sum()
    ax = axs[i // w][i % w]
    ax.plot(xx, prior_density(xx), label='Model prior distribution')
    ax.plot(xx, q, color=default_color(4), label='Empirical density estimate')
    ax.set_xlabel(r'Time (s)', fontsize=FONT_SIZE - 2)
    ax.set_ylabel('Density', fontsize=FONT_SIZE - 2)
    ax.set_title('Travel Time Distribution for Segment {}'.format(seg_n), fontsize=FONT_SIZE)
    ax.legend()

axs[3, 2].axis('off')

loaded 1 models: [32]
hellinger 0.9881373820194466
js divergence 0.6901284563853509
loaded 1 models: [32]
hellinger 0.8084849187571825
js divergence 0.5059261477482946
loaded 1 models: [32]
hellinger 0.7567081638965674
js divergence 0.44623250879077214
loaded 1 models: [32]
hellinger 0.8405551013278644
js divergence 0.5437189533358002
loaded 1 models: [32]
hellinger 0.8246572054607535
js divergence 0.5241827117453521
loaded 1 models: [32]
hellinger 0.7398975417345969
js divergence 0.42700351178650175
loaded 1 models: [32]
hellinger 0.7622507778367071
js divergence 0.4524779402411069
loaded 1 models: [32]
hellinger 0.7866308340966708
js divergence 0.4801019490801276
loaded 1 models: [32]
hellinger 0.8600505116076054
js divergence 0.5659007495991664
loaded 1 models: [32]
hellinger 0.77797928866298
js divergence 0.4701620993816832
loaded 1 models: [32]
hellinger 0.8198216293662285
js divergence 0.518598485571824


(0.0, 1.0, 0.0, 1.0)

<Figure size 1728x2304 with 12 Axes>