### Imports

Installing private repos in Colab is tricky: you need to generate a personal access token with full package rights on the GitHub website, then run:
`!pip install git+https://username:token@github.com/EEA-sensors/parallelEKF.git`


In [1]:
# Query the NVIDIA driver for attached GPUs; the command reports a failure
# when no NVIDIA driver can be reached from this runtime.
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [None]:
# Print the CPU model of the machine the benchmarks run on.
!lscpu |grep 'Model name'

In [2]:
# Install the private `pekf` package straight from GitHub.
# `username` and `token` are placeholders: substitute your own GitHub
# username and access token before running, and do not commit the real token.
!pip install git+https://username:token@github.com/EEA-sensors/parallelEKF.git

Collecting git+https://AdrienCorenflos:****@github.com/EEA-sensors/parallelEKF.git
  Cloning https://AdrienCorenflos:****@github.com/EEA-sensors/parallelEKF.git to /tmp/pip-req-build-jyemhmb9
  Running command git clone -q 'https://AdrienCorenflos:****@github.com/EEA-sensors/parallelEKF.git' /tmp/pip-req-build-jyemhmb9
Building wheels for collected packages: pekf
  Building wheel for pekf (setup.py) ... [?25l[?25hdone
  Created wheel for pekf: filename=pekf-0.1-cp36-none-any.whl size=9699 sha256=5556d1726bcfd136bb834edcdd74771d3cfc26b2ca095edbd6c370eefa566178
  Stored in directory: /tmp/pip-ephem-wheel-cache-3ap2acqd/wheels/af/11/18/8e3783ad4add7a0d6bba4175e2136c79b51b80ca37b5250636
Successfully built pekf


In [3]:
import jax.numpy as jnp
from jax import jit
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import time
import tqdm

from pekf.parallel import ekf, eks
from pekf.sequential import ekf as seq_ekf, eks as seq_eks
from pekf.models.bearings import get_data, make_parameters, plot_bearings
from pekf.utils import MVNormalParameters

# Apply seaborn's default styling to every matplotlib figure drawn below.
seaborn.set()

### Input parameters

In [4]:
# --- Sensor geometry ---------------------------------------------------------
s1 = jnp.array([-1.5, 0.5])  # location of the first sensor
s2 = jnp.array([1.0, 1.0])  # location of the second sensor

# --- Noise levels and time discretisation ------------------------------------
r = 0.5  # standard deviation of the observation noise
dt = 0.01  # time step used for the discretisation
# NOTE(review): qc and qw were both labelled "discretization noise" originally;
# they are the two noise parameters handed to make_parameters — confirm their
# exact meaning against that function.
qc = 0.01
qw = 10

# --- Simulation setup ----------------------------------------------------------
x0 = jnp.array([0.1, 0.2, 1.0, 0.0])  # true initial state of the target
T = 20_000  # how many observations to simulate



### Get parameters

In [5]:
# Build the model ingredients from the settings above.
# NOTE(review): Q and R are presumably the process- and observation-noise
# covariances — confirm against make_parameters.
Q, R, observation_function, transition_function = make_parameters(
    qc,
    qw,
    r,
    dt,
    s1,
    s2,
)

### Get data

In [6]:
# Simulate a trajectory together with its observations from both sensors.
ts, true_states, observations = get_data(
    x0,
    dt,
    r,
    T,
    s1,
    s2,
    42,  # presumably the random seed — confirm against get_data
)

### We can now run the filter

Initial state guess

In [7]:
# Prior over the initial state: a five-dimensional mean vector paired with an
# identity covariance of matching size.
m = jnp.array([-1.0, -1.0, 0.0, 0.0, 0.0])
P = jnp.eye(m.shape[0])

initial_guess = MVNormalParameters(m, P)

In [8]:
# Run the parallel filter once over the full observation sequence; the result
# is reused below as the input of the smoother benchmarks.
filtered_states = ekf(
    initial_guess,
    observations,
    transition_function,
    Q,
    observation_function,
    R,
)

### Sequential vs Parallel computation time comparison

In [9]:
def profile_filter(f_method, lengths, n_runs=10):
    """Time a filtering routine on growing prefixes of ``observations``.

    For each length ``j`` the filter is called once as a warm-up, so that any
    JIT compilation happens outside the measurements, and is then timed
    ``n_runs`` times.

    The notebook-level names ``observations``, ``initial_guess``,
    ``transition_function``, ``Q``, ``observation_function`` and ``R`` are read
    from the global namespace.

    Parameters
    ----------
    f_method : callable
        Called as ``f_method(initial_guess, obs, transition_function, Q,
        observation_function, R)``. Its result must expose a ``mean``
        attribute providing ``block_until_ready()``.
    lengths : iterable of int
        Number of leading observations used for each benchmark point.
    n_runs : int, optional
        Number of timed repetitions per length.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Mean and standard deviation of the run times in seconds, one entry
        per element of ``lengths``.
    """
    res_mean = []
    res_std = []
    for j in tqdm.tqdm(lengths):
        obs = observations[:j]
        # Warm-up call so that compilation is not part of the timings. JAX
        # dispatches work asynchronously, so wait for the warm-up result;
        # otherwise its execution could still be running when the first
        # timed call starts and would inflate that measurement.
        warmup = f_method(initial_guess, obs, transition_function, Q, observation_function, R)
        warmup.mean.block_until_ready()
        run_times = []
        for _ in range(n_runs):
            # perf_counter is monotonic and high-resolution, unlike time.time.
            tic = time.perf_counter()
            states = f_method(initial_guess, obs, transition_function, Q, observation_function, R)
            states.mean.block_until_ready()
            toc = time.perf_counter()
            run_times.append(toc - tic)
        res_mean.append(np.mean(run_times))
        res_std.append(np.std(run_times))
    return np.array(res_mean), np.array(res_std)

In [10]:
def profile_smoother(s_method, lengths, n_runs=10):
    """Time a smoothing routine on growing prefixes of ``filtered_states``.

    For each length ``j`` the smoother is called once as a warm-up, so that
    any JIT compilation happens outside the measurements, and is then timed
    ``n_runs`` times.

    The notebook-level names ``filtered_states``, ``transition_function`` and
    ``Q`` are read from the global namespace.

    Parameters
    ----------
    s_method : callable
        Called as ``s_method(transition_function, Q, states)``. Its result
        must expose a ``mean`` attribute providing ``block_until_ready()``.
    lengths : iterable of int
        Number of leading filtered states used for each benchmark point.
    n_runs : int, optional
        Number of timed repetitions per length.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Mean and standard deviation of the run times in seconds, one entry
        per element of ``lengths``.
    """
    res_mean = []
    res_std = []
    for j in tqdm.tqdm(lengths):
        states = MVNormalParameters(filtered_states.mean[:j], filtered_states.cov[:j])
        # Warm-up call so that compilation is not part of the timings. JAX
        # dispatches work asynchronously, so wait for the warm-up result;
        # otherwise its execution could still be running when the first
        # timed call starts and would inflate that measurement.
        warmup = s_method(transition_function, Q, states)
        warmup.mean.block_until_ready()
        run_times = []
        for _ in range(n_runs):
            # perf_counter is monotonic and high-resolution, unlike time.time.
            tic = time.perf_counter()
            s_states = s_method(transition_function, Q, states)
            s_states.mean.block_until_ready()
            toc = time.perf_counter()
            run_times.append(toc - tic)
        res_mean.append(np.mean(run_times))
        res_std.append(np.std(run_times))
    return np.array(res_mean), np.array(res_std)

Let's now run the sequential and the parallel implementations to see the performance gain that comes from parallelisation.

In [11]:
# Twenty logarithmically spaced sequence lengths from 10 up to T; the integer
# cast truncates each value toward zero.
lengths_space = np.logspace(
    start=1,
    stop=np.log10(T),
    num=20,
).astype(np.int32)

In [12]:
# Wrap each implementation with jit. The function-valued arguments are marked
# as static through their positional index.

# Parallel implementations
jitted_parallel_filter = jit(
    ekf,
    static_argnums=(2, 4),  # transition_function, observation_function
)
jitted_parallel_smoother = jit(
    eks,
    static_argnums=(0,),  # transition_function
)

# Sequential implementations
jitted_sequential_filter = jit(
    seq_ekf,
    static_argnums=(2, 4),  # transition_function, observation_function
)
jitted_sequential_smoother = jit(
    seq_eks,
    static_argnums=(0,),  # transition_function
)

In [None]:
# Filtering benchmarks: sequential first, then parallel.
seq_filter_mean, seq_filter_std = profile_filter(
    jitted_sequential_filter, lengths_space
)
par_filter_mean, par_filter_std = profile_filter(
    jitted_parallel_filter, lengths_space
)

# Smoothing benchmarks, in the same order.
seq_smooth_mean, seq_smooth_std = profile_smoother(
    jitted_sequential_smoother, lengths_space
)
par_smooth_mean, par_smooth_std = profile_smoother(
    jitted_parallel_smoother, lengths_space
)

100%|██████████| 20/20 [00:29<00:00,  1.47s/it]
 30%|███       | 6/20 [02:27<06:33, 28.13s/it]

In [None]:
# Log-log comparison of the mean run times: filters on the top panel,
# smoothers on the bottom one. Both panels share their axes so the curves can
# be compared directly.
fig, (ax0, ax1) = plt.subplots(nrows=2, figsize=(10, 10), sharex=True, sharey=True)

ax0.loglog(lengths_space, seq_filter_mean, label="sequential E-KF")
ax0.loglog(lengths_space, par_filter_mean, label="parallel E-KF")
ax0.set_title("Filtering")
ax0.set_ylabel("mean run time (s)")
ax0.legend()

ax1.loglog(lengths_space, seq_smooth_mean, label="sequential E-RTS")
ax1.loglog(lengths_space, par_smooth_mean, label="parallel E-RTS")
ax1.set_title("Smoothing")
ax1.set_xlabel("number of time steps")
ax1.set_ylabel("mean run time (s)")
ax1.legend()

# NOTE(review): "/content" is presumably the Colab working directory — adjust
# the path when running elsewhere.
fig.savefig("/content/cpu-cpu.png")