In [None]:
import sys
# Assuming we are in the notebook directory add this so that we can import the library
sys.path.append('..')

import numpy as np
import elfi
import elfi.examples.ma2 as elfi_examples
from elfi import LocalDataStore
from elfi.storage import UnQLiteStore
from functools import partial

import matplotlib
import matplotlib.pyplot as plt

matplotlib.style.use('ggplot')
%matplotlib inline

In [None]:
# Length of the observed series and the parameter values used to generate it
n = 100
t1_0 = 0.6
t2_0 = 0.2

# Draw the latents from a dedicated, seeded generator so that the observed
# data set is the same on every Restart & Run All. A private RandomState is
# used so the global numpy random state is left untouched.
seed = 20170101
random_state = np.random.RandomState(seed)

# Set up observed data y (n+2 latents are drawn for a series of length n)
latents = random_state.randn(n+2)
y = elfi_examples.MA2(n, t1_0, t2_0, n_sim=1, latents=latents)

# Plot the observed series together with the latents that produced it;
# the latents are drawn at indices -2 .. n-1.
plt.figure(figsize=(11, 6));
plt.plot(np.arange(0,n), y[0,:], label='observed y');
plt.scatter(np.arange(-2,n), latents, label='latents');
plt.xlabel('index');
plt.legend();

In [None]:
# Set up the simulator: MA2 with the series length fixed to n
simulator = partial(elfi_examples.MA2, n)

# Set up autocovariance summaries (first argument 1 and 2 respectively)
ac1 = partial(elfi_examples.autocov, 1)
ac2 = partial(elfi_examples.autocov, 2)

# Number of samples (n_samples is the number of samples we want to accept)
n_samples = 100
# Acceptance quantile; use this same value when calling rej.sample
quantile = 0.01
# Rows pre-allocated in the numpy stores below. Derived from `quantile`
# so that changing the quantile resizes the storage accordingly.
max_samples = int(n_samples / quantile)+1
batch_size = 100

# Different types of storage objects:
# numpy array storage (one (max_samples, 1) slab per summary)
store_array = np.zeros((2, max_samples,1))
object_store = LocalDataStore(store_array[0])
# NoSQL database (in-memory, but could also be a file)
# Results are saved to collection matching node name
nosql_store = UnQLiteStore()

# Specify the graphical model and which nodes are persisted where
t1 = elfi.Prior('t1', 'uniform', 0, 1, store=nosql_store)  # UnQLiteStore (database)
t2 = elfi.Prior('t2', 'uniform', 0, 1, store=nosql_store)  # UnQLiteStore (database)
Y = elfi.Simulator('MA2', simulator, t1, t2, observed=y, store=nosql_store)  # UnQLiteStore (database)
# Variable names match the node names: S1 <-> 'S1' (ac1), S2 <-> 'S2' (ac2)
S1 = elfi.Summary('S1', ac1, Y, store=object_store)  # LocalDataStore wrapping store_array[0]
S2 = elfi.Summary('S2', ac2, Y, store=store_array[1])  # numpy array passed directly as the store
# NOTE(review): the parents are passed as node 'S2' first, then node 'S1'.
# Presumably the distance does not depend on the order of the summaries;
# confirm before reordering them.
d = elfi.Discrepancy('d', elfi_examples.distance, S2, S1, store="cache")  # Stored within the Node

# Set up rejection sampling
rej = elfi.Rejection(d, [t1, t2], batch_size=batch_size)

In [None]:
# Time and run the simulator in parallel
%time result = rej.sample(n_samples, quantile=0.01)

In [None]:
# Run again, but this time we are able to use the cached results
%time result = rej.sample(n_samples, quantile=0.01)

In [None]:
# Unpack the accepted samples (one entry per parameter) and report
# how many were accepted, the threshold, and the posterior means.
t1_post, t2_post = result['samples']
n_accepted = len(t1_post)
print("Number of accepted samples {} with threshold {:.2f}".format(n_accepted, result['threshold']))
posterior_means = (t1_post.mean(), t2_post.mean())
print("Posterior means: {:.2f} {:.2f}".format(*posterior_means))

In [None]:
# Side-by-side histograms of the accepted samples of each parameter
fig, ax = plt.subplots(ncols=2, figsize=(14,5))
panels = (
    (ax[0], t1_post, "Posterior for t1"),
    (ax[1], t2_post, "Posterior for t2"),
)
for panel, samples, title in panels:
    panel.hist(samples, bins=20)
    panel.set_title(title)

In [None]:
# Simulation results were stored in nosql_store:
# fetch entries 0..9 of the 'MA2' collection and overlay them in one figure.
first_ten = slice(0, 10)
data = nosql_store.get('MA2', first_ten)
plt.figure(figsize=(11, 6))
index_axis = np.arange(0, n)
for simulated in data:
    plt.plot(index_axis, simulated)

In [None]:
# Priors were stored in nosql_store:
# read entries 0..999 of both parameters and plot them against each other.
stored_range = slice(0, 1000)
data1 = nosql_store.get('t1', stored_range)
data2 = nosql_store.get('t2', stored_range)
plt.figure(figsize=(6, 6))
plt.scatter(data1, data2);

In [None]:
# Cached results are within the Nodes:
# ask the discrepancy node for 1000 values, evaluate the returned object,
# and show the result as a histogram.
acquired = d.acquire(1000)
data3 = acquired.compute()
plt.figure(figsize=(6, 6))
plt.hist(data3);

In [None]:
# Summaries were stored in store_array: one histogram per slab of the array
fig, ax = plt.subplots(ncols=2, figsize=(14,5))
for idx, title in enumerate(("Summary 1", "Summary 2")):
    ax[idx].hist(store_array[idx], bins=20)
    ax[idx].set_title(title)