In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from sys import stdout
import seaborn as sns

In [None]:
## Reload stuff
# Development convenience: pick up on-disk edits to ezmc without restarting the kernel.
from importlib import reload
import ezmc
# Reload the submodules BEFORE the package itself. importlib.reload re-executes
# a module in place, so when the package's __init__ is re-run it re-imports names
# (e.g. ezmc.MetropolisSampler) from whatever the submodules currently contain.
# Reloading the package first would leave those names bound to stale objects.
# NOTE(review): the order base -> utils -> samplers assumes samplers depends on
# the other two; adjust if the real dependency order differs.
reload(ezmc.base)
reload(ezmc.utils)
reload(ezmc.samplers)
reload(ezmc)


# Cognitive Modelling

Let's work through a more realistic example.


In [None]:
def simulate_trials(pars, n_trials=1000, max_t=5., dt=.01, noise_sd=.1):
    """Simulate noisy evidence-accumulation trajectories.

    Parameters
    ----------
    pars : sequence of three numbers
        (ndt, drift, threshold). Only ndt (non-decision time) and drift are
        used here; the threshold is applied later by X_to_rt.
    n_trials : int
        Number of independent trajectories (rows of the result).
    max_t, dt : float
        The time grid is np.arange(0, max_t, dt).
    noise_sd : float
        Standard deviation of the per-step Gaussian noise before it is scaled
        by sqrt(dt). Defaults to the previously hard-coded value of 0.1.

    Returns
    -------
    ndarray of shape (n_trials, len(time grid))
        Accumulator value of every trial at every time step.
    """
    ndt, drift, _threshold = pars
    t = np.arange(0, max_t, dt)
    # Evidence only accumulates once the non-decision time has elapsed.
    drift_rate = np.where(t > ndt, drift, 0)
    mean_path = np.cumsum(drift_rate * dt)
    noise = np.random.normal(0, noise_sd, (n_trials, len(t)))
    # Scale increments by sqrt(dt), then accumulate along the time axis.
    diffusion = np.cumsum(noise * np.sqrt(dt), 1)
    return mean_path + diffusion

def X_to_rt(X, pars, max_t=5., dt=.01):
    """Convert accumulator trajectories into response times.

    Parameters
    ----------
    X : array-like, shape (n_trials, n_steps)
        Accumulator values; n_steps must equal len(np.arange(0, max_t, dt)).
    pars : sequence of three numbers
        (ndt, drift, threshold). Only the threshold is used here.
    max_t, dt : float
        Time grid used to map step indices back to seconds.

    Returns
    -------
    ndarray of shape (n_trials,)
        Time of the first step at which each trial exceeds the threshold, or
        NaN for trials that never exceed it.
    """
    _ndt, _drift, threshold = pars
    t = np.arange(0, max_t, dt)
    above = np.asarray(X) > threshold
    first_ix = np.argmax(above, 1)
    # argmax returns 0 both for "crossed at the first step" and for "never
    # crossed", so decide with an explicit any() instead of testing index 0.
    crossed = above.any(1)
    return np.where(crossed, t[first_ix], np.nan)

def simulate_rts(pars, n_trials=1000, max_t=5., dt=.01):
    """Simulate trials for `pars` and return only their response times.

    Runs simulate_trials and passes the trajectories to X_to_rt using the
    same time grid, so the result has one entry per trial.
    """
    grid = dict(max_t=max_t, dt=dt)
    trajectories = simulate_trials(pars, n_trials=n_trials, **grid)
    return X_to_rt(trajectories, pars, **grid)

def plot_simlations(X, rt, pars, max_t=5., dt=.01):
    """Plot the RT histogram above the accumulator trajectories.

    Parameters
    ----------
    X : 2-D array
        One row per trial, one column per step of np.arange(0, max_t, dt).
    rt : array-like
        Response times; NaN entries are left out of the histogram.
    pars : sequence of three numbers
        (ndt, drift, threshold). Only the threshold is used here.
    max_t, dt : float
        Time grid; should match the values used to build X.
    """
    _ndt, _drift, threshold = pars
    t = np.arange(0, max_t, dt)
    rt = np.asarray(rt, dtype=float)
    fig, axes = plt.subplots(2, 1, figsize=(8, 6),
                             gridspec_kw={'height_ratios': [.5, 1.]})

    plt.sca(axes[0])
    # Drop NaN RTs explicitly rather than relying on how hist treats NaN.
    plt.hist(rt[~np.isnan(rt)])
    plt.xlim(0, max_t)
    plt.xticks([])
    plt.ylabel('RT distribution')

    plt.sca(axes[1])
    for trajectory in X:
        plt.plot(t, trajectory, 'b', alpha=.1)
    # Mark the decision threshold actually used (was hard-coded at 1.).
    plt.hlines(threshold, 0, max_t)
    plt.xlim(0, max_t)
    # Small headroom so the threshold line is not drawn on the axes border.
    plt.ylim(-threshold, threshold * 1.05)
    plt.ylabel('Accumulator')
    plt.xlabel('Time (s)')

    plt.tight_layout()
    plt.show()

In [None]:
# Generate the "observed" data set from known parameters so the fit can be
# checked against the truth.
# NOTE(review): the two commented-out settings below label the third entry
# "Noise SD", but simulate_trials / X_to_rt unpack it as the threshold —
# presumably left over from an earlier parameterisation.
# true_pars = [1., 1., .2] # Non-decision time, Drift rate, Noise SD
# true_pars = [1, 1., .25] # Non-decision time, Drift rate, Noise SD
true_pars = [1, 1., .5] # Non-decision time, Drift rate, Threshold
# `t` is not passed to any of the calls below; they rebuild the grid themselves.
t = np.arange(0, 5, .01)
true_X = simulate_trials(true_pars, n_trials=100)
true_rts =  X_to_rt(true_X, true_pars)

plot_simlations(true_X, true_rts, true_pars)

In [None]:


# def transform_pars(pars):
#     '''Decorrelates pars for fitting'''
#     ndt, drift, noise = pars
#     a = ndt + drift + noise
#     b = drift / a
#     c = noise / a
#     return a, b, c

# def untransform_pars(bayes_pars):
#     '''Returns decorrelated parameters to original space'''
#     a, b, c = bayes_pars
#     noise = a * c
#     drift = a * b
#     ndt = a - drift - noise
#     return ndt, drift, noise
# assert(np.all(np.array(untransform_pars(transform_pars(true_pars))) == np.array( true_pars)))

def posterior_density(pars, true_rts=true_rts):
    """Simulation-based estimate of the log posterior density at `pars`.

    The likelihood is approximated by simulating RTs from the model, fitting
    a Gaussian KDE to them and evaluating the observed RTs under that KDE.
    Because trials are re-simulated on every call, the returned value is
    noisy: repeated calls with the same `pars` give different results.

    Parameters
    ----------
    pars : sequence of three numbers
        (ndt, drift, threshold).
    true_rts : array-like
        Observed response times. The default is the value `true_rts` had
        when this function was defined; re-run this cell after regenerating
        the data.

    Returns
    -------
    float
        log prior + approximate log likelihood. A penalty of -1e5 replaces
        the likelihood when any parameter is negative, when the KDE cannot
        be built, or when the likelihood evaluates to NaN.
    """
    ndt, drift, threshold = pars
    ## Normal(0, 10) priors on all parameters
    nlpdf = stats.norm.logpdf # Args: x, loc, scale
    log_prior = nlpdf(ndt, 0, 10) + nlpdf(drift, 0, 10) + nlpdf(threshold, 0, 10)
    if np.any(np.array(pars) < 0):
        return -1e+5 + log_prior
    ## Simulate from model
    sim_rt = simulate_rts(pars, n_trials=200)
    ## Use simulated RTs to estimate likelihood of observed rts
    responded = ~np.isnan(sim_rt)
    p_response = 1 - np.mean(np.isnan(sim_rt)) # Normalising for simulations that don't cross threshold
    try:
        # Use the public stats.gaussian_kde; the stats.kde namespace is deprecated.
        kernel = stats.gaussian_kde(sim_rt[responded], bw_method='silverman')
        log_lik = np.sum(kernel.logpdf(true_rts) + np.log(p_response))
    except (ValueError, np.linalg.LinAlgError):
        # Raised when no (or too few / identical) simulated RTs are available
        # to estimate a density from.
        log_lik = -1e+5
    if np.isnan(log_lik):
        log_lik = -1e+5
    return log_prior + log_lik



In [None]:
# Spot-check: evaluate the log posterior at one hand-picked (ndt, drift, threshold).
posterior_density([2.9497,0.9365,2.1787])

In [None]:
# posterior_density re-simulates trials on every call, so evaluating it
# repeatedly at the same point shows how noisy the estimate is.
for i in range(5):
    print(posterior_density(true_pars))
#     print(posterior_density(transform_pars(true_pars)))

## Sample

In [None]:
def init_func():
    """Draw a random starting point that the posterior does not penalise.

    Candidates are sampled from Normal(0, 4) for each of the three parameters
    and redrawn until posterior_density at the candidate exceeds -1e4.
    """
    while True:
        candidate = np.random.normal(0, 4, 3)
        score = posterior_density(candidate)
        if score > -1e+4:
            return candidate
# Metropolis sampler over the three model parameters. par_names follows the
# order in which posterior_density unpacks `pars`.
sampler = ezmc.MetropolisSampler(func=posterior_density,
                                 par_names=['ndt', 'drift', 'threshold'],
                                 init_func=init_func,
                                proposal_sd=.1)


In [None]:
# sampler.proposal_sd = np.ones(3) * .01

In [None]:
# Run 4 chains with n=500.
# NOTE(review): presumably n is the number of iterations added per chain per
# call, so re-running this cell would extend the chains — confirm in ezmc.
sampler.sample_chains(nchains=4, n=500)

In [None]:
# Print the generating parameters, then each chain's `values`, `cur_ll` and
# `iterations` attributes for a quick comparison.
print(true_pars)
for ch in sampler.chains:
    print(ch.values, ch.cur_ll, ch.iterations)

In [None]:
# Trace plot of the chains as returned by the sampler (no burn-in or thinning
# is requested here).
chains = sampler.get_chains()
fig = ezmc.viz.traceplot(chains);
# for ax in fig.get_axes():
#     ax.set_ylim(-5, 5)
# plt.ylim(-5, 5)

In [None]:
# Trace of the 'll' column only, with the y-axis clipped to [-200, 100]
# (presumably so the -1e5 penalty values do not dominate the scale).
ezmc.viz.traceplot(chains, ['ll']);
plt.ylim(-200, 100)

In [None]:
# Pooled samples after burn-in and thinning.
# NOTE(review): burn_in=2000 is larger than the n=500 requested in the
# sampling cell; unless that cell is run several times this may leave no
# samples — confirm how ezmc counts burn_in.
results = sampler.get_results(burn_in=2000, thin=10)

In [None]:
# Trace plot of the retained samples, restricted to the model parameters.
fig = ezmc.viz.traceplot(results, sampler.par_names);

In [None]:
# Point estimate: mean of the retained samples, listed in par_names order so
# it can be passed straight to the simulation functions.
estimates = results.mean()
# trans_est_pars = [estimates[p] for p in sampler.par_names]
# est_pars = untransform_pars(trans_est_pars)
est_pars = [estimates[p] for p in sampler.par_names]
est_pars

In [None]:
# Pairwise scatter plots of the parameters and 'll' across retained samples.
sns.pairplot(results[sampler.par_names + ['ll']])

In [None]:
# Same pair plot, keeping only samples whose 'll' exceeds -100.
good = results[results['ll'] > -100]
sns.pairplot(good[sampler.par_names + ['ll']])

In [None]:
# Simulate from the estimated parameters and plot the result in the same
# format as the observed data.
# `t` is not passed to any of the calls below; they rebuild the grid themselves.
t = np.arange(0, 5, .01)
best_X = simulate_trials(est_pars, n_trials=100)
best_rts =  X_to_rt(best_X, est_pars)

plot_simlations(best_X, best_rts, est_pars)

In [None]:
# Re-plot the observed data for side-by-side comparison with the fit.
plot_simlations(true_X, true_rts, true_pars)

In [None]:



# Compare MCMC samples with direct samples of a two-parameter posterior.
# NOTE(review): `demc_sampler` and `setup_axes` are not defined anywhere in
# the visible notebook, and `true_posterior` is only assigned in a later
# cell, so this cell fails on a fresh kernel. Presumably it was pasted from
# another notebook — confirm, or restore the missing definitions.
chains = demc_sampler.get_chains()
ezmc.viz.traceplot(chains)

results = demc_sampler.get_results(burn_in=500, thin=50)
ezmc.viz.traceplot(results, pars=['θ1', 'θ2'])


# Left panel: direct samples; right panel: MCMC samples.
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
plt.sca(axes[0])
setup_axes(newfigure=False)
plt.scatter(true_posterior[0], true_posterior[1], alpha=.5)
plt.title('Direct samples')

plt.sca(axes[1])
setup_axes(newfigure=False)
plt.scatter(results['θ1'], results['θ2'], alpha=.5, color='g')
plt.title('MCMC samples')
plt.tight_layout()
plt.show()


# Numerical summary of the MCMC samples.
print('Means')
print(results.mean())
print('\nSDs')
print(results.std())
print('\nCorr: %.2f' % np.corrcoef(results['θ1'], results['θ2'])[0,1])

In [None]:
# Draw 1000 direct samples and transpose so each row is one parameter.
# NOTE(review): `sample_true` is not defined in the visible notebook.
true_posterior = sample_true(1000).T
plt.scatter(true_posterior[0], true_posterior[1])

# DEMC

In [None]:
# ezmc.DifferentialEvolutionSampler?

In [None]:
# Differential-evolution sampler over two parameters, with both init bounds
# set to [-10, 10].
# NOTE(review): the target function `f` is not defined in the visible notebook.
m = ezmc.DifferentialEvolutionSampler(func=f, 
                                      par_names=['a', 'b'],
                                     init_bounds=[[-10, 10],
                                                 [-10, 10]])

In [None]:
# Run 10 chains with n=20000 and progress output enabled.
m.sample_chains(nchains=10, n=20000, verbose=1)

In [None]:
# Trace plot of the chains before burn-in/thinning.
chains = m.get_chains()
ezmc.viz.traceplot(chains)

In [None]:
# Retained samples after burn-in and thinning, and their trace for a and b.
results = m.get_results(burn_in=500, thin=20)
ezmc.viz.traceplot(results, ['a', 'b'])

In [None]:
# Joint distribution of the retained samples.
plt.scatter(results['a'], results['b'])

In [None]:
# Per-row mean and SD of the direct samples, plus the correlation between rows.
print(np.mean(true_posterior, 1), np.std(true_posterior, 1), np.corrcoef(true_posterior)[0,1])

In [None]:
# Same summary for the MCMC samples, laid out one parameter per row so the
# numbers are directly comparable with the direct-sample summary.
a = results[['a', 'b']].values.T
print(np.mean(a, 1), np.std(a, 1), np.corrcoef(a)[0,1])

In [None]:
def kde_countour(x, y, colors='k'):
    """Draw contour lines of a 2-D Gaussian KDE fitted to the points (x, y).

    The density is evaluated on a 100 x 100 grid that extends 10% of the data
    range beyond the extremes of x and y, and contours are drawn on the
    current axes in `colors`. Nothing is returned.
    """
    def padded_range(values):
        # Data range widened by a tenth of its span on each side.
        lo, hi = min(values), max(values)
        pad = (hi - lo) / 10
        return lo - pad, hi + pad

    xmin, xmax = padded_range(x)
    ymin, ymax = padded_range(y)
    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    grid_points = np.vstack([xx.ravel(), yy.ravel()])
    density = stats.gaussian_kde(np.vstack([x, y]))
    heights = np.reshape(density(grid_points).T, xx.shape)
    plt.contour(xx, yy, heights, colors=colors)
    
# Overlay density contours: MCMC samples in red, reference samples in blue.
# NOTE(review): `p` is not defined in the visible notebook — presumably the
# direct samples (cf. `true_posterior`); confirm.
kde_countour(results['a'], results['b'], colors='red')
kde_countour(p[0], p[1], colors='blue')

In [None]:
# NOTE(review): np.sin takes a single input; its second positional argument
# is `out`, so passing a scalar there is expected to raise. `p` is also not
# defined in the visible notebook. Looks like leftover scratch work — confirm
# what was intended.
np.sin(p[0, 0], p[1, 0])

In [None]:
# Autocorrelation of the retained samples of 'a'.
# NOTE(review): statsmodels is imported here rather than in the import cell
# at the top of the notebook.
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(results['a']);

In [None]:
# Trace of the 'll' column for the retained samples.
ezmc.viz.traceplot(results, ['ll'])

In [None]:
# Always raises; presumably a deliberate stop so that "Run All" does not
# continue into the cells below.
raise Exception

# Multiple Chains

In [None]:
# Metropolis sampler on the two-parameter target, 4 chains with n=3000.
# NOTE(review): `f` is not defined in the visible notebook, and the only
# visible `init_func` draws three values while par_names has two entries —
# presumably this cell relies on definitions from another notebook; confirm.
m = ezmc.MetropolisSampler(func=f, par_names=['a', 'b'], proposal_sd=[.25, .25], init_func=init_func)
m.sample_chains(4, n=3000, verbose=0)

In [None]:
# Chains as returned by the sampler, before burn-in/thinning.
chains = m.get_chains()

In [None]:
# Trace plot of a and b for the chains.
ezmc.viz.traceplot(chains, ['a', 'b'])

In [None]:
# Retained samples after burn-in and thinning.
results = m.get_results(burn_in=500, thin=2)

In [None]:
# Trace plot of the retained samples. Called through ezmc.viz like every
# other cell; the bare name `traceplot` is never imported in this notebook.
ezmc.viz.traceplot(results)

In [None]:
# Column means of the retained samples.
results.mean()

In [None]:
# Column standard deviations of the retained samples.
results.std()

# One Chain

In [None]:
# Same sampler set up again, this time adding a single chain by hand.
# NOTE(review): `f` is not defined in the visible notebook.
m = ezmc.MetropolisSampler(func=f, par_names=['a', 'b'], proposal_sd=[.25, .25], init_func=init_func)
m.add_chains(1)

In [None]:
# Advance chain 0 with n=100; presumably re-running this cell extends the
# chain further — confirm in ezmc.
m.sample_chain(chain_ix=0, n=100)

In [None]:
# Full history of chain 0 (no burn-in, no thinning), plotted for a and b.
chains = m.chains[0].get_results(burn_in=0, thin=1)
chains[['a', 'b']].plot()

In [None]:
# Retained samples of chain 0 and their marginal histograms.
# NOTE(review): burn_in=2000 exceeds the n=100 requested in the sampling
# cell, so this likely needs that cell to be run many times — confirm.
results = m.chains[0].get_results(burn_in=2000, thin=10)
# Not the last expression of the cell, so this head() is not displayed.
results.head()
results[['a', 'b']].hist(bins=10)

In [None]:
# Column means and standard deviations of the retained samples.
print(results.mean())
print(results.std())

In [None]:
# 'll' against parameter a.
plt.scatter(results['a'], results['ll'])

In [None]:
# 'll' against parameter b.
plt.scatter(results['b'], results['ll'])

In [None]:
# Pearson correlation between the sampled a and b.
stats.pearsonr(results['a'], results['b'])

In [None]:
# Joint distribution of the retained samples of a and b.
plt.scatter(results['a'], results['b'])

In [None]:
# 'll' over the full chain history stored in `chains`.
chains['ll'].plot()

In [None]:
# NOTE(review): this binds `m` to the MetropolisSampler class itself (there
# is no call), replacing the sampler instance created earlier. Presumably
# unfinished scratch work — confirm.
m = ezmc.MetropolisSampler

In [None]:
# NOTE(review): `chain` is not defined in the visible notebook; this looks
# like leftover scratch work.
chain.chain