In [1]:
%load_ext autoreload

In [2]:
!pwd

/Users/afengler/OneDrive/git_repos/hddmnn_tutorial


In [3]:
%reload_ext autoreload

# MODULE IMPORTS ----
# warning settings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Data management
import pandas as pd
import numpy as np
import pickle

# Plotting
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Stats functionality
from statsmodels.distributions.empirical_distribution import ECDF

# HDDM
import hddm

import pymc as pm
import os

In [4]:
def _post_pred_generate(bottom_node, samples=500, data=None, append_data=False, add_model_parameters=False):
    """Generate posterior predictive data from a single observed node."""
    datasets = []

    ##############################
    # Sample and generate stats
    for sample in range(samples):
        _parents_to_random_posterior_sample(bottom_node)
        # Generate data from bottom node
        sampled_data = bottom_node.random(add_model_parameters = add_model_parameters)
        if append_data and data is not None:
            sampled_data = sampled_data.join(data.reset_index(), lsuffix='_sampled')
        datasets.append(sampled_data)
    return datasets

In [5]:
def _parents_to_random_posterior_sample(bottom_node, pos=None):
    """Walks through parents and sets them to pos sample."""
    for i, parent in enumerate(bottom_node.extended_parents):
        if not isinstance(parent, pm.Node): # Skip non-stochastic nodes
            continue

        if pos is None:
            # Set to random posterior position
            pos = np.random.randint(0, len(parent.trace()))

        assert len(parent.trace()) >= pos, "pos larger than posterior sample size"
        parent.value = parent.trace()[pos]

In [6]:
def pretty_tag(tag):
    return tag[0] if len(tag) == 1 else ', '.join(str(tag))

# Testing Fun

In [7]:
# Metadata
nmcmc = 200
model = 'angle'
n_samples_by_subject = 500

In [8]:
data, full_parameter_dict = hddm.simulators.hddm_dataset_generators.simulator_h_c(n_subjects = 3,
                                                                                  n_samples_by_subject = n_samples_by_subject,
                                                                                  model = model,
                                                                                  p_outlier = 0.00,
                                                                                  conditions = None, 
                                                                                  depends_on = None, 
                                                                                  regression_models = ['t ~ 1 + covariate_name', 'v ~ 1 + covariate_name'], 
                                                                                  regression_covariates = {'covariate_name': {'type': 'continuous', 'range': (0, 1)},
                                                                                                          'covariate_name_2': {'type': 'continuous', 'range': (0, 1)}},
                                                                                  group_only_regressors = False,
                                                                                  group_only = None,
                                                                                  fixed_at_default = None)

In [9]:
# Set up the regressor a regressor:
reg_model_v = {'model': 'v ~ 1 + covariate_name + covariate_name_2', 'link_func': lambda x: x}
reg_model_t = {'model': 't ~ 1 + covariate_name', 'link_func': lambda x: x}
reg_descr = [reg_model_t, reg_model_v]

In [10]:
# Make HDDM model
hddmnn_reg = hddm.HDDMnnRegressor(data,
                                  reg_descr, 
                                  include = hddm.simulators.model_config[model]['hddm_include'],
                                  model = model,
                                  informative = False,
                                  p_outlier = 0.0)

Includes supplied:  ['z', 'theta']
Reg Model:
{'outcome': 't', 'model': ' 1 + covariate_name', 'params': ['t_Intercept', 't_covariate_name'], 'link_func': <function <lambda> at 0x10e950b00>}
Uses Identity Link
Reg Model:
{'outcome': 'v', 'model': ' 1 + covariate_name + covariate_name_2', 'params': ['v_Intercept', 'v_covariate_name', 'v_covariate_name_2'], 'link_func': <function <lambda> at 0x10e950320>}
Uses Identity Link


In [11]:
hddmnn_reg.get_traces()

AttributeError: 'Uniform' object has no attribute 'trace'

In [12]:
# Sample
hddmnn_reg.sample(nmcmc, burn = 100)

boundary violation of regressor part
 [-----------------100%-----------------] 200 of 200 complete in 82.3 sec

<pymc.MCMC.MCMC at 0x149df4c10>

In [13]:
hddmnn_reg.nodes_db

Unnamed: 0,knode_name,stochastic,observed,subj,node,tag,depends,hidden,rt,response,...,t,theta,mean,std,2.5q,25q,50q,75q,97.5q,mc err
a,a,True,False,False,a,(),[],False,,,...,,,1.29505,0.363837,0.446245,1.10328,1.31346,1.56026,1.98659,0.0363837
a_std,a_std,True,False,False,a_std,(),[],False,,,...,,,0.494237,0.27999,0.106587,0.197931,0.532621,0.729738,0.996017,0.027999
a_tau,a_tau,False,False,False,a_tau,(),[],True,,,...,,,,,,,,,,
a_subj.0,a_subj,True,False,True,a_subj.0,(),[subj_idx],False,,,...,,,1.28247,0.0890284,1.14514,1.20805,1.27032,1.36239,1.4749,0.00890284
a_subj.1,a_subj,True,False,True,a_subj.1,(),[subj_idx],False,,,...,,,1.30242,0.168967,1.0358,1.23106,1.31231,1.36233,1.81,0.0168967
a_subj.2,a_subj,True,False,True,a_subj.2,(),[subj_idx],False,,,...,,,1.45676,0.0871514,1.3075,1.3897,1.46468,1.5224,1.6144,0.00871514
z_trans,z_trans,True,False,False,z_trans,(),[],True,,,...,,,,,,,,,,
z,z,False,False,False,z,(),[],False,,,...,,,0.529456,0.0251117,0.464475,0.51664,0.531172,0.545438,0.572964,0.00251117
z_std,z_std,True,False,False,z_std,(),[],False,,,...,,,0.186497,0.149012,0.0288312,0.0782578,0.124846,0.29077,0.567063,0.0149012
z_tau,z_tau,False,False,False,z_tau,(),[],True,,,...,,,,,,,,,,


In [14]:
from kabuki.analyze import post_pred_gen

In [15]:
post_pred_gen(hddmnn_reg, groupby = ['subj_idx'])

 [-----------------------133%-----------------------] 4 of 3 complete in 6.9 sec

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rt,response,model
node,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
wfpt.0,0,0,1.230755,-1,angle
wfpt.0,0,1,1.118580,1,angle
wfpt.0,0,2,1.403114,1,angle
wfpt.0,0,3,1.546864,1,angle
wfpt.0,0,4,1.162932,1,angle
...,...,...,...,...,...
wfpt.2,499,95,1.221503,-1,angle
wfpt.2,499,96,1.875936,1,angle
wfpt.2,499,97,1.546930,-1,angle
wfpt.2,499,98,1.440528,-1,angle


# Testing Graph

In [78]:
# Metadata
nmcmc = 2000
burn = 50
model = 'angle'
n_trials_per_subject = 300
n_subjects = 10

In [79]:
# test regressors only False
# add p_outliers to the generator !
data, full_parameter_dict = hddm.simulators.hddm_dataset_generators.simulator_h_c(data = None, 
                                                                                  n_subjects = n_subjects,
                                                                                  n_trials_per_subject = n_trials_per_subject,
                                                                                  model = model,
                                                                                  p_outlier = 0.00,
                                                                                  conditions = None, 
                                                                                  depends_on = None, 
                                                                                  regression_models = None,
                                                                                  regression_covariates = None,
                                                                                  group_only_regressors = False,
                                                                                  group_only = None,
                                                                                  fixed_at_default = None)

In [80]:
hddmnn_model = hddm.HDDMnn(data,
                               model = model,
                               informative = False,
                               include = hddm.simulators.model_config[model]['hddm_include'], #is_group_model = True,
                               p_outlier = 0.05)

Includes supplied:  ['z', 'theta']


In [81]:
hddmnn_model.sample(nmcmc, burn = burn)

 [-----------------100%-----------------] 2001 of 2000 complete in 572.3 sec

<pymc.MCMC.MCMC at 0x1436e70d0>

In [None]:
from kabuki.analyze import _parents_to_random_posterior_sample
#import ipdb
# def data_processor(x = None):
#     #print(x[:, 0] * x[:, 1])
#     return x[:, 0] * x[:, 1]

plot_posterior_predictive(model = hddmnn_model, plot_func = _plot_posterior_pdf_node_nn,  
                          value_range = np.arange(-5, 5, 0.01), samples = 50,
                          **{'bin_size': 0.05,
                             'plot_likelihood_raw': True,
                             'add_posterior_mean': True,
                             'alpha': 0.05,
                             'plot_type': 'step',
                             'linewidth': 2},
                           figsize = (12, 4))