In [1]:
# MODULE IMPORTS ----

# warning settings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Data management
import pandas as pd
import numpy as np
import pickle

# Plotting
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Stats functionality
from statsmodels.distributions.empirical_distribution import ECDF

# HDDM
import hddm

In [2]:
from hddm.simulators.hddm_dataset_generators import simulator_h_c

In [None]:
hddm.

In [3]:
help(simulator_h_c)

Help on function simulator_h_c in module hddm.simulators.hddm_dataset_generators:

simulator_h_c(data=None, n_subjects=10, n_trials_per_subject=100, model='ddm_vanilla', conditions=None, depends_on=None, regression_models=None, regression_covariates=None, group_only_regressors=True, group_only=['z'], fixed_at_default=None, p_outlier=0.0, outlier_max_t=10.0, **kwargs)
    Flexible simulator that allows specification of models very similar to the hddm model classes. Has two major modes. When data 
    
    is supplied the function generates synthetic versions of the provided data. If no data is provided, you can supply 
    a varied of options to create complicated synthetic datasets from scratch.
    
    :Arguments:
        data: pd.DataFrame <default=None>
            Actual covariate dataset. If data is supplied its covariates are used instead of generated.
        n_subjects: int <default=5>
            Number of subjects in the datasets
        n_trials_per_subject: int <default=50

In [4]:
data = pd.read_csv('stress_data.csv')

In [5]:
data['sub_reward']

0       0.903757
1      -1.333441
2       1.649489
3      -1.333441
4      -1.333441
          ...   
6277    1.341077
6278   -1.312198
6279   -1.312198
6280    1.341077
6281    1.341077
Name: sub_reward, Length: 6282, dtype: float64

In [6]:
data['bin_reward'] = (data['sub_reward'] < 0).astype(np.int)

In [5]:
model = 'ddm'
data, param_dict = simulator_h_c(n_subjects = 1, n_trials_per_subject = 1000, model = model, 
                                 conditions = None,
                                 depends_on = None,
                                 #conditions = {'bin_reward': ['1', '2', '3']}, 
                                 #depends_on = {'v': ['bin_reward']},
                                 regression_models = None,
                                 group_only = None,
                                 p_outlier = 0.00,
                                 )

m = hddm.HDDMnn(data,
                #depends_on = {'v': ['bin_reward']},
                p_outlier=0.05,
                include = hddm.model_config.model_config[model]['hddm_include'], 
                model=model,
                is_group_model=False)

m.sample(1000, burn = 200)
#post_pred_gen(m)

Using default priors: Uninformative
Includes supplied:  ['z']
 [-----------------100%-----------------] 1000 of 1000 complete in 30.4 sec

<pymc.MCMC.MCMC at 0x14ebdca10>

In [4]:
data

Unnamed: 0,rt,response,subj_idx,covariate_name,v,a,z,t
0,0.384999,1.0,0,0.75,1.248788,1.078103,0.448055,0.001
1,0.644995,1.0,0,0.00,1.248788,1.078103,0.448055,0.001
2,2.708986,1.0,0,0.50,1.248788,1.078103,0.448055,0.001
3,0.723994,1.0,0,0.25,1.248788,1.078103,0.448055,0.001
4,0.873992,1.0,0,0.00,1.248788,1.078103,0.448055,0.001
...,...,...,...,...,...,...,...,...
9995,0.331999,1.0,9,0.50,1.396531,1.229369,0.370358,0.001
9996,0.445998,1.0,9,0.50,1.396531,1.229369,0.370358,0.001
9997,0.580996,1.0,9,0.00,1.396531,1.229369,0.370358,0.001
9998,1.633020,1.0,9,0.00,1.396531,1.229369,0.370358,0.001


In [6]:
m.knodes

[v,
 v_std,
 v_tau,
 v_subj,
 a,
 a_std,
 a_tau,
 a_subj,
 z,
 z_std,
 z_tau,
 z_subj,
 t,
 t_std,
 t_tau,
 t_subj,
 wfpt]

In [19]:
data['bin_reward_int'] = data['bin_reward'].astype(np.int)

In [20]:
data['bin_reward_int']

0       1
1       1
2       1
3       1
4       1
       ..
5995    3
5996    3
5997    3
5998    3
5999    3
Name: bin_reward_int, Length: 6000, dtype: int64

In [21]:
m = hddm.HDDMnn(data,depends_on = {'v': ['bin_reward_int']},p_outlier=0.05,
                         include = hddm.simulators.model_config[model]['hddm_include'], model=model,informative = False)

Includes supplied:  ['z']


In [35]:
help(post_pred_gen)

Help on function post_pred_gen in module kabuki.analyze:

post_pred_gen(model, groupby=None, samples=500, append_data=False, add_model_parameters=False, progress_bar=True)
    Run posterior predictive check on a model.
    
    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model over which to compute the ppc on.
    
    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replicatons.
        progress_bar : bool (default=True)
            Display progress bar
    
    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index
    

In [26]:
from kabuki.analyze import post_pred_gen
from kabuki.analyze import _post_pred_generate
x = _post_pred_generate(m, samples = 10, add_model_parameters = True)

AttributeError: 'HDDMnn' object has no attribute 'extended_parents'

In [29]:
observeds = m.get_observeds()

In [31]:
for tag, nodes in observeds.groupby('tag'):
    print(nodes)
    for subj_i, (node_name, bottom_node) in enumerate(nodes.iterrows()):
        my_node = bottom_node["node"]

     knode_name stochastic observed   subj  node tag depends hidden   rt  \
wfpt       wfpt      False     True  False  wfpt  ()      []  False  NaN   

     response  ...    z    t mean  std 2.5q  25q  50q  75q 97.5q mc err  
wfpt      NaN  ...  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN    NaN  

[1 rows x 24 columns]


In [40]:
my_node.parents

{'p_outlier': 0.05,
 'w_outlier': 0.1,
 'v': <pymc.distributions.new_dist_class.<locals>.new_class 'v' at 0x14ebdb190>,
 'a': <pymc.distributions.new_dist_class.<locals>.new_class 'a' at 0x14ebdc990>,
 'z': <pymc.PyMCObjects.Deterministic 'z' at 0x10922cc10>,
 't': <pymc.distributions.new_dist_class.<locals>.new_class 't' at 0x10af99c50>}

In [56]:
out = pd.DataFrame(pd.concat(_post_pred_generate(my_node, 
                              samples = 10, 
                              data = None, 
                              append_data = False, 
                              add_model_parameters = True)).mean()[['v', 'a', 'z', 't']].astype(np.float32)).T

In [55]:
out.T.v[0]

IndexError: invalid index to scalar variable.

In [50]:
out_T = pd.DataFrame(out).T

In [57]:
out.a[0]

1.7387086

In [8]:
help(post_pred_gen)

Help on function post_pred_gen in module kabuki.analyze:

post_pred_gen(model, groupby=None, samples=500, append_data=False, add_model_parameters=False, progress_bar=True)
    Run posterior predictive check on a model.
    
    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model over which to compute the ppc on.
    
    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replicatons.
        progress_bar : bool (default=True)
            Display progress bar
    
    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index
    

 [-----------------103%------------------] 31 of 30 complete in 102.7 sec

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rt,response
node,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
wfpt(1).0,0,0,2.510347,-1
wfpt(1).0,0,1,2.388355,-1
wfpt(1).0,0,2,1.870376,1
wfpt(1).0,0,3,0.554342,-1
wfpt(1).0,0,4,1.687368,1
...,...,...,...,...
wfpt(3).9,499,195,0.640314,1
wfpt(3).9,499,196,0.788312,1
wfpt(3).9,499,197,0.725313,1
wfpt(3).9,499,198,1.415328,1
