In [1]:
# Load modules
import hddm
import tensorflow as tf
import matplotlib
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import scipy as scp
import psutil
from time import time
from copy import deepcopy
import os
import pickle
import argparse
import yaml as yml
from multiprocessing import Pool
from functools import partial


In [41]:
def sim_kosher_check(data = None,
                     perc_cutoff = 0.05,
                     response_options = [0, 1]):
    """Check that every response option is sufficiently represented in a dataset.

    For each entry of `response_options` (in order) the observed proportion of
    trials with that response is printed. The check fails as soon as one
    option's proportion falls below `perc_cutoff`; later options are then
    neither evaluated nor printed.

    :Arguments:
        data: object exposing `.shape` and a `.response` column (e.g. pandas.DataFrame)
            Dataset to check, one row per trial.
        perc_cutoff: float <default=0.05>
            Minimum proportion of trials each response option must reach.
        response_options: list <default=[0, 1]>
            Response values that all have to be present often enough.
            The default list is only read, never mutated.

    :Returns:
        int: 1 if every option reaches the cutoff, 0 otherwise
        (an empty dataset always yields 0).

    :Raises:
        ValueError: if `data` is None.
    """
    if data is None:
        raise ValueError('sim_kosher_check requires a dataset, got data = None')

    n_trials = data.shape[0]
    if n_trials == 0:
        # Nothing was observed, so no option can be adequately represented
        # (this also avoids dividing by zero below).
        return 0

    for resp in response_options:
        n_resp = np.sum(data.response == resp)
        print(n_resp / n_trials)
        # Compare against the un-truncated threshold: int(n_trials * perc_cutoff)
        # rounds down to 0 for small datasets, which let options with zero
        # observations pass the check.
        if n_resp < n_trials * perc_cutoff:
            return 0
    return 1

In [None]:
# NOTE(review): scratch call relying on all default arguments; this cell has no
# execution count, so it appears never to have been run -- consider removing it.
hddm.simulators.simulator_single_subject()

In [3]:
# Show the signature and docstring of the single-subject simulator.
help(hddm.simulators.simulator_single_subject)

Help on function simulator_single_subject in module hddm.simulators.hddm_dataset_generators:

simulator_single_subject(parameters=(0, 0, 0), p_outlier=0.0, max_rt_outlier=10.0, model='angle', n_samples=1000, delta_t=0.001, max_t=20, bin_dim=None, bin_pointwise=False)
    Generate a hddm-ready dataset from a single set of parameters
    
    :Arguments:
        parameters: dict, list or numpy array
            Model parameters with which to simulate. Dict is preferable for informative error messages.
            If you know the order of parameters for your model of choice, you can also directly supply a
            list or nump.array which needs to have the parameters in the correct order.
        p_outlier: float between 0 and 1 <default=0>
            Probability of generating outlier datapoints. An outlier is defined
            as a random choice from a uniform RT distribution
        max_rt_outlier: float > 0 <default=10.0>
            Using max_rt_outlier (which is commonly define

In [6]:
# List everything the dataset-generator module exposes (simulators and preprocessing helpers).
help(hddm.simulators.hddm_dataset_generators)

Help on module hddm.simulators.hddm_dataset_generators in hddm.simulators:

NAME
    hddm.simulators.hddm_dataset_generators

FUNCTIONS
    ddm(...)
    
    ddm_flexbound(...)
    
    ddm_flexbound_mic2_adj(...)
    
    ddm_flexbound_par2(...)
    
    ddm_flexbound_seq2(...)
    
    ddm_sdv(...)
    
    full_ddm(...)
    
    full_ddm_vanilla(...)
    
    hddm_preprocess(simulator_data=None, subj_id='none', keep_negative_responses=False, add_model_parameters=False, keep_subj_idx=True)
        Takes simulator data and turns it into HDDM ready format.
        
        :Arguments:
            simulator_data: tuple
                Output of e.g. the hddm.simulators.basic_simulator function.
            subj_id: str <default='none'>
                Subject id to attach to returned dataset
            keep_negative_responses: bool <default=False>
                Whether or not to turn negative responses into 0
            add_model_parameters: bool <default=False>
                Whet

In [8]:
# Show the signature and docstring of the random parameter-vector generator.
help(hddm.simulators.make_parameter_vectors_nn)

Help on function make_parameter_vectors_nn in module hddm.simulators.hddm_dataset_generators:

make_parameter_vectors_nn(model='angle', param_dict=None, n_parameter_vectors=10)
    Generates a (number of) parameter vector(s) for a given model.
    
    :Arguments:
    
        model: str <default='angle'>
            String that specifies the model to be simulated.
            Current options include, 'angle', 'ornstein', 'levy', 'weibull', 'full_ddm'
        param_dict: dict <default=None>
            Dictionary of parameter values that you would like to pre-specify. The dictionary takes the form (for the simple examples of the ddm),
            {'v': [0], 'a': [1.5]} etc.. For a given key supply either a list of length 1, or a list of
            length equal to the n_parameter_vectors argument supplied.
        n_parameter_vectors: int <default=10>
            Nuber of parameter vectors you want to generate
    
    Return: pandas.DataFrame
            Columns are parameter names an

In [13]:
# Draw one parameter vector for the 'ddm' model; per the function's help text
# the result is a pandas.DataFrame.
param_sets = hddm.simulators.make_parameter_vectors_nn(
    model='ddm',
    n_parameter_vectors=1,
)

                                          

In [48]:
# Inspect the raw simulator output (a tuple: dataset DataFrame, parameter dict).
# NOTE(review): in the visible notebook `model_data` is only assigned inside the
# simulation loop cell further down, so this cell depends on out-of-order
# execution and would fail on a fresh Restart & Run All.
model_data

(           rt  response subj_idx         v         a         z         t
 0    0.929577       0.0     none -1.655088  0.896051  0.675431  0.072585
 1    0.720580       0.0     none -1.655088  0.896051  0.675431  0.072585
 2    0.367585       0.0     none -1.655088  0.896051  0.675431  0.072585
 3    0.888578       0.0     none -1.655088  0.896051  0.675431  0.072585
 4    0.506583       0.0     none -1.655088  0.896051  0.675431  0.072585
 ..        ...       ...      ...       ...       ...       ...       ...
 995  0.351585       0.0     none -1.655088  0.896051  0.675431  0.072585
 996  0.314585       1.0     none -1.655088  0.896051  0.675431  0.072585
 997  0.257585       1.0     none -1.655088  0.896051  0.675431  0.072585
 998  0.535582       0.0     none -1.655088  0.896051  0.675431  0.072585
 999  0.692580       0.0     none -1.655088  0.896051  0.675431  0.072585
 
 [1000 rows x 7 columns],
 {'v': -1.6550884634967642,
  'a': 0.8960513527898644,
  'z': 0.6754308132144178,
  

In [75]:
# Build a parameter-recovery collection: repeatedly draw a random parameter
# vector, simulate one single-subject dataset from it and keep the dataset only
# if every choice option is sufficiently represented (see sim_kosher_check).
model = 'ddm'
param_list = hddm.model_config.model_config[model]['params']
choice_options = [0, 1]
n_datasets = 256       # number of accepted datasets to collect
n_samples = 1000       # trials simulated per dataset
perc_cutoff = 0.05     # minimum proportion of trials per choice option
output_file = 'data/param_recov_dataset_' + model + '.pickle'

data_dict = {}
cnt = 0          # accepted datasets so far (also used as subj_idx / dict key)
simple_cnt = 0   # total number of simulation attempts, accepted or not

# The loop only ends once n_datasets datasets have been accepted; rejected
# draws increase simple_cnt but not cnt.
while cnt < n_datasets:
    param_sets = hddm.simulators.make_parameter_vectors_nn(model = model,
                                                           n_parameter_vectors = 1)

    # Pass the parameters as an array ordered like the model's parameter list.
    model_data = hddm.simulators.simulator_single_subject(model = model,
                                                          parameters = param_sets.loc[0, :][param_list].values,
                                                          p_outlier = 0.0,
                                                          n_samples = n_samples)

    # model_data is a tuple: (dataset DataFrame, dict of generating parameters).
    if sim_kosher_check(data = model_data[0],
                        perc_cutoff = perc_cutoff,
                        response_options = choice_options):
        data_dict[cnt] = {}
        data_dict[cnt]['dataset'] = model_data[0]
        data_dict[cnt]['dataset']['subj_idx'] = cnt
        data_dict[cnt]['param_dict'] = model_data[1]
        cnt += 1
        print(cnt)
    else:
        print('defective dataset')
    simple_cnt += 1

print(simple_cnt)

# Make sure the target folder exists, and close the file handle deterministically
# instead of leaving an anonymous open() to the garbage collector.
os.makedirs(os.path.dirname(output_file), exist_ok = True)
with open(output_file, 'wb') as out_file:
    pickle.dump(data_dict, out_file)

Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[-2.35347604  1.86248481  0.6598591   1.03749335]
0.999
0.001
defective dataset
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[-2.32396072  1.91068124  0.52309471  0.93398311]
1.0
0.0
defective dataset
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[-2.04062783  0.7848648   0.2374709   1.21816681]
0.994
0.006
defective dataset
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[0.79773668 1.41313376 0.62557724 1.91192864]
0.041
defective dataset
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[0.44681479 1.72227756 0.16006018 1.09471782]
0.577
0.423
1
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[-1.22684465  1.7519352   0.32414355  1.94926627]
0.997
0.003
defective dataset
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[-0.3604802   0.46276488  0.59842151  1.05415928]
0.496
0.504
2
Model:  ddm
Parameters needed:  ['v', 'a', 'z', 't']
[-0.99889117  0.49729877  0.18350976  1.68560724]
0.925
0.075
3
Mo

In [35]:
# Boolean mask over the trials of the current simulated dataset: True where the
# recorded response equals 0.0.
model_data[0].response == 0.0

0      True
1      True
2      True
3      True
4      True
       ... 
995    True
996    True
997    True
998    True
999    True
Name: response, Length: 1000, dtype: bool

In [37]:
# Re-inspect the current simulator output (tuple of dataset DataFrame and parameter dict).
model_data

(           rt  response subj_idx         v         a         z         t
 0    1.735443       0.0     none -1.876421  2.356659  0.489868  0.024419
 1    0.983410       0.0     none -1.876421  2.356659  0.489868  0.024419
 2    1.073412       0.0     none -1.876421  2.356659  0.489868  0.024419
 3    1.626438       0.0     none -1.876421  2.356659  0.489868  0.024419
 4    1.132415       0.0     none -1.876421  2.356659  0.489868  0.024419
 ..        ...       ...      ...       ...       ...       ...       ...
 995  0.509416       0.0     none -1.876421  2.356659  0.489868  0.024419
 996  0.543416       0.0     none -1.876421  2.356659  0.489868  0.024419
 997  0.797412       0.0     none -1.876421  2.356659  0.489868  0.024419
 998  2.020456       0.0     none -1.876421  2.356659  0.489868  0.024419
 999  2.365432       0.0     none -1.876421  2.356659  0.489868  0.024419
 
 [1000 rows x 7 columns],
 {'v': -1.876421184995665,
  'a': 2.356659141427057,
  'z': 0.4898682565767404,
  't

In [67]:
# Load a previously generated parameter-recovery collection.
# NOTE(review): pickle.load can execute arbitrary code contained in the file --
# only load pickles you created yourself.
# NOTE(review): this reads the 'ornstein' file, whereas the simulation cell in
# this notebook writes 'data/param_recov_dataset_ddm.pickle'; confirm the
# ornstein file exists from a separate run.
with open('data/param_recov_dataset_ornstein.pickle', 'rb') as in_file:
    my_data = pickle.load(in_file)

In [68]:
# Each entry of the loaded collection is a dict; list the keys of the first one.
my_data[0].keys()

dict_keys(['dataset', 'param_dict'])

In [69]:
# Display the simulated trials stored under the 'dataset' key of the first entry.
my_data[0]['dataset']

Unnamed: 0,rt,response,subj_idx,v,a,z,g,t
0,6.169662,1.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
1,1.543875,0.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
2,0.862884,0.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
3,1.006882,1.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
4,0.895884,0.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
...,...,...,...,...,...,...,...,...
995,1.427877,0.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
996,2.059898,1.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
997,1.357878,1.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
998,1.068882,1.0,none,0.715144,1.209869,0.334724,0.33413,0.571884
