## Parameter recovery

We simulated data for a set of designs and a range of ground-truth parameters. Now we will try to recover (estimate) those parameters from the simulated data.

In [1]:
# Built-in/Generic Imports
import os,sys
import glob

# Libs
import numpy as np
import pandas as pd
import pymc as pm
import arviz as az

import logging
logger = logging.getLogger("pymc")
logger.setLevel(logging.ERROR)

In [6]:

def estimate_bhm(subj_id,design_df,choices,type='single',random_seed=None):
    """Fit the hierarchical choice model and return posterior-mean estimates.

    Each option is valued as ``amount / (1 + kappa * wait)``. The probability
    that a response equals 1 is the logistic function of
    ``gamma * (delayed value - immediate value)``, and the responses are
    modelled as Bernoulli draws with that probability.

    Priors (as coded below):
        mu_kappa_hyper ~ Beta(mu=0.02, sigma=0.01)
        sd_hyper       ~ LogNormal(sigma=1)
        kappa[s]       ~ LogNormal(mu=mu_kappa_hyper, sigma=sd_hyper)
        gamma[s]       ~ HalfNormal(sigma=sd_hyper)

    Parameters
    ----------
    subj_id : sequence of int
        Subject index for every trial; used to index ``kappa`` and ``gamma``,
        so the values are expected to be 0..n_subjects-1.
    design_df : pandas.DataFrame
        One row per trial with the columns ``cdd_delay_amt``,
        ``cdd_delay_wait``, ``cdd_immed_amt`` and ``cdd_immed_wait``.
    choices : sequence
        Observed binary responses, one per trial.
    type : {'single', 'aggregate'}
        ``'single'`` returns the estimates for subject 0 as scalars;
        ``'aggregate'`` returns one estimate per distinct subject index,
        ordered by ascending index.
    random_seed : optional
        Forwarded to ``pm.sample`` so a run can be reproduced. The default
        (``None``) keeps the previous unseeded behaviour.

    Returns
    -------
    (kappa_hat, gamma_hat)
        Posterior means: two scalars for ``'single'``, two lists for
        ``'aggregate'``.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported modes.
    """
    # Fail fast: previously an unknown mode was only noticed *after* the
    # expensive sampling run, as an UnboundLocalError on return.
    if type not in ('single', 'aggregate'):
        raise ValueError("type must be 'single' or 'aggregate', got {!r}".format(type))

    delay_amt = design_df['cdd_delay_amt'].values
    delay_wait = design_df['cdd_delay_wait'].values
    immed_amt = design_df['cdd_immed_amt'].values
    immed_wait = design_df['cdd_immed_wait'].values

    # Normalise the inputs so lists, arrays and pandas Series behave the same.
    subj_idx = np.asarray(subj_id, dtype=int)
    observed = np.asarray(choices)
    subj_labels = np.unique(subj_idx)  # sorted distinct subject indices
    n_subj = np.size(subj_labels)

    with pm.Model() as model_simple:

        # Hyperparameters for kappa
        # NOTE(review): LogNormal's mu is on the log scale, so a value near
        # 0.02 puts the prior median of kappa near exp(0.02) ~ 1.02 -- confirm
        # that this is the intended prior.
        mu_kappa_hyper = pm.Beta('mu_kappa_hyper',mu=0.02,sigma=0.01)
        # use the same hyper SD for both parameters
        sd_hyper = pm.LogNormal('sd_hyper',sigma=1)

        kappa = pm.LogNormal('kappa',mu=mu_kappa_hyper,sigma=sd_hyper,shape=n_subj)
        gamma = pm.HalfNormal('gamma',sigma=sd_hyper,shape=n_subj)

        # Discounted value of each option for every trial.
        value_delay = delay_amt/(1+(kappa[subj_idx]*delay_wait))
        value_immed = immed_amt/(1+(kappa[subj_idx]*immed_wait))

        prob = pm.Deterministic('prob', 1 / (1 + pm.math.exp(-gamma[subj_idx] * (value_delay - value_immed))))

        y_1 = pm.Bernoulli('y_1',p=prob,observed=observed)

        # These are posterior draws (the likelihood is conditioned on `observed`).
        trace = pm.sample(10000, tune=1000, cores=2,target_accept=0.98,progressbar=False,
                          random_seed=random_seed)

    # az.summary returns a pandas DataFrame indexed by labels such as 'kappa[0]'.
    # Restrict it to the parameters of interest: summarising the per-trial
    # 'prob' entries is slow and is the likely source of spurious divide
    # warnings in the convergence diagnostics.
    summary = az.summary(trace,var_names=['kappa','gamma'],round_to=10)
    posterior_mean = summary['mean']

    if type=='single':
        kappa_hat = posterior_mean.loc['kappa[{}]'.format(0)]
        gamma_hat = posterior_mean.loc['gamma[{}]'.format(0)]
    else:
        # Sorted labels give a deterministic order (set iteration order is not guaranteed).
        kappa_hat = [posterior_mean.loc['kappa[{}]'.format(x)] for x in subj_labels]
        gamma_hat = [posterior_mean.loc['gamma[{}]'.format(x)] for x in subj_labels]
    return kappa_hat,gamma_hat


In [3]:
# Ground-truth parameters and the (shared) trial design used in the simulation.
params_df = pd.read_csv(os.path.join('simul','ground_truth.csv'),index_col=0)
design_df_single = pd.read_csv(os.path.join('simul','design_set.csv'),index_col=0)

# One response file per simulated subject, processed in sorted filename order.
simulated_data = sorted(glob.glob(os.path.join('simul','response','*.csv')))

# Stack every subject's responses into one long vector, tagging each trial
# with the position of its file in `simulated_data`.
subj_id = []
choices = []
for index,fn in enumerate(simulated_data):
    print(fn)
    df = pd.read_csv(fn,index_col=0)
    responses = df['response'].to_numpy().tolist()
    choices.extend(responses)
    subj_id.extend([index]*len(responses))

# The same design is repeated once per response file.
design_list = [design_df_single]*len(simulated_data)
design_df = pd.concat(design_list,axis=0)

# Fit all subjects jointly in a single model.
kappa_hat,gamma_hat = estimate_bhm(subj_id,design_df,choices,type='aggregate')

params_df['kappa_bhm'] = kappa_hat
params_df['gamma_bhm'] = gamma_hat
params_df

simul/response/p0000.csv
simul/response/p0001.csv
simul/response/p0002.csv
simul/response/p0003.csv
simul/response/p0004.csv
simul/response/p0005.csv
simul/response/p0006.csv
simul/response/p0007.csv
simul/response/p0008.csv


Unnamed: 0,kappa_gt,gamma_gt,kappa_bhm,gamma_bhm
0,1e-05,1.666667,7.3e-05,1.839424
1,1e-05,3.333333,4.8e-05,7.917413
2,1e-05,5.0,4.9e-05,7.955232
3,0.003162,1.666667,0.0032,7.80098
4,0.003162,3.333333,0.003659,6.45611
5,0.003162,5.0,0.003428,4.693315
6,1.0,1.666667,1.05351,6.553348
7,1.0,3.333333,1.053671,6.501396
8,1.0,5.0,0.897277,6.945248


In [7]:

# Ground-truth parameters and the trial design used in the simulation.
params_df = pd.read_csv(os.path.join('simul','ground_truth.csv'),index_col=0)
design_df = pd.read_csv(os.path.join('simul','design_set.csv'),index_col=0)

# One response file per simulated subject, processed in sorted filename order.
simulated_data = sorted(glob.glob(os.path.join('simul','response','*.csv')))

kappa_hat = []
gamma_hat = []

# Fit a separate model per response file; every trial is assigned to subject 0.
for index,fn in enumerate(simulated_data):
    print(fn)
    df = pd.read_csv(fn,index_col=0)
    choices = df['response']
    subj_id = [0]*len(choices)
    kh,gh = estimate_bhm(subj_id,design_df,choices,type='single')
    kappa_hat.append(kh)
    gamma_hat.append(gh)

params_df['kappa_bhm'] = kappa_hat
params_df['gamma_bhm'] = gamma_hat
params_df

simul/response/p0000.csv
simul/response/p0001.csv


  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


simul/response/p0002.csv
simul/response/p0003.csv
simul/response/p0004.csv
simul/response/p0005.csv
simul/response/p0006.csv
simul/response/p0007.csv
simul/response/p0008.csv


Unnamed: 0,kappa_gt,gamma_gt,kappa_bhm,gamma_bhm
0,1e-05,1.666667,9.4e-05,1.828369
1,1e-05,3.333333,4.3e-05,10.284423
2,1e-05,5.0,4.3e-05,10.239428
3,0.003162,1.666667,0.003204,6.093589
4,0.003162,3.333333,0.003694,4.736411
5,0.003162,5.0,0.003448,3.673022
6,1.0,1.666667,1.059101,1.48308
7,1.0,3.333333,1.057012,1.504523
8,1.0,5.0,0.929928,1.792562
