In [8]:
import numpy as np
import math
import scipy as sp
from scipy import optimize
from numpy import genfromtxt
import pandas as pd 
import glob
import pymc as pm
import arviz as az
import statistics as stats
import matplotlib.pyplot as plt
import os,sys

In [9]:
# Make the project package importable from this notebook.
# A notebook has no `__file__`, so the location is derived from the working
# directory instead: `current` is the directory that CONTAINS the current
# working directory (one level above where the kernel is running).
# current = os.path.dirname(os.path.realpath(__file__))
current = os.path.dirname(os.getcwd())

# `parent` is the directory that gets added to the import path; at the moment
# it is the same directory as `current`.
# parent = os.path.dirname(os.path.dirname(current))
parent = current
# print(parent)
#/Users/pizarror/IDM

# Add the directory to sys.path only once, so that re-running this cell does
# not keep appending duplicate entries.
if parent not in sys.path:
    sys.path.append(parent)

from IDM_model.src import model_functions as mf


In [10]:
def read_load_data(subject='23_IDM_0144',fn='/tmp',alpha0=1.0,cols=None):
    """Read one subject's CDD task file and return it in model-ready form.

    The CSV at `fn` is read, filtered with `mf.drop_pract` and
    `mf.drop_non_responses` (presumably removing practice and unanswered
    trials -- confirm against `model_functions`), and handed to `mf.get_data`.
    The first element of that result is tagged with the subject label and
    returned.

    Parameters
    ----------
    subject : str
        Label written to the `subject` column of the returned data.
    fn : str
        Path of the subject's CSV file.
    alpha0 : float
        Forwarded to `mf.get_data` as `alpha_hat`.
    cols : list of str, optional
        Column names forwarded to `mf.get_data`. `None` (the default) is
        treated as an empty list.

    Returns
    -------
    The first element returned by `mf.get_data`, with a `subject` column added.
    """
    # Use None as the sentinel instead of a shared mutable default list.
    if cols is None:
        cols = []
    task = 'cdd'
    cdd_df = pd.read_csv(fn)
    cdd_df = mf.drop_pract(cdd_df,task=task)
    # The second return value (the response rate) is not needed here.
    cdd_df,_ = mf.drop_non_responses(cdd_df,task=task,verbose=False)
    data = mf.get_data(cdd_df,cols,alpha_hat=alpha0)[0]
    data['subject'] = subject
    return data

def diganostic_plots(trace,coords=None,var_names=None,figsize=(10,10)):
    """Draw the standard set of sampler diagnostic figures for `trace`.

    Four figures are produced, in this order: an ArviZ trace plot, a posterior
    plot, a rank plot, and a rank plot drawn with `kind="vlines"`.

    NOTE: the function name is misspelled ("diganostic"); it is kept as-is so
    that existing calls keep working.

    Parameters
    ----------
    trace
        Sampling result passed straight through to the ArviZ plotting calls.
    coords : dict, optional
        Coordinate selection forwarded to every ArviZ call. `None` (the
        default) is treated as an empty dict.
    var_names : list of str, optional
        Variables to plot. `None` (the default) means `['kappa', 'gamma']`.
    figsize : tuple
        Size of the two rank-plot figures, which use one column per variable.
    """
    # Use None as the sentinel instead of shared mutable default objects.
    if coords is None:
        coords = {}
    if var_names is None:
        var_names = ['kappa','gamma']

    # increase number of subplots
    # az.rcParams["plot.max_subplots"] = nb_subjects*len(var_names)

    az.plot_trace(trace, var_names=var_names,coords=coords,compact=False)
    plt.tight_layout()

    # az.plot_pair(trace,kind='kde', var_names=var_names,marginals=True)
    az.plot_posterior(trace,var_names=var_names,coords=coords)

    # Rank plot in the default style.
    fig, axes = plt.subplots(1,len(var_names), figsize=figsize)
    az.plot_rank(trace,var_names=var_names,coords=coords,ax=axes)
    fig.tight_layout()

    # Rank plot in the "vlines" style; the connecting lines are hidden (lw=0)
    # and only the markers are drawn.
    fig, axes = plt.subplots(1,len(var_names), figsize=figsize)
    az.plot_rank(trace,var_names=var_names, coords=coords,kind="vlines",vlines_kwargs={'lw':0}, marker_vlines_kwargs={'lw':3},ax=axes)
    # plt.ylim([-0.001,0.001])
    fig.tight_layout()



## Bayesian Hierarchical Model (BHM)

We pooled all data together and ran a simple BH model in `complete_pool_as_prior`.

We use complete pooling to generate priors for when we implement a hierarchical Bayesian model.

### Load data

We will load the data from all participants so we can run the modeling schema

In [11]:
# Takes about 10 seconds

# we will change this when we change utility to 1st level analysis (or split)
split_dir = '/Volumes/UCDN/datasets/IDM/split/'
save_dir = '/Volumes/UCDN/datasets/IDM/BH/csv'
subjs = sorted(glob.glob(os.path.join(split_dir,'23_IDM_*')))
task = 'cdd'
cols = ['cdd_choice','cdd_immed_amt','cdd_delay_amt','cdd_immed_wait','cdd_delay_wait','alpha']

# Collect one frame per subject and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop is quadratic, and seeding it with an
# empty frame coerces column dtypes (subject_id was displayed as 0.0).
subj_frames = []
for subj_id,s in enumerate(subjs):
    subject = os.path.basename(s)
    fn  = os.path.join(s,task,'{}_{}.csv'.format(subject,task))
    # Subjects without a file for this task are skipped; their enumerate index
    # is therefore absent from `subject_id`.
    if not os.path.exists(fn):
        continue
    subj_data = read_load_data(subject=subject,fn=fn,alpha0=1.0,cols=cols)
    for c in cols:
        subj_data[c] = subj_data[c].astype(float)
    subj_data['subject_id'] = int(subj_id)
    subj_frames.append(subj_data)

if subj_frames:
    data = pd.concat(subj_frames,ignore_index=True)
else:
    # No subject file was found: keep an empty frame with the expected columns.
    data = pd.DataFrame(columns=['subject']+cols+['subject_id'])

# Keep the column order used previously: subject first, then the task columns,
# then whatever else is present (e.g. subject_id).
lead_cols = ['subject']+cols
data = data[lead_cols + [c for c in data.columns if c not in lead_cols]]

data.head(10)


Unnamed: 0,subject,cdd_choice,cdd_immed_amt,cdd_delay_amt,cdd_immed_wait,cdd_delay_wait,alpha,subject_id
0,23_IDM_0001,1.0,5.0,26.0,0.0,29.0,1.0,0.0
1,23_IDM_0001,1.0,2.0,22.0,0.0,90.0,1.0,0.0
2,23_IDM_0001,0.0,2.0,6.0,0.0,90.0,1.0,0.0
3,23_IDM_0001,0.0,15.0,15.0,0.0,151.0,1.0,0.0
4,23_IDM_0001,1.0,15.0,55.0,0.0,90.0,1.0,0.0
5,23_IDM_0001,1.0,2.0,65.0,0.0,29.0,1.0,0.0
6,23_IDM_0001,0.0,15.0,20.0,0.0,59.0,1.0,0.0
7,23_IDM_0001,1.0,15.0,26.0,0.0,6.0,1.0,0.0
8,23_IDM_0001,0.0,5.0,10.0,0.0,149.0,1.0,0.0
9,23_IDM_0001,0.0,5.0,4.0,0.0,31.0,1.0,0.0


In [12]:

# Restrict the data to the first ten subject labels, in order of appearance.
subjects = data['subject'].unique()[:10]
# print(subjects)
keep_rows = data['subject'].isin(subjects)
data = data.loc[keep_rows]


In [13]:

# Per-trial inputs for the model, extracted from `data` as plain arrays.
subjects = data['subject'].unique()
nb_subj = subjects.shape[0]
# Integer division: this is only an exact per-subject count when every subject
# has the same number of trials.
nb_trials = data.shape[0]//nb_subj
subj_id_list = data['subject_id'].to_list()
# subj_id = [int(s) for s in subj_id_list]

# Integer position (0..nb_subj-1) of the subject for every trial, in order of
# first appearance -- the same order as `subjects`. The model indexes its
# per-subject parameters with this array, so it has to hold integer codes
# rather than the string labels, and it has to be an ndarray (the model cell
# reads `subj_id.size`).
subj_id = pd.factorize(data['subject'])[0]

# NOTE(review): `old_id` assumes exactly `nb_trials` trials per subject; it can
# differ from `subj_id` when subjects have different trial counts.
old_id = np.array([ [s]*nb_trials for s in range(nb_subj) ]).flatten()

delay_amt = data['cdd_delay_amt'].values
delay_wait = data['cdd_delay_wait'].values
immed_amt = data['cdd_immed_amt'].values
immed_wait = data['cdd_immed_wait'].values
choices = data['cdd_choice'].values


In [16]:
# Scratch check: displays one consecutive integer per entry of `subj_id`.
np.arange(len(subj_id))

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

### Load results from completely_pooled_model

We use the kappa and gamma estimates from the previously run completely pooled model.

In [7]:

# Read the summary table saved for the completely pooled model; the first CSV
# column holds the parameter names and becomes the index.
fn = os.path.join(save_dir,'completely_pooled_model.csv')
pool_model = pd.read_csv(fn,index_col=0)

# Mean and standard deviation reported for each pooled parameter.
mu_kappa_hat = pool_model.loc['kappa[0]','mean']
std_kappa_hat = pool_model.loc['kappa[0]','sd']
mu_gamma_hat = pool_model.loc['gamma[0]','mean']
std_gamma_hat = pool_model.loc['gamma[0]','sd']

kappa_msg = 'For kappa, use the following (mu,sigma) : ({},{})'.format(mu_kappa_hat, std_kappa_hat)
gamma_msg = 'For gamma, use the following (mu,sigma) : ({},{})'.format(mu_gamma_hat, std_gamma_hat)
print(kappa_msg)
print(gamma_msg)

For kappa, use the following (mu,sigma) : (0.0202632834,0.0005014986)
For gamma, use the following (mu,sigma) : (0.1300484166,0.0027491151)


In [None]:
# NOTE(review): this cell has no execution count and uses `floor` and
# `log_radon`, neither of which is defined in the visible part of this
# notebook. It looks like a reference example of the coords/dims pattern
# rather than part of the analysis -- confirm, because running it as-is would
# raise a NameError and would also overwrite `coords`.
coords = {"Level": ["Basement", "Floor"], "obs_id": np.arange(floor.size)}
with pm.Model(coords=coords) as pooled_model:
    # One integer index per observation, registered as model data.
    floor_idx = pm.Data("floor_idx", floor, dims="obs_id")
    # One coefficient per entry of the "Level" coordinate.
    a = pm.Normal("a", 0.0, sigma=10.0, dims="Level")

    # Pick the coefficient that applies to each observation.
    theta = a[floor_idx]
    sigma = pm.Exponential("sigma", 1.0)

    y = pm.Normal("y", theta, sigma=sigma, observed=log_radon, dims="obs_id")

pm.model_to_graphviz(pooled_model)

In [90]:
# We will fit a model for each subject

# Per-subject parameters are selected with integer positions. `subj_id` may
# hold string labels or integer ids, so convert it here to contiguous codes
# 0..nb_subjects-1 (in order of first appearance) instead of using it directly.
subj_idx, subj_labels = pd.factorize(np.asarray(subj_id))
nb_subjects = len(subj_labels)

# One coordinate per subject (not per trial). Integer coordinate values keep
# the summary row labels in the `kappa[0]`, `kappa[1]`, ... form.
coords = {'subject':np.arange(nb_subjects)}
with pm.Model(coords=coords) as model_simple:

    # Hyperparameters for k
    # mu_kappa_hyper = pm.Beta('mu_kappa_hyper',mu=mu_kappa_hat,sigma=0.1)
    sd_kappa_hyper = pm.Beta('sd_kappa_hyper',mu=std_kappa_hat,sigma=0.01)

    # use above mean and stdev to define kappa and gamma, the posterior from the pooled is now our prior
    # kappa = pm.Beta('kappa',mu=mu_kappa_hyper,sigma=std_kappa_hat,dims='subject')
    kappa = pm.HalfNormal('kappa',sigma=sd_kappa_hyper,dims='subject')
    gamma = pm.HalfNormal('gamma',sigma=0.02,dims='subject')

    # Discounted value of each option: amount / (1 + kappa * wait).
    sv_delay = delay_amt/(1+(kappa[subj_idx]*delay_wait))
    sv_immed = immed_amt/(1+(kappa[subj_idx]*immed_wait))

    # Logistic function of the value difference, scaled by the subject's gamma.
    prob = pm.Deterministic('prob', 1 / (1 + pm.math.exp(-gamma[subj_idx] * (sv_delay - sv_immed))))

    y_1 = pm.Bernoulli('y_1',p=prob,observed=choices)

    trace_prior = pm.sample(1000, tune=100, cores=2,target_accept=0.95)


# This is how you get a nice array. Note that this returns a pandas DataFrame, not a numpy array. Indexing is totally different.
summary= az.summary(trace_prior,round_to=10)
fn = os.path.join(save_dir,"BHM_model_coords.csv")
print('Saving to : {}'.format(fn))
summary.to_csv(fn)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sd_kappa_hyper, kappa, gamma]


Sampling 2 chains for 100 tune and 1_000 draw iterations (200 + 2_000 draws total) took 6 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


Saving to : /Volumes/UCDN/datasets/IDM/BH/csv/BHM_model_2chains.csv
