In [1]:
import pandas as pd
import numpy as np
from scipy import stats, special
from scipy.optimize import minimize 

import pymc3 as pm
import arviz as az

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load Data
db = pd.read_csv('data/mon.csv')

# add serial number for participants
db['subn'] = db['sub'].rank(method='dense').astype(int)-1
db['subject'] = db['subn'].astype('category')

# remove missing trials
db = db.dropna()
db['choice'] = db['choice'].astype('int')

# sort dataframe based on subject, makes it easier to compare the output of the models.
db = db.sort_values('subn').reset_index(drop=True)
db.head()

Unnamed: 0.1,Unnamed: 0,choice,value,risk,ambiguity,sub,catch,subn,subject
0,40,0,8,0.5,0.0,10,0.0,0,0
1,23,0,8,0.5,0.5,10,0.0,0,0
2,24,0,5,0.5,0.5,10,0.0,0,0
3,25,0,8,0.5,0.0,10,0.0,0,0
4,26,1,25,0.5,0.24,10,0.0,0,0


In [19]:
def MLE_riskamb(parameters):
    # extract parameters
    α, β, γ = parameters
    
    
    svLotto = (db_sub['values'].values ** α) * (1-(db_sub['risk'].values+(β * (db_sub['ambiguity'].values/2))))
    svRef = 5 ** α
    p = 1/(1+np.exp((svRef - svLotto) * γ))
    size = len(p)
    mu = np.random.binomial(1,p,size)

    
    # Calculate the log-likelihood for normal distribution
    LL = np.sum(stats.norm.logpdf(db_sub.choices, p))
    # Calculate the negative log-likelihood
    neg_LL = -1*LL
    return neg_LL 

In [20]:
subs = db['sub'].unique()
mLL = pd.DataFrame()

for sub in subs:
    db_sub = db[db['subn']==sub]
    mle_model = minimize(MLE_riskamb, np.array([1,1,1]), method='L-BFGS-B')
    print(mle_model.x)
    
for sub in subs:
    db_sub = simdata[simdata['sub'] == sub]
    mle_model = minimize(MLE_riskamb, np.array([1,1,1]), method='L-BFGS-B')
    x = mle_model.x
    temp = {'sub': sub, 'alpha' : x[0], 'beta': x[1], 'gamma': x[2]}
    mLL = mLL.append(temp,ignore_index=True)

[0.92928941 0.57332596 2.09511445]
[-1.89585783e-02  1.20349116e+01  3.10369542e+01]
[0.99613489 0.58381944 0.31999588]
[0.25797734 2.51992072 4.88813865]
[0.70358361 0.74614638 1.04801155]
[0.32829249 2.50863521 4.66926718]
[0.48609479 0.03556388 3.721745  ]
[ 0.82676483  0.05581301 34.12763101]
[ 0.77857221 -0.69635616  0.69374025]
[0.65401099 0.34706853 3.14737792]


  p = 1/(1+np.exp((svRef - svLotto) * γ))


[ 4.14321677e+00 -1.60228417e+01  4.57665684e-07]
[1.00736165 0.71678499 8.5865494 ]
[ 0.8475308   0.11952867 12.55361995]
[0.78554987 0.9975633  2.85008355]
[0.48702502 0.80246817 2.52652085]
[8.96113889 1.44376662 0.01198743]
[0.63708581 0.03890115 2.56043054]
[1.20410492 0.43127516 6.8266859 ]
[0.81559946 0.39270314 1.45091856]
[ 0.8543455  -0.10110429  1.61771782]
[0.52806603 2.89267324 1.98151877]
[1.64487903 0.61479293 1.11534241]
[ 1.58348814  0.6701473  10.49857217]
[ 0.84426642  0.08202177 10.42602013]
[ 1.03774031 -2.04326526  0.76881049]
[-1.76465624e-02  1.17665178e+01  3.34891282e+01]
[ 0.82410148  0.02144513 19.30410905]
[8.25543097e-01 2.86815876e-02 2.98891381e+01]
[ 8.26860353e-01 -1.93259879e-02  7.32429476e+01]
[0.918993   1.14785581 0.61264729]
[0.65283269 0.39463594 2.02394277]
[ 0.64990999  0.56950028 21.52728775]
[ 1.47476311 -0.36702331  8.59168415]
[ 1.74272187 -1.47811996  0.15144421]
[0.44596211 4.64352595 3.27814503]
[0.69063861 0.23456011 2.87607534]
[0.715

In [None]:
n_sub = max(db.subn)+1
n_trails = 84
total_trials = 

# constant in this experiment
refValue       = 5
refProbability = 1
refAmbiguity   = 0

refProbabilities = np.tile(refProbability, n_trails)
refValues        = np.tile(refValue,       n_trails)
refAmbiguities   = np.tile(refAmbiguity,   n_trails)

In [None]:
values    = np.array(db.value)
risk      = np.array(db.risk)
ambiguity = np.array(db.ambiguity)
choices   = np.array(db.choice)

n_trials = np.arange(len(choices))

sub_idx = np.arange(numsubjs)
sub_idx = np.repeat(sub_idx, 84)
sub_idx = db.subn.tolist()


In [None]:
with pm.Model() as RiskAmbOld:
    # hyper
    rMu = pm.Normal('rMu', 0,1)
    rSig = pm.Exponential('rSig', 1)
    aMu = pm.Normal('aMu', 0,1)
    aSig = pm.Exponential('aSig', 1)
    
    nMu = pm.Normal('nMu', 0,1)
    nSig = pm.Exponential('nSig', 1)
    
    
    α = pm.Lognormal('α', rMu, rSig, shape = numsubjs)
    β = pm.Normal('β', aMu, aSig, shape = numsubjs)
    γ = pm.Lognormal('γ',nMu , nSig, shape = numsubjs)
    # Priors for unknown model parameters
    
      
    # Expected value of outcome
    svLotto = (values ** α[sub_idx]) * (risk-(β[sub_idx] * (ambiguity/2)))
    svRef = 5 ** α[sub_idx]
    p = (svLotto - svRef)/γ[sub_idx]
    mu = pm.invlogit(p)
       
    # Likelihood (sampling distribution) of observations
    #Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=db.choice)
    choice = pm.Binomial('choice',1, mu, observed=choices)
    trace2 = pm.sample(2000, return_inferencedata=True, target_accept=0.95)

In [None]:
with pm.Model() as RiskAmb:
    # hyper
    a = pm.Poisson('a', 1)
    b = pm.Poisson('b', 1)
    
    aMu = pm.Normal('aMu', 0,1)
    aSig = pm.Exponential('aSig', 1)
    
    nMu = pm.Normal('nMu', 0,1)
    nSig = pm.Exponential('nSig', 1)
    
    
    α = pm.Beta('α', a, b, shape = numsubjs)
    β = pm.Normal('β', aMu, aSig, shape = numsubjs)
    γ = pm.Lognormal('γ',nMu , nSig, shape = numsubjs)
    # Priors for unknown model parameters
    
    riskTol = α * 2
    

    # Expected value of outcome
    svLotto = (values ** riskTol[sub_idx]) * (risk-(β[sub_idx] * (ambiguity/2)))
    svRef = 5 ** riskTol[sub_idx]
    p = (svLotto - svRef)/γ[sub_idx]
    mu = pm.invlogit(p)
       
    # Likelihood (sampling distribution) of observations
    #Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=db.choice)
    choice = pm.Binomial('choice',1, mu, observed=choices)
    trace3 = pm.sample(2000, return_inferencedata=True, nuts={'target_accept':0.95})

In [None]:
az.plot_trace(trace3, var_names=['α','β','γ'])

In [None]:
model_compare = az.compare(
    {
        "old": trace2,
        "new": trace3,
    }
)
az.plot_compare(model_compare, figsize=(12, 4))

plt.show()


In [None]:
with RiskAmb:
    ppc = pm.sample_posterior_predictive(
        trace3
    )

In [None]:
ppc['choice'].shape

In [None]:
az.plot_ppc(az.from_pymc3(posterior_predictive=ppc, model=RiskAmb))

In [None]:
pc = az.from_pymc3(posterior_predictive=ppc, model=RiskAmb)
pp = np.mean(pc.posterior_predictive.choice.values, axis=1)

In [None]:
np.sum((choices-pp)**2)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [None]:
pp

In [None]:
x = pp[0].T.shape

In [None]:
len(choices)

In [None]:
cf_matrix = confusion_matrix(choices, pp[0].T)
print(cf_matrix)

In [None]:
az.summary(trace2, var_names=['α'])[:5]

In [None]:
az.summary(trace2, var_names=['β'])[:5]

In [None]:
df = db[['sub','subn']].sort_values('subn').drop_duplicates().reset_index(drop=True)

age = pd.read_excel('/media/Data/Lab_Projects/Aging/aging_session_log.xlsx', sheet_name='scans').iloc[:, [1,7,8]]
age.columns.values[0] = "sub"
age['sub'] = age['sub'].map(lambda x: int(x.lstrip('AG_')))

df = df.merge(age, left_on='sub', right_on='sub')
df.head()

In [None]:
alpha = az.summary(trace3, var_names=['α'])[['mean','sd']]
beta = az.summary(trace3, var_names=['β'])[['mean','sd']]

alpha = alpha.rename(columns={'mean': "alpha", 'sd': 'alpha_sd'}).reset_index(drop=True)
beta = beta.rename(columns={'mean': "beta", 'sd': 'beta_sd'}).reset_index(drop=True)

df = df.merge(alpha, left_index=True, right_index=True)
df = df.merge(beta, left_index=True, right_index=True)

df.head()

In [None]:
sns.regplot(x='Age', y='alpha', data=df)

In [None]:
sns.regplot(x='Age', y='beta', data=df)

In [None]:
d10 = db[db['sub']==10]
d10.head()

In [None]:
sns.regplot(x='value',y='choice', data=d10, logistic=True)

In [None]:
svRef = 5 ** df.alpha[0]
svLotto = d10.value ** df.alpha[0] * (d10.risk - df.beta[0] * (d10.ambiguity/2))
p = sp.special.expit(svLotto - svRef)

d10['sim'] = np.random.binomial(1, p, len(p))
d10.head()

In [None]:
sns.scatterplot(x='value', y='choice', data = d10, alpha=0.3)
sns.regplot(x='value',y='sim', data=d10, logistic=True, scatter=False , color='red')
sns.regplot(x='value',y='choice', data=d10, logistic=True, scatter=False , color='green')

In [None]:
db.groupby('choice').count()