In [93]:
import numpy as np
import pickle
import math
import matplotlib.pyplot as plt
import pandas as pd
import time
import pprint
import seaborn as sns
import CMR_IA as cmr
# Use seaborn's 'paper' plotting context for any figures drawn in this notebook.
sns.set_context('paper')
# Uncomment to show every column when a wide DataFrame is displayed.
# pd.set_option('display.max_columns', None)

In [94]:
# Read the experiment design. The pickle file holds two objects written back to
# back, so the order of the two loads matters (first -> df_study, second -> df_test).
# NOTE(review): pickle.load can execute arbitrary code; only open design files
# that come from a trusted source.
with open("simu6b_data/simu6b_design.pkl", 'rb') as inp:
    df_study = pickle.load(inp)
    df_test = pickle.load(inp)
# Optional: restrict both frames to a subset of sessions for a quicker run.
# df_study = df_study.loc[df_study.session < 100]
# df_test = df_test.loc[df_test.session < 100]

In [95]:
df_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,list,session
0,120,459,BELT,DIAPER,0,0
1,1315,1260,SKETCH,SEAFOOD,0,0
2,357,723,COMPANION,HOSTESS,0,0
3,1551,1202,VEAL,ROCKET,0,0
4,1081,1066,PLANE,PIGMENT,0,0
...,...,...,...,...,...,...
7195,1019,1143,PASSAGE,PUPIL,5,99
7196,293,1375,CHERRY,STEAM,5,99
7197,871,927,MARKET,MOVIE,5,99
7198,1392,1486,STREAM,TORTOISE,5,99


In [96]:
df_test

Unnamed: 0,test_itemno1,test_itemno2,test_item,correct_ans,order,rep,test,list,session,test_pos,pair_idx
0,51,-1,ASTRONAUT,1083,2,1,1,0,0,1,8
1,1202,-1,ROCKET,1551,2,1,1,0,0,2,4
2,979,-1,OPERATOR,142,1,1,1,0,0,3,11
3,134,-1,BLADE,105,1,1,1,0,0,4,9
4,1081,-1,PLANE,1066,1,1,1,0,0,5,5
...,...,...,...,...,...,...,...,...,...,...,...
14395,1019,-1,PASSAGE,1143,1,1,2,5,99,20,7196
14396,487,-1,DOORBELL,788,2,1,2,5,99,21,7189
14397,723,-1,HOSTESS,1378,2,1,2,5,99,22,7192
14398,196,-1,BULLY,786,1,1,2,5,99,23,7190


### Run CMR-IA

In [97]:
# Word-pool similarity matrix; passed to cmr.run_success_multi_sess as `s_mat`.
s_mat = np.load('../../Data/wordpools/ltp_FR_similarity_matrix.npy')

In [98]:
# Start from the package defaults and override the values used for this simulation.
# NOTE(review): in the printed result gamma_fc / gamma_cf appear after every
# other key, which suggests they are not part of the defaults returned by
# make_default_params() -- confirm that the model actually reads them.
params = cmr.make_default_params()
params.update(
    beta_enc = 0.5,
    beta_rec = 0.1, # beta for retrieved item
    beta_cue = 0.45, # beta for cue
    beta_rec_post = 0.99,
    beta_distract = 0.1,
    gamma_fc = 0.35,
    gamma_cf = 0.35,
    s_fc = 0.1,
    s_cf = 0.1,
    c_thresh = 0.4,
    c_thresh_ass = 0.01,
    kappa = 0.02,
    lamb = 0.01,
    eta = 0.005,
    phi_s = 1,
    phi_d = 0.6,
    nitems_in_accumulator = 96,
    d_ass = 0.7,
    learn_while_retrieving = True,
    omega = 5,
    alpha = 1
)
# Display the full parameter set (defaults plus overrides) for the record.
params

{'beta_enc': 0.5,
 'beta_rec': 0.1,
 'beta_cue': 0.45,
 'beta_rec_post': 0.99,
 'beta_distract': 0.1,
 'phi_s': 1,
 'phi_d': 0.6,
 's_cf': 0.1,
 's_fc': 0.1,
 'kappa': 0.02,
 'eta': 0.005,
 'omega': 5,
 'alpha': 1,
 'c_thresh': 0.4,
 'c_thresh_itm': 0.5,
 'c_thresh_ass': 0.01,
 'd_ass': 0.7,
 'lamb': 0.01,
 'rec_time_limit': 60000.0,
 'dt': 10,
 'nitems_in_accumulator': 96,
 'max_recalls': 50,
 'learn_while_retrieving': True,
 'a': 2800,
 'b': 20,
 'm': 0,
 'n': 1,
 'c1': 0,
 'thresh_sigma': 0,
 'No_recall': None,
 'var_enc': 1,
 'bad_enc_ratio': 1,
 'gamma_fc': 0.35,
 'gamma_cf': 0.35}

In [99]:
# Run the model on every session, then attach the design columns from df_test
# (correct answer, order, pair index, ...) to the simulated responses.
df_simu, f_in, f_dif = cmr.run_success_multi_sess(params, df_study, df_test, s_mat, mode='CR-CR')

# Position of each trial inside its (session, list), counted from 1. Deriving it
# from the data instead of a hard-coded "24 trials x 600 lists" tile keeps this
# cell working when the design is filtered to fewer sessions or lists.
# Assumes the simulation returns trials in presentation order within each list.
df_simu['test_pos'] = df_simu.groupby(['session', 'list']).cumcount() + 1

n_simulated = len(df_simu)
df_simu = df_simu.merge(
    df_test,
    on=['session', 'list', 'test_itemno1', 'test_itemno2', 'test_pos'],
    validate='one_to_one',  # fail loudly if the keys do not identify a single trial
)
# An inner merge silently drops unmatched rows; make that an error instead.
assert len(df_simu) == n_simulated, "merge with df_test dropped simulated trials"

# A response is correct when the recalled item is the cue's studied partner.
df_simu['correct'] = df_simu.s_resp == df_simu.correct_ans
df_simu

CMR2 Time: 22.570799589157104


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test_pos,test_item,correct_ans,order,rep,test,pair_idx,correct
0,0,0,51,-1,1188.0,1960.0,0.721665,1,ASTRONAUT,1083,2,1,1,8,False
1,0,0,1202,-1,1551.0,2530.0,0.566311,2,ROCKET,1551,2,1,1,4,True
2,0,0,979,-1,105.0,2360.0,0.522622,3,OPERATOR,142,1,1,1,11,False
3,0,0,134,-1,142.0,2770.0,0.628746,4,BLADE,105,1,1,1,9,False
4,0,0,1081,-1,1066.0,2820.0,0.619108,5,PLANE,1066,1,1,1,5,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14395,99,5,1019,-1,930.0,430.0,0.617769,20,PASSAGE,1143,1,1,2,7196,False
14396,99,5,487,-1,828.0,390.0,0.654639,21,DOORBELL,788,2,1,2,7189,False
14397,99,5,723,-1,1086.0,500.0,0.613576,22,HOSTESS,1378,2,1,2,7192,False
14398,99,5,196,-1,1181.0,590.0,0.629081,23,BULLY,786,1,1,2,7190,False


In [100]:
# Build one row per pair holding the `order` value used at test 1 and at test 2
# (these two columns are what cond() turns into a condition label).
order_by_test = (
    df_simu
    .groupby(["pair_idx", "test"])['order']
    .mean()
    .to_frame(name='corr_rate')
    .reset_index()
)
df_cond = order_by_test.pivot_table(index='pair_idx', columns='test', values='corr_rate').reset_index()
df_cond.columns = ['pair_idx', 'test1', 'test2']

def cond(x):
    """Label a pair by the order values seen at its two tests.

    ``x`` must support ``x['test1']`` and ``x['test2']``. An order of 1 is
    written as 'F' and an order of 2 as 'B', giving 'F-F', 'F-B', 'B-F' or
    'B-B'. Any other combination returns None.
    """
    labels = {
        (1, 1): 'F-F',
        (1, 2): 'F-B',
        (2, 1): 'B-F',
        (2, 2): 'B-B',
    }
    return labels.get((x['test1'], x['test2']))

# Tag every pair with its condition, then collapse the four conditions into
# 'Identical' (F-F, B-B) versus 'Reversed' (everything else).
df_cond['cond'] = df_cond.apply(cond, axis=1)
df_cond['cong'] = df_cond['cond'].map(
    lambda label: 'Identical' if label in ('F-F', 'B-B') else 'Reversed'
)
df_cond

Unnamed: 0,pair_idx,test1,test2,cond,cong
0,1,1.0,1.0,F-F,Identical
1,2,2.0,1.0,B-F,Reversed
2,3,1.0,1.0,F-F,Identical
3,4,2.0,1.0,B-F,Reversed
4,5,1.0,2.0,F-B,Reversed
...,...,...,...,...,...
7195,7196,2.0,1.0,B-F,Reversed
7196,7197,1.0,1.0,F-F,Identical
7197,7198,2.0,1.0,B-F,Reversed
7198,7199,1.0,2.0,F-B,Reversed


In [101]:
# Copy each pair's condition / congruence label onto every trial of that pair.
labels_by_pair = df_cond.set_index("pair_idx")
pairidx2cond = labels_by_pair['cond'].to_dict()
pairidx2cong = labels_by_pair['cong'].to_dict()
# Plain dict indexing keeps the KeyError if a trial refers to an unknown pair.
df_simu['cond'] = [pairidx2cond[idx] for idx in df_simu['pair_idx']]
df_simu['cong'] = [pairidx2cong[idx] for idx in df_simu['pair_idx']]
df_simu.head(24)

Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test_pos,test_item,correct_ans,order,rep,test,pair_idx,correct,cond,cong
0,0,0,51,-1,1188.0,1960.0,0.721665,1,ASTRONAUT,1083,2,1,1,8,False,B-B,Identical
1,0,0,1202,-1,1551.0,2530.0,0.566311,2,ROCKET,1551,2,1,1,4,True,B-F,Reversed
2,0,0,979,-1,105.0,2360.0,0.522622,3,OPERATOR,142,1,1,1,11,False,F-F,Identical
3,0,0,134,-1,142.0,2770.0,0.628746,4,BLADE,105,1,1,1,9,False,F-B,Reversed
4,0,0,1081,-1,1066.0,2820.0,0.619108,5,PLANE,1066,1,1,1,5,True,F-B,Reversed
5,0,0,357,-1,723.0,2840.0,0.592554,6,COMPANION,723,1,1,1,3,True,F-F,Identical
6,0,0,831,-1,1435.0,2790.0,0.654221,7,LITERATURE,1435,2,1,1,7,True,B-B,Identical
7,0,0,1188,-1,1083.0,4910.0,0.560661,8,RIB,641,2,1,1,12,False,B-F,Reversed
8,0,0,1260,-1,1315.0,3860.0,0.54773,9,SEAFOOD,1315,2,1,1,2,True,B-F,Reversed
9,0,0,12,-1,226.0,2670.0,0.668459,10,AIRPLANE,226,1,1,1,10,True,F-B,Reversed


In [102]:
# get f_in
# For every test trial, record the model value of the correct answer together
# with the mean and the maximum over all other items studied in that session.
# NOTE(review): the header says f_in, but the values are read from f_dif --
# confirm which of the two outputs is intended.
sessions = np.unique(df_simu.session.to_numpy())
tmp_corr_fin = []
tmp_omean_fin = []
tmp_omax_fin = []

# The per-session results are concatenated in sorted-session order and written
# back positionally at the end, so df_simu must already be ordered by session.
assert df_simu.session.is_monotonic_increasing, "df_simu must be sorted by session"

for sess in sessions:
    # Sorted item numbers studied in this session; searchsorted then converts an
    # item number into its rank within this array.
    df_tmp = df_study.loc[df_study.session == sess]
    tmp = np.sort(np.concatenate((df_tmp.study_itemno1.to_numpy(),
                                  df_tmp.study_itemno2.to_numpy())))

    tmp_corr = df_simu.loc[df_simu.session == sess, 'correct_ans']
    corrid = np.searchsorted(tmp, tmp_corr)

    # Assumes f_dif[sess][i] is the vector for the i-th test trial of the
    # session, indexed by studied-item rank -- TODO confirm against CMR_IA.
    fin_sess = f_dif[sess]
    for i, corr_idx in enumerate(corrid):
        others = np.delete(fin_sess[i], corr_idx)  # every item except the correct one
        tmp_corr_fin.append(fin_sess[i][corr_idx])
        tmp_omean_fin.append(np.mean(others))
        tmp_omax_fin.append(others.max())

df_simu['corr_fin'] = tmp_corr_fin
df_simu['omean_fin'] = tmp_omean_fin
df_simu['omax_fin'] = tmp_omax_fin
df_simu

Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test_pos,test_item,correct_ans,order,rep,test,pair_idx,correct,cond,cong,corr_fin,omean_fin,omax_fin
0,0,0,51,-1,1188.0,1960.0,0.721665,1,ASTRONAUT,1083,2,1,1,8,False,B-B,Identical,-0.459888,-0.914949,-0.406122
1,0,0,1202,-1,1551.0,2530.0,0.566311,2,ROCKET,1551,2,1,1,4,True,B-F,Reversed,-0.516881,-0.975035,-0.510702
2,0,0,979,-1,105.0,2360.0,0.522622,3,OPERATOR,142,1,1,1,11,False,F-F,Identical,-0.440001,-1.045941,-0.520514
3,0,0,134,-1,142.0,2770.0,0.628746,4,BLADE,105,1,1,1,9,False,F-B,Reversed,-4.977879,-1.074086,-0.497908
4,0,0,1081,-1,1066.0,2820.0,0.619108,5,PLANE,1066,1,1,1,5,True,F-B,Reversed,-0.485911,-1.167782,-0.504387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14395,99,5,1019,-1,930.0,430.0,0.617769,20,PASSAGE,1143,1,1,2,7196,False,B-F,Reversed,-0.117085,-0.885193,1.492344
14396,99,5,487,-1,828.0,390.0,0.654639,21,DOORBELL,788,2,1,2,7189,False,B-B,Identical,-0.543238,-0.919255,1.418120
14397,99,5,723,-1,1086.0,500.0,0.613576,22,HOSTESS,1378,2,1,2,7192,False,F-B,Reversed,-0.103860,-1.012467,1.107015
14398,99,5,196,-1,1181.0,590.0,0.629081,23,BULLY,786,1,1,2,7190,False,F-F,Identical,-0.418573,-1.067881,0.851375


In [103]:
# Average the per-trial values by congruence and test, and add the gap between
# the correct item and the strongest other item.
fin_cols = ['corr_fin', 'omean_fin', 'omax_fin']
df_fin = df_simu.groupby(['cong', 'test'])[fin_cols].mean().reset_index()
df_fin = df_fin.assign(dif=df_fin['corr_fin'] - df_fin['omax_fin'])
df_fin

Unnamed: 0,cong,test,corr_fin,omean_fin,omax_fin,dif
0,Identical,1,-0.525368,-1.046043,0.131835,-0.657203
1,Identical,2,-0.467678,-0.989918,0.575747,-1.043425
2,Reversed,1,-0.555785,-1.043693,0.128577,-0.684362
3,Reversed,2,-0.320322,-1.004356,0.504419,-0.824741


### Analysis

In [104]:
# anal_perform used by pso
import sys
sys.path.append("../../Modeling/CMR_IA/fitting/pso_duplicate2")
from object_funcs import anal_perform_6b as anal_perform

In [105]:
# def anal_perform(df_simu):

#     # get pair
#     df_pair = pd.pivot_table(df_simu,index='pair_idx',columns='test', values= 'correct')
#     df_pair.columns = ['test1','test2']
#     test2_rsp = pd.Categorical(df_pair.test2, categories=[1,0])
#     test1_rsp = pd.Categorical(df_pair.test1, categories=[1,0])
#     df_tab = pd.crosstab(index=test2_rsp,columns=test1_rsp, rownames=['test2'], colnames=['test1'], normalize=False, dropna=False)
#     df_tab_norm = pd.crosstab(index=test2_rsp,columns=test1_rsp, rownames=['test2'], colnames=['test1'], normalize='all', dropna=False)
#     t1_t2 = df_tab_norm[1][1] # 1, 2
#     t1_f2 = df_tab_norm[1][0]
#     f1_t2 = df_tab_norm[0][1]
#     f1_f2 = df_tab_norm[0][0]
#     # print(df_tab)
#     # print(df_tab_norm)
#     # print(t1_t2, t1_f2, f1_t2, f1_f2)

#     # compute Q
#     def Yule_Q(A, B, C, D):
#         return (A * D - B * C) / (A * D + B * C)
#     q = Yule_Q(df_tab[1][1]+0.5,df_tab[0][1]+0.5,df_tab[1][0]+0.5,df_tab[0][0]+0.5)  # add 0.5
#     # print("Q: ", q)

#     return t1_t2, t1_f2, f1_t2, f1_f2, q

In [106]:
# Per-session statistics, computed separately for Identical and Reversed pairs.
# anal_perform yields five numbers per subset (see the arrays printed below);
# presumably the four joint correct/incorrect proportions followed by Yule's Q,
# as in the commented-out reference implementation -- confirm in object_funcs.
subjects = np.unique(df_simu.session)
inde_stats, reve_stats = [], []
for subj in subjects:
    df_subj_inde = df_simu.query(f"session == {subj} and cong == 'Identical'").copy()
    df_subj_reve = df_simu.query(f"session == {subj} and cong == 'Reversed'").copy()

    inde_stats.append(list(anal_perform(df_subj_inde)))
    reve_stats.append(list(anal_perform(df_subj_reve)))

In [107]:
np.array(inde_stats)

array([[0.08333333, 0.        , 0.        , 0.91666667, 0.99574468],
       [0.05555556, 0.        , 0.08333333, 0.86111111, 0.95652174],
       [0.05555556, 0.05555556, 0.02777778, 0.86111111, 0.90909091],
       [0.08333333, 0.05555556, 0.        , 0.86111111, 0.97757848],
       [0.05555556, 0.02777778, 0.02777778, 0.88888889, 0.94610778],
       [0.11111111, 0.02777778, 0.        , 0.86111111, 0.98947368],
       [0.13888889, 0.        , 0.        , 0.86111111, 0.99711816],
       [0.08333333, 0.        , 0.        , 0.91666667, 0.99574468],
       [0.11111111, 0.        , 0.        , 0.88888889, 0.99658703],
       [0.05555556, 0.05555556, 0.        , 0.88888889, 0.96969697],
       [0.02777778, 0.08333333, 0.        , 0.88888889, 0.93069307],
       [0.08333333, 0.02777778, 0.        , 0.88888889, 0.98689956],
       [0.11111111, 0.02777778, 0.        , 0.86111111, 0.98947368],
       [0.08333333, 0.05555556, 0.02777778, 0.83333333, 0.9321267 ],
       [0.11111111, 0.        , 0.

In [108]:
np.array(reve_stats)

array([[0.08333333, 0.02777778, 0.05555556, 0.83333333, 0.9321267 ],
       [0.11111111, 0.        , 0.08333333, 0.80555556, 0.9739777 ],
       [0.08333333, 0.05555556, 0.08333333, 0.77777778, 0.83870968],
       [0.08333333, 0.02777778, 0.11111111, 0.77777778, 0.87323944],
       [0.05555556, 0.08333333, 0.13888889, 0.72222222, 0.5497076 ],
       [0.02777778, 0.02777778, 0.08333333, 0.86111111, 0.8       ],
       [0.08333333, 0.05555556, 0.02777778, 0.83333333, 0.9321267 ],
       [0.08333333, 0.        , 0.08333333, 0.83333333, 0.96774194],
       [0.08333333, 0.        , 0.05555556, 0.86111111, 0.97757848],
       [0.11111111, 0.        , 0.05555556, 0.83333333, 0.98194946],
       [0.02777778, 0.08333333, 0.02777778, 0.86111111, 0.8       ],
       [0.02777778, 0.08333333, 0.13888889, 0.75      , 0.36363636],
       [0.13888889, 0.02777778, 0.08333333, 0.75      , 0.93290735],
       [0.05555556, 0.02777778, 0.        , 0.91666667, 0.98224852],
       [0.11111111, 0.02777778, 0.

In [109]:
# Average of the five per-session statistics over sessions, Identical pairs.
np.mean(inde_stats,axis=0)

array([0.09055556, 0.02194444, 0.01305556, 0.87444444, 0.97592151])

In [110]:
# Average of the five per-session statistics over sessions, Reversed pairs.
np.mean(reve_stats,axis=0)

array([0.08111111, 0.02222222, 0.08305556, 0.81361111, 0.89300362])

In [111]:
# Fit error: mean squared deviation of the session-averaged statistics from the
# target values, averaged over the two congruence conditions.
inde_ground_truth = np.array([0.319, 0.006, 0.012, 0.663, 0.94])
reve_ground_truth = np.array([0.293, 0.049, 0.122, 0.537, 0.96])

inde_stats_mean = np.mean(inde_stats, axis=0)
reve_stats_mean = np.mean(reve_stats, axis=0)

inde_mse = np.mean(np.power(inde_stats_mean - inde_ground_truth, 2))
reve_mse = np.mean(np.power(reve_stats_mean - reve_ground_truth, 2))
err = (inde_mse + reve_mse) / 2
err

0.022657415370866532

### Whole

In [112]:
# Proportion of correct responses for every rep x test x order combination.
dfgp_test_order = (
    df_simu
    .groupby(['rep', 'test', 'order'])['correct']
    .mean()
    .reset_index(name='corr_rate')
)
dfgp_test_order

Unnamed: 0,rep,test,order,corr_rate
0,1,1,1,0.11
1,1,1,2,0.105833
2,1,2,1,0.133333
3,1,2,2,0.134444


In [113]:
# Proportion of correct responses by congruence and test.
df_simu.groupby(['cong','test']).correct.mean()

cong       test
Identical  1       0.112500
           2       0.103611
Reversed   1       0.103333
           2       0.164167
Name: correct, dtype: float64

In [114]:
def contingency_tab(x):
    """Cross-tabulate test-2 against test-1 correctness over pairs.

    Parameters
    ----------
    x : DataFrame with columns 'pair_idx', 'test' (values 1 and 2) and 'correct'.

    Returns
    -------
    DataFrame from ``pd.crosstab`` with the test-2 outcome on the rows and the
    test-1 outcome on the columns (categories ordered 1, 0). Cells are
    normalised over the whole table.
    """
    # One row per pair, one column per test, holding that pair's mean correctness.
    per_pair = x.pivot_table(index='pair_idx', columns='test', values='correct')
    first, second = (
        pd.Categorical(per_pair[test_no], categories=[1, 0]) for test_no in (1, 2)
    )
    return pd.crosstab(index=second, columns=first, normalize='all')

In [115]:
# One normalised 2x2 contingency table (test 2 on rows, test 1 on columns) per
# congruence level, stacked under a (cong, row_0) index.
dfgp_rep_cong = df_simu.groupby(['cong']).apply(contingency_tab)
dfgp_rep_cong

Unnamed: 0_level_0,col_0,1,0
cong,row_0,Unnamed: 2_level_1,Unnamed: 3_level_1
Identical,1,0.090556,0.013056
Identical,0,0.021944,0.874444
Reversed,1,0.081111,0.083056
Reversed,0,0.022222,0.813611


In [116]:
def Yule_Q(A, B, C, D):
    """Return Yule's Q for a 2x2 table: (A*D - B*C) / (A*D + B*C).

    A and D are the two concordant cells and B and C the two discordant cells.
    Arguments may be counts or proportions.

    Q is undefined when A*D + B*C is zero. NaN is returned in that case
    rather than raising ZeroDivisionError for plain Python numbers, which
    matches what NumPy scalars already produce for 0/0.
    """
    concordant = A * D
    discordant = B * C
    try:
        return (concordant - discordant) / (concordant + discordant)
    except ZeroDivisionError:
        return float('nan')

In [117]:
# Yule's Q between test-1 and test-2 correctness for the Identical pairs.
tab_identical = dfgp_rep_cong.loc['Identical']
# Cells are addressed as [test-1 outcome][test-2 outcome] because test 1 is on the columns.
both_right, only_t2_right, only_t1_right, both_wrong = (
    tab_identical[t1][t2] for t1, t2 in ((1, 1), (0, 1), (1, 0), (0, 0))
)
Yule_Q(both_right, only_t2_right, only_t1_right, both_wrong)

0.9927900182628273

In [118]:
# Yule's Q between test-1 and test-2 correctness for the Reversed pairs.
tab_reversed = dfgp_rep_cong.loc['Reversed']
# Cells are addressed as [test-1 outcome][test-2 outcome] because test 1 is on the columns.
both_right, only_t2_right, only_t1_right, both_wrong = (
    tab_reversed[t1][t2] for t1, t2 in ((1, 1), (0, 1), (1, 0), (0, 0))
)
Yule_Q(both_right, only_t2_right, only_t1_right, both_wrong)

0.945586154497104

In [119]:
# Q implied by the four target proportions in inde_ground_truth (the two
# middle values are swapped here, which leaves Q unchanged because only their
# product enters the formula).
Yule_Q(0.319, 0.012, 0.006, 0.663)

0.9993193709853524

In [120]:
# Q implied by the four target proportions in reve_ground_truth (the two
# middle values are swapped here, which leaves Q unchanged because only their
# product enters the formula).
Yule_Q(0.293, 0.122, 0.049, 0.537)

0.9267935757627709