In [172]:
import numpy as np
import pickle
import math
from scipy.stats import sem
import matplotlib.pyplot as plt
import pandas as pd
import time
import pprint
import seaborn as sns
import statsmodels.formula.api as smf
import CMR_IA as cmr
import scipy as sp
# Use seaborn's 'paper' plotting context for all figures in this notebook.
sns.set_context('paper')
# Do not truncate columns when a wide DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [173]:
# The design file contains two objects pickled back to back: the study table
# first, then the test table.
# NOTE: unpickling can execute arbitrary code -- only load design files you trust.
design_path = "../Data/simu6c_design.pkl"
with open(design_path, mode='rb') as design_file:
    df_study, df_test = (pickle.load(design_file) for _ in range(2))
# Optional subsetting to the first 500 sessions (disabled):
# df_study = df_study.loc[df_study.session < 500]
# df_test = df_test.loc[df_test.session < 500]

In [174]:
# Study design: each row lists the two items of a studied pair (item numbers
# and words) together with the session it belongs to.
df_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session
0,140,838,BLUEBERRY,LOCK,0
1,1587,1240,WARDROBE,SAXOPHONE,0
2,1132,702,PROTON,HELMET,0
3,299,1282,CHILD,SHELTER,0
4,1391,1059,STRAWBERRY,PICKLE,0
...,...,...,...,...,...
11995,301,1184,CHIMNEY,REPORT,999
11996,57,965,ATTIRE,NUT,999
11997,155,1112,BOULDER,POSSUM,999
11998,106,1368,BEAR,STAKE,999


In [175]:
# Test design: each row is one test trial (probe item, its correct answer,
# the order/test/test_pos codes, and the pair_idx it refers to).
df_test

Unnamed: 0,test_itemno,test_item,correct_ans,order,rep,test,session,test_pos,pair_idx
0,210,CABBAGE,327,1,1,1,0,1,11
1,1251,SCOTCH,819,2,1,1,0,2,8
2,145,BODY,1198,1,1,1,0,3,7
3,1059,PICKLE,1391,2,1,1,0,4,5
4,711,HOE,1519,1,1,1,0,5,9
...,...,...,...,...,...,...,...,...,...
23995,965,NUT,57,2,1,2,999,20,11997
23996,1075,PISTOL,644,2,1,2,999,21,11993
23997,1090,PLUM,1609,2,1,2,999,22,11991
23998,1184,REPORT,301,2,1,2,999,23,11996


### Run CMR-IA

In [176]:
# Word-pool similarity matrix, passed to the model run below as `s_mat`.
s_mat = np.load('../Data/wordpools/ltp_FR_similarity_matrix.npy')

In [177]:
# Start from the package defaults, then override the values used in this
# simulation. Keys not listed here keep their default (see the displayed dict).
params = cmr.make_default_params()
param_overrides = {
    'beta_enc': 0.3,
    'beta_rec': 0.7,
    'beta_rec_post': 0.1,
    'gamma_fc': 0.5,
    'gamma_cf': 0.5,
    's_fc': 0.1,
    's_cf': 0.1,
    'c_thresh': 0.01,
    'c_thresh_ass': 0.01,
    'a': 2800,
    'b': 2.14,
    'm': 0,
    'n': 1,
    'kappa': 0.02,
    'lamb': 0.01,
    'eta': 0.01,
    'phi_s': 1,
    'phi_d': 0.6,
    'nitems_in_accumulator': 10,
    'd_ass': 0.8,
}
params.update(param_overrides)
params

{'beta_enc': 0.3,
 'beta_rec': 0.7,
 'beta_rec_new': 0.5,
 'beta_rec_post': 0.1,
 'beta_distract': None,
 'phi_s': 1,
 'phi_d': 0.6,
 's_cf': 0.1,
 's_fc': 0.1,
 'kappa': 0.02,
 'eta': 0.01,
 'omega': 8,
 'alpha': 4,
 'c_thresh': 0.01,
 'c_thresh_ass': 0.01,
 'd_ass': 0.8,
 'lamb': 0.01,
 'rec_time_limit': 60000.0,
 'dt': 10,
 'nitems_in_accumulator': 10,
 'max_recalls': 50,
 'learn_while_retrieving': False,
 'a': 2800,
 'b': 2.14,
 'm': 0,
 'n': 1,
 'c1': 0,
 'gamma_fc': 0.5,
 'gamma_cf': 0.5}

In [178]:
# Run the CMR-IA simulation over every session in the design. Returns the
# per-trial responses (df_simu) and f_in, which later cells index as
# f_in[session][trial][item position].
df_simu, f_in = cmr.run_norm_cr_multi_sess(params, df_study, df_test, s_mat)
# Number the trials 1..k within each session, in row order. This replaces a
# hard-coded np.tile(np.arange(1, 25), 1000), which only worked for exactly
# 1000 sessions of 24 trials (e.g. it fails if the design is subset to fewer
# sessions). Like the original, it assumes rows are in test order per session.
df_simu['test_pos'] = df_simu.groupby('session').cumcount() + 1
df_simu

CMR Time: 22.556087255477905


Unnamed: 0,session,test_itemno,s_resp,s_rt,test_pos
0,0,210,327.0,1380.0,1
1,0,1251,819.0,1690.0,2
2,0,145,1198.0,1250.0,3
3,0,1059,145.0,1430.0,4
4,0,711,1519.0,1350.0,5
...,...,...,...,...,...
23995,999,965,57.0,1630.0,20
23996,999,1075,644.0,1570.0,21
23997,999,1090,1609.0,1550.0,22
23998,999,1184,301.0,1330.0,23


In [179]:
# Attach the test-design columns to each simulated trial, then score it:
# a response is correct when it equals the designated answer item number.
merge_keys = ['session', 'test_itemno', 'test_pos']
df_simu = pd.merge(df_simu, df_test, on=merge_keys)
df_simu['correct'] = df_simu['s_resp'].eq(df_simu['correct_ans'])
df_simu

Unnamed: 0,session,test_itemno,s_resp,s_rt,test_pos,test_item,correct_ans,order,rep,test,pair_idx,correct
0,0,210,327.0,1380.0,1,CABBAGE,327,1,1,1,11,True
1,0,1251,819.0,1690.0,2,SCOTCH,819,2,1,1,8,True
2,0,145,1198.0,1250.0,3,BODY,1198,1,1,1,7,True
3,0,1059,145.0,1430.0,4,PICKLE,1391,2,1,1,5,False
4,0,711,1519.0,1350.0,5,HOE,1519,1,1,1,9,True
...,...,...,...,...,...,...,...,...,...,...,...,...
23995,999,965,57.0,1630.0,20,NUT,57,2,1,2,11997,True
23996,999,1075,644.0,1570.0,21,PISTOL,644,2,1,2,11993,True
23997,999,1090,1609.0,1550.0,22,PLUM,1609,2,1,2,11991,True
23998,999,1184,301.0,1330.0,23,REPORT,301,2,1,2,11996,True


In [180]:
# One row per pair: the (mean) `order` code the pair received on test 1 and on
# test 2. pivot_table averages by default, which matches the explicit
# groupby-mean it replaces.
df_cond = df_simu.pivot_table(index='pair_idx', columns='test', values='order').reset_index()
df_cond.columns = ['pair_idx', 'test1', 'test2']
def cond(x):
    """Label a pair by the order codes of its two tests.

    Parameters
    ----------
    x : mapping or row
        Must provide ``x['test1']`` and ``x['test2']``, each expected to be
        1 or 2 (1 -> 'F', 2 -> 'B'; presumably forward/backward -- confirm).

    Returns
    -------
    str or None
        'F-F', 'F-B', 'B-F' or 'B-B'; None for any other combination.
    """
    labels = {
        (1, 1): 'F-F',
        (1, 2): 'F-B',
        (2, 1): 'B-F',
        (2, 2): 'B-B',
    }
    # .get() returns None for unlisted combinations, like the original
    # if/elif chain falling through without a return.
    return labels.get((x['test1'], x['test2']))

# Direction pattern of each pair across its two tests ('F-F', 'F-B', ...).
df_cond['cond'] = df_cond.apply(cond, axis=1)
# Pairs tested in the same direction twice are 'Identical'; all others 'Reversed'.
same_direction = df_cond['cond'].isin(['F-F', 'B-B'])
df_cond['cong'] = same_direction.map({True: 'Identical', False: 'Reversed'})
df_cond

Unnamed: 0,pair_idx,test1,test2,cond,cong
0,1,2.0,1.0,B-F,Reversed
1,2,1.0,2.0,F-B,Reversed
2,3,2.0,1.0,B-F,Reversed
3,4,1.0,1.0,F-F,Identical
4,5,2.0,2.0,B-B,Identical
...,...,...,...,...,...
11995,11996,2.0,2.0,B-B,Identical
11996,11997,1.0,2.0,F-B,Reversed
11997,11998,2.0,1.0,B-F,Reversed
11998,11999,1.0,1.0,F-F,Identical


In [181]:
# Lookup tables from pair_idx to the pair-level labels computed in df_cond.
pairidx2cond = df_cond.set_index('pair_idx')['cond'].to_dict()
pairidx2cong = df_cond.set_index('pair_idx')['cong'].to_dict()
# Copy the pair-level labels onto every trial. Series.map does this in one
# vectorized pass instead of a Python-level apply over every row.
# (A pair_idx missing from the table would yield NaN rather than a KeyError;
# df_cond is built from df_simu, so every pair_idx is present.)
df_simu['cond'] = df_simu['pair_idx'].map(pairidx2cond)
df_simu['cong'] = df_simu['pair_idx'].map(pairidx2cong)
df_simu.head(24)


Unnamed: 0,session,test_itemno,s_resp,s_rt,test_pos,test_item,correct_ans,order,rep,test,pair_idx,correct,cond,cong
0,0,210,327.0,1380.0,1,CABBAGE,327,1,1,1,11,True,F-F,Identical
1,0,1251,819.0,1690.0,2,SCOTCH,819,2,1,1,8,True,B-B,Identical
2,0,145,1198.0,1250.0,3,BODY,1198,1,1,1,7,True,F-F,Identical
3,0,1059,145.0,1430.0,4,PICKLE,1391,2,1,1,5,False,B-B,Identical
4,0,711,1519.0,1350.0,5,HOE,1519,1,1,1,9,True,F-B,Reversed
5,0,838,140.0,1110.0,6,LOCK,140,2,1,1,1,True,B-F,Reversed
6,0,153,838.0,1720.0,7,BOTTLE,736,1,1,1,6,False,F-B,Reversed
7,0,1587,1240.0,1540.0,8,WARDROBE,1240,1,1,1,2,True,F-B,Reversed
8,0,856,210.0,1860.0,9,MAID,607,2,1,1,10,False,B-B,Identical
9,0,702,1132.0,1530.0,10,HELMET,1132,2,1,1,3,True,B-F,Reversed


In [182]:
# Sanity check: number of entries in f_in for the first session. The next cell
# indexes f_in[sess][i] once per test trial, so this should equal the number of
# test trials per session.
len(f_in[0])

24

In [183]:
# For every test trial, pull three summaries out of the model's f_in vector:
#   corr_fin  - f_in at the position selected below (see NOTE(review))
#   omean_fin - mean f_in over all remaining items
#   omax_fin  - max f_in over all remaining items
# "Remaining" excludes both the probe item and the correct-answer item.
sessions = df_simu.session.to_numpy()
# `sessions` has one entry per trial, so iterate over the distinct session ids:
# looping over it directly recomputed each session once per trial.
for sess in np.unique(sessions):
    sess_study = df_study.loc[df_study.session == sess]
    # Sorted item numbers of everything studied in this session. Positions in
    # this array are used as indices into each f_in vector
    # (assumes f_in is laid out in ascending item-number order -- TODO confirm).
    sess_items = np.sort(np.concatenate((sess_study.study_itemno1.to_numpy(),
                                         sess_study.study_itemno2.to_numpy())))
    sess_rows = df_simu.session == sess
    testid = np.searchsorted(sess_items, df_simu.loc[sess_rows, 'test_itemno'])
    corrid = np.searchsorted(sess_items, df_simu.loc[sess_rows, 'correct_ans'])
    # f_in is indexed by the session value itself (assumes session ids are 0..N-1).
    fin_sess = f_in[sess]
    tmp_corr_fin = []
    tmp_omean_fin = []
    tmp_omax_fin = []
    for i in range(len(testid)):
        # NOTE(review): this reads f_in at testid (the probe's position) even
        # though the column is named corr_fin -- confirm whether corrid was
        # intended. Behaviour is left unchanged here.
        tmp_corr_fin.append(fin_sess[i][testid[i]])
        # Drop the probe and the correct answer once, then summarise the rest.
        others_fin = np.delete(fin_sess[i], [testid[i], corrid[i]])
        tmp_omean_fin.append(np.mean(others_fin))
        tmp_omax_fin.append(others_fin.max())
    df_simu.loc[sess_rows, 'corr_fin'] = tmp_corr_fin
    df_simu.loc[sess_rows, 'omean_fin'] = tmp_omean_fin
    df_simu.loc[sess_rows, 'omax_fin'] = tmp_omax_fin
df_simu

Unnamed: 0,session,test_itemno,s_resp,s_rt,test_pos,test_item,correct_ans,order,rep,test,pair_idx,correct,cond,cong,corr_fin,omean_fin,omax_fin
0,0,210,327.0,1380.0,1,CABBAGE,327,1,1,1,11,True,F-F,Identical,0.800726,0.337157,0.530481
1,0,1251,819.0,1690.0,2,SCOTCH,819,2,1,1,8,True,B-B,Identical,0.710841,0.358940,0.634623
2,0,145,1198.0,1250.0,3,BODY,1198,1,1,1,7,True,F-F,Identical,0.703026,0.356648,0.587963
3,0,1059,145.0,1430.0,4,PICKLE,1391,2,1,1,5,False,B-B,Identical,0.679379,0.359289,0.574121
4,0,711,1519.0,1350.0,5,HOE,1519,1,1,1,9,True,F-B,Reversed,0.718311,0.367872,0.572934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,999,965,57.0,1630.0,20,NUT,57,2,1,2,11997,True,F-B,Reversed,0.693931,0.362492,0.559095
23996,999,1075,644.0,1570.0,21,PISTOL,644,2,1,2,11993,True,B-B,Identical,0.674858,0.363544,0.584990
23997,999,1090,1609.0,1550.0,22,PLUM,1609,2,1,2,11991,True,B-B,Identical,0.663802,0.330799,0.550241
23998,999,1184,301.0,1330.0,23,REPORT,301,2,1,2,11996,True,B-B,Identical,0.662295,0.354629,0.537185


In [184]:
# Average the f_in summaries per congruency x test cell. The columns are
# selected with a list ([[...]]): the tuple-style form gb['a', 'b', 'c'] is
# deprecated and raises in pandas >= 2.0.
df_fin = df_simu.groupby(['cong', 'test'])[['corr_fin', 'omean_fin', 'omax_fin']].mean().reset_index()
# Margin between corr_fin and the strongest remaining item.
df_fin['dif'] = df_fin['corr_fin'] - df_fin['omax_fin']
df_fin

  """Entry point for launching an IPython kernel.


Unnamed: 0,cong,test,corr_fin,omean_fin,omax_fin,dif
0,Identical,1,0.691684,0.343244,0.565652,0.126031
1,Identical,2,0.689342,0.34196,0.571075,0.118267
2,Reversed,1,0.691359,0.343297,0.566217,0.125141
3,Reversed,2,0.69082,0.341546,0.570908,0.119911


### PLOT

In [185]:
# Proportion of correct responses for each rep x test x order combination.
dfgp_test_order = (
    df_simu.groupby(['rep', 'test', 'order'])['correct']
    .mean()
    .rename('corr_rate')
    .reset_index()
)
dfgp_test_order

Unnamed: 0,rep,test,order,corr_rate
0,1,1,1,0.8515
1,1,1,2,0.851167
2,1,2,1,0.832833
3,1,2,2,0.8395


In [186]:
def contingency_tab(x):
    """Cross-tabulate each pair's outcome on test 1 against its outcome on test 2.

    Parameters
    ----------
    x : pandas.DataFrame
        Trial-level rows with columns 'pair_idx', 'test' (values 1 and 2)
        and 'correct'.

    Returns
    -------
    pandas.DataFrame
        Table with test-2 outcomes as rows and test-1 outcomes as columns;
        each cell is the proportion of all pairs (``normalize='all'``).
    """
    # One row per pair, one column per test, holding the (mean) correctness.
    correct_by_test = x.pivot_table(index='pair_idx', columns='test', values='correct')
    second_test = correct_by_test[2]
    first_test = correct_by_test[1]
    return pd.crosstab(index=second_test, columns=first_test, normalize='all')

In [187]:
# Test-1 x test-2 contingency table (proportions of pairs), computed
# separately for the 'Identical' and 'Reversed' groups.
dfgp_rep_cong = df_simu.groupby(['cong']).apply(contingency_tab)
dfgp_rep_cong

Unnamed: 0_level_0,1,0.0,1.0
cong,2,Unnamed: 2_level_1,Unnamed: 3_level_1
Identical,0.0,0.027167,0.14
Identical,1.0,0.126833,0.706
Reversed,0.0,0.026,0.1345
Reversed,1.0,0.117333,0.722167


In [188]:
# Proportion correct for each congruency group on test 1 and test 2.
df_simu.groupby(['cong','test']).correct.mean()

cong       test
Identical  1       0.846000
           2       0.832833
Reversed   1       0.856667
           2       0.839500
Name: correct, dtype: float64

In [189]:
# pd.crosstab(index=a[2],columns=a[1])

In [190]:
# fig,ax = plt.subplots(figsize=(5,5))
# ax.spines[['right', 'top']].set_visible(False)
# ax.tick_params(axis="y",direction="in")
# ax.tick_params(axis="x",direction="in")
# sns.lineplot(data=df_laggp, x='lag', y='corr_rate', linewidth=2, marker='o', markersize = 7, hue='order',palette='tab10')
# plt.ylim([0,1])
# plt.xlabel('Test Lag')
# plt.ylabel('Probability of Recall')
# L = plt.legend(title='Direction',loc='upper right')
# L.get_texts()[0].set_text('Forward')
# L.get_texts()[1].set_text('Backward')
# # plt.savefig('fig/simu6a_recall.pdf')
# plt.show()