In [1]:
import numpy as np
import pickle
import math
import matplotlib.pyplot as plt
import pandas as pd
import time
import pprint
import seaborn as sns
import CMR_IA as cmr
import scipy as sp
# Notebook-wide display settings.
# Use seaborn's 'paper' plotting context for figures.
sns.set_context('paper')
# Print NumPy floats without scientific notation.
np.set_printoptions(suppress=True)
# Optional: uncomment to show every DataFrame column when displaying frames.
# pd.set_option('display.max_columns', None)

In [2]:
def simu_success(tag, params):
    """Run the CMR-IA simulation for one task group of the S1 design.

    The function loads the study/test design, keeps the rows of the group
    that corresponds to ``tag``, runs ``cmr.run_success_multi_sess`` and
    post-processes the simulated trials.

    Parameters
    ----------
    tag : str
        Task identifier; one of ``"Asso-CR"`` (group 1), ``"Pair-CR"``
        (group 2) or ``"Item-CR"`` (group 3).
    params : dict
        Model parameters. NOTE: this mapping is updated in place
        (``nitems_in_accumulator`` is overwritten) before the model is run.

    Returns
    -------
    pandas.DataFrame
        The simulated trials merged with the test design, with two extra
        columns ``corr_fin`` and ``omax_fin`` (``-1`` on the first
        ``test1_num`` trials of each list) and with ``s_resp`` recomputed for
        ``test == 1`` trials from a threshold on ``csim``.

    Raises
    ------
    ValueError
        If ``tag`` is not one of the supported task identifiers.
    """
    # Per-task settings: (nitems for the accumulator, number of test-1 trials
    # per list, group id in the design file). The original code carried the
    # trailing notes 96 / 176 / 136 next to nitems — presumably earlier values.
    task_settings = {
        "Asso-CR": (48 * 4, 40, 1),
        "Pair-CR": (48 * 4, 80, 2),
        "Item-CR": (48 * 4, 80, 3),
    }
    if tag not in task_settings:
        # Fail early and clearly instead of hitting an unbound local later on.
        raise ValueError(
            f"Unknown tag {tag!r}; expected one of {sorted(task_settings)}"
        )
    nitems, test1_num, group = task_settings[tag]

    # load stimuli
    # NOTE(review): pickle.load executes arbitrary code on malicious input;
    # only use this with design files you produced yourself.
    path = "../Data/simuS1_design.pkl"
    with open(path, 'rb') as inp:
        df_study = pickle.load(inp)
        df_test = pickle.load(inp)
    df_study = df_study.query(f"group == {group}")
    df_test = df_test.query(f"group == {group}")

    # load semantic matrix
    s_mat = np.load('../Data/wordpools/ltp_FR_similarity_matrix.npy')

    # update n_item in params to fit the study (mutates the caller's dict)
    params.update(nitems_in_accumulator=nitems)

    # run CMR
    df_simu, _f_in_acc, f_in_dif = cmr.run_success_multi_sess(params, df_study, df_test, s_mat)
    # NOTE(review): this assignment aligns on the index; it assumes df_simu
    # shares df_test's index labels — confirm against run_success_multi_sess.
    df_simu['test'] = df_test['test']
    df_simu = df_simu.merge(df_test, on=['session', 'list', 'test', 'test_itemno1', 'test_itemno2'])

    # get f_in
    sessions = np.unique(df_simu.session)
    nlists = len(np.unique(df_simu.list))  # loop-invariant, computed once
    # f_in_dif rows are addressed per list in strides of test1_num / 2
    # (presumably the number of test-2 trials per list — TODO confirm).
    stride = test1_num // 2
    padding = [-1] * test1_num  # placeholder for the first test1_num trials
    all_corr_fin = []
    all_omax_fin = []

    for sess in sessions:
        study_sess = df_study.loc[df_study.session == sess]
        test_sess = df_test.loc[df_test.session == sess]
        # Sorted unique pool of every non-negative item number that occurs in
        # this session's study or test design.
        item_pool = np.unique(np.concatenate((
            study_sess.study_itemno1.to_numpy(),
            study_sess.study_itemno2.to_numpy(),
            test_sess.test_itemno1[test_sess.test_itemno1 >= 0].to_numpy(),
            test_sess.test_itemno2[test_sess.test_itemno2 >= 0].to_numpy(),
        )))
        sess_f_in = f_in_dif[sess]

        for lst in range(nlists):
            # Correct answers of the trials after the first test1_num ones
            # (positional slice within this session/list).
            corr_ans = df_simu.query(f"session == {sess} and list == {lst}")["correct_ans"].iloc[test1_num:]
            # Position of each correct item inside the sorted item pool.
            corr_pos = np.searchsorted(item_pool, corr_ans)
            offset = lst * stride
            # Value at the correct item, and the maximum over all other items.
            corr_fin = [sess_f_in[offset + k][pos] for k, pos in enumerate(corr_pos)]
            omax_fin = [np.max(np.delete(sess_f_in[offset + k], pos)) for k, pos in enumerate(corr_pos)]

            all_corr_fin.extend(padding)
            all_corr_fin.extend(corr_fin)
            all_omax_fin.extend(padding)
            all_omax_fin.extend(omax_fin)

    df_simu['corr_fin'] = all_corr_fin
    df_simu['omax_fin'] = all_omax_fin

    # Threshold for test 1: the mean of the per-correct_ans mean csim values.
    # The original author flags this as intended for manual tuning only.
    csim_two = df_simu.query("test==1").groupby("correct_ans").csim.mean()
    opt_thresh = np.mean(csim_two)
    # Test-1 trials respond 1 when csim exceeds the threshold, else 0; all
    # other trials keep the response produced by the model.
    df_simu['s_resp'] = np.where(
        df_simu['test'] == 1,
        (df_simu['csim'] > opt_thresh).astype(int),
        df_simu['s_resp'],
    )

    return df_simu

In [3]:
# Reuse the anal_perform implementation used by the PSO fitting code.
# object_funcs is imported from the fitting directory, which is appended to
# sys.path first (the path is relative to the current working directory).
import sys
sys.path.append("../Modeling/CMR_IA/fitting")
from object_funcs import anal_perform_S1 as anal_perform

In [4]:
# def anal_perform(df_simu):

#     # get correctness
#     df_simu['correct'] = df_simu.s_resp == df_simu.correct_ans

#     # recognition performance
#     df_recog = df_simu.query("test==1")
#     hr_far = df_recog.groupby("correct_ans")["s_resp"].mean().to_frame(name="Yes rate")
#     hr = hr_far['Yes rate'][1]
#     far = hr_far['Yes rate'][0]
#     z_hr_far = sp.stats.norm.ppf(hr_far)
#     d_prime = z_hr_far[1].item() - z_hr_far[0].item()
#     print("recognition: \n", hr_far)
#     print("d_prime: ", d_prime)

#     # cued recall performance
#     df_cr = df_simu.query("test==2")
#     p_rc = df_cr.correct.mean()
#     print("cued recall: \n", p_rc)

#     # analyze pair
#     def get_pair(df_tmp):
#         df_tmp_pair = pd.pivot_table(df_tmp,index="pair_idx",columns="test",values="correct")
#         df_tmp_pair.columns = ["test1","test2"]
#         df_tmp_pair.reset_index(inplace=True)
#         return df_tmp_pair

#     df_simu_p = df_simu.query("pair_idx >= 0")
#     df_pair = df_simu_p.groupby("session").apply(get_pair).reset_index()
#     test2_rsp = pd.Categorical(df_pair.test2, categories=[0,1])
#     test1_rsp = pd.Categorical(df_pair.test1, categories=[0,1])
#     df_tab = pd.crosstab(index=test2_rsp,columns=test1_rsp, rownames=['test2'], colnames=['test1'], normalize=False, dropna=False)
#     print("contingency table: \n", df_tab)
#     # print("contingency table norm: \n", pd.crosstab(index=df_pair.test2,columns=df_pair.test1,normalize='all'))

#     # compute Yule's Q
#     def Yule_Q(A, B, C, D):
#         return (A * D - B * C) / (A * D + B * C)
#     q = Yule_Q(df_tab[1][1]+0.5,df_tab[0][1]+0.5,df_tab[1][0]+0.5,df_tab[0][0]+0.5)  # add 0.5
#     print("Q: ", q)

#     return p_rc, hr, far, q

### Study Params

In [5]:
# Start from the model's default parameter set; selected values are
# overridden further down in the notebook.
params = cmr.make_default_params()
# Author's working notes:
# - learn while retrieving
# - there are multiple lists, hence beta_rec_post
# - correct the number of sessions - done
# - the size of the pool of all potential words seems to matter: the larger
#   the pool, the worse cued recall (CR) gets. Accepted as a risk for now.

In [6]:
# Hand-tuned parameter overrides.
# First batch: study parameters, which should be identical for all three groups.
params.update({
    'beta_enc': 0.3,
    'gamma_fc': 0.2,
    'gamma_cf': 0.2,
    's_fc': 0.2,
    's_cf': 0.2,
    'phi_s': 1,
    'phi_d': 0.6,
    'd_ass': 1,
    'learn_while_retrieving': False,
    'var_enc': 1,
    'bad_enc_ratio': 1,
})
# Second batch of overrides (beta_cue through alpha).
params.update({
    'beta_cue': 0.4,
    'beta_rec': 0.1,  # beta for retrieved item
    'beta_rec_post': 1,  # 0.1 was kept as a commented-out alternative
    'beta_distract': 0.1,
    'c_thresh_itm': 0.01,
    'c_thresh_ass': 0.01,
    'c_thresh': 0.01,
    'kappa': 0.01,
    'lamb': 0.002,
    'eta': 0.002,
    'omega': 3,
    'alpha': 1,
})
params

{'beta_enc': 0.3,
 'beta_rec': 0.1,
 'beta_cue': 0.4,
 'beta_rec_post': 1,
 'beta_distract': 0.1,
 'phi_s': 1,
 'phi_d': 0.6,
 's_cf': 0.2,
 's_fc': 0.2,
 'kappa': 0.01,
 'eta': 0.002,
 'omega': 3,
 'alpha': 1,
 'c_thresh': 0.01,
 'c_thresh_itm': 0.01,
 'c_thresh_ass': 0.01,
 'd_ass': 1,
 'lamb': 0.002,
 'rec_time_limit': 60000.0,
 'dt': 10,
 'nitems_in_accumulator': 50,
 'max_recalls': 50,
 'learn_while_retrieving': False,
 'a': 2800,
 'b': 20,
 'm': 0,
 'n': 1,
 'c1': 0,
 'No_recall': None,
 'var_enc': 1,
 'bad_enc_ratio': 1,
 'gamma_fc': 0.2,
 'gamma_cf': 0.2}

In [7]:
# # pso results
# import sys
# sys.path.append("../Modeling/CMR_IA/fitting")
# from optimization_utils import make_boundary

# _,_,what_to_fit = make_boundary(sim_name='S1')
# optim_params = np.loadtxt("../Modeling/CMR_IA/fitting/outparams_backup/S1_230803_100-100.txt")
# for pname, pvalue in zip(what_to_fit, optim_params):
#     params[pname] = pvalue
# params.update(beta_rec_post=1)
# params

### Association - CR (Group1)

In [8]:
# Simulate the Association - CR group (group 1) and display the resulting trials.
tag = "Asso-CR"
df_simu_g1 = simu_success(tag=tag, params=params)
df_simu_g1

CMR2 Time: 78.33125352859497


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,0,1213,926,1.0,0.014517,0.618491,1,ROUGE,MOUTH,0,1,-1,0,1,-1.000000,-1.000000
1,0,0,830,717,0.0,0.126223,0.510354,1,LIST,HORIZON,1,1,34,0,1,-1.000000,-1.000000
2,0,0,577,1183,0.0,0.496779,0.441849,1,FLEA,RELISH,1,1,0,0,1,-1.000000,-1.000000
3,0,0,588,439,1.0,0.018592,0.606119,1,FOLDER,DANDRUFF,1,0,25,0,1,-1.000000,-1.000000
4,0,0,687,1311,1.0,0.009859,0.637839,1,HANGER,SIRLOIN,1,1,22,0,1,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,99,4,1169,-1,1593.0,1630.000000,0.657843,2,RAT,-1,331,0,226,99,1,-0.477878,-0.226715
29996,99,4,1349,-1,881.0,1640.000000,0.645747,2,SPHINX,-1,848,1,219,99,1,-0.378796,-0.231552
29997,99,4,1307,-1,496.0,1620.000000,0.626900,2,SILK,-1,72,0,194,99,1,-0.359292,-0.252313
29998,99,4,984,-1,636.0,1660.000000,0.620462,2,ORGAN,-1,899,1,196,99,1,-0.346722,-0.268709


In [9]:
# df_simu_g1.query("test==2 and session == 0 and list == 0")

In [10]:
# Collect the anal_perform statistics of every simulated subject in group 1.
subjects = np.unique(df_simu_g1["subject"])
g1_stats = []
for subj in subjects:
    # Alternative tried earlier: restrict the rows to list == 0 as well.
    df_subj = df_simu_g1.loc[df_simu_g1["subject"] == subj].copy()
    g1_stats.append([*anal_perform(df_subj)])

contingency table: 
 test1   0   1
test2        
0      15  72
1       0  13
contingency table: 
 test1   0   1
test2        
0      27  59
1       0  14
contingency table: 
 test1   0   1
test2        
0      18  70
1       0  12
contingency table: 
 test1   0   1
test2        
0      28  62
1       0  10
contingency table: 
 test1   0   1
test2        
0      23  60
1       0  17
contingency table: 
 test1   0   1
test2        
0      21  67
1       0  12
contingency table: 
 test1   0   1
test2        
0      22  60
1       0  18
contingency table: 
 test1   0   1
test2        
0      16  70
1       0  14
contingency table: 
 test1   0   1
test2        
0      19  72
1       0   9
contingency table: 
 test1   0   1
test2        
0      26  61
1       0  13
contingency table: 
 test1   0   1
test2        
0      14  66
1       0  20
contingency table: 
 test1   0   1
test2        
0      21  69
1       1   9
contingency table: 
 test1   0   1
test2        
0      28  61
1       1  10

In [11]:
# Per-subject statistics, followed by their column-wise mean and standard error.
g1_arr = np.array(g1_stats)
print(g1_arr)
print(g1_arr.mean(axis=0))
print(sp.stats.sem(g1_arr, axis=0))

[[0.13       0.85       0.25       0.70468432]
 [0.14       0.73       0.3        0.86114352]
 [0.12       0.82       0.34       0.73545966]
 [0.1        0.72       0.26       0.81089259]
 [0.17       0.77       0.16       0.86296716]
 [0.12       0.79       0.23       0.7768595 ]
 [0.18       0.78       0.3        0.86450168]
 [0.14       0.84       0.29       0.7431694 ]
 [0.09       0.81       0.2        0.67268623]
 [0.13       0.74       0.27       0.84169884]
 [0.2        0.86       0.27       0.79878971]
 [0.1        0.78       0.32       0.32414911]
 [0.11       0.71       0.2        0.52873563]
 [0.15       0.88       0.26       0.68112798]
 [0.15       0.8        0.22       0.81312411]
 [0.12       0.78       0.42       0.41893831]
 [0.18       0.76       0.22       0.87875648]
 [0.09       0.76       0.22       0.7467167 ]
 [0.16       0.87       0.34       0.72340426]
 [0.14       0.85       0.29       0.72552783]
 [0.12       0.77       0.42       0.79938744]
 [0.11       

### Pair - CR (Group2)

In [12]:
# Simulate the Pair - CR group (group 2) and display the resulting trials.
tag = "Pair-CR"
df_simu_g2 = simu_success(tag=tag, params=params)
df_simu_g2

CMR2 Time: 120.71325492858887


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,0,803,1108,0.0,0.362688,0.457579,1,LAUNDRY,PORK,0,-1,-1,0,2,-1.000000,-1.000000
1,0,0,1310,795,0.0,0.986046,0.407571,1,SIREN,LAGOON,0,-1,-1,0,2,-1.000000,-1.000000
2,0,0,356,1011,1.0,0.024406,0.592514,1,COMMUNITY,PAPER,1,0,18,0,2,-1.000000,-1.000000
3,0,0,238,1575,1.0,0.014086,0.619996,1,CAPITAL,VOLCANO,1,0,36,0,2,-1.000000,-1.000000
4,0,0,121,1572,0.0,14.253997,0.274017,1,BENCH,VIRUS,0,-1,-1,0,2,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,99,3,545,-1,1489.0,1510.000000,0.614297,2,EYE,-1,152,0,171,99,2,-0.439684,-0.203429
47996,99,3,471,-1,1612.0,1660.000000,0.588175,2,DIRECTOR,-1,352,1,144,99,2,-0.459076,-0.262367
47997,99,3,50,-1,1261.0,1650.000000,0.631751,2,ASTEROID,-1,674,0,177,99,2,-0.342888,-0.220733
47998,99,3,850,-1,337.0,1620.000000,0.628664,2,LUNG,-1,1326,1,169,99,2,-0.416143,-0.231776


In [13]:
# Collect the anal_perform statistics of every simulated subject in group 2.
subjects = np.unique(df_simu_g2["subject"])
g2_stats = []
for subj in subjects:
    df_subj = df_simu_g2.loc[df_simu_g2["subject"] == subj].copy()
    g2_stats.append([*anal_perform(df_subj)])

contingency table: 
 test1   0    1
test2         
0      20  124
1       0   16
contingency table: 
 test1   0    1
test2         
0      18  121
1       0   21
contingency table: 
 test1   0    1
test2         
0      10  133
1       0   17
contingency table: 
 test1   0    1
test2         
0      11  135
1       0   14
contingency table: 
 test1   0    1
test2         
0      17  125
1       0   18
contingency table: 
 test1   0    1
test2         
0      16  131
1       0   13
contingency table: 
 test1   0    1
test2         
0      16  125
1       0   19
contingency table: 
 test1   0    1
test2         
0      15  127
1       0   18
contingency table: 
 test1   0    1
test2         
0      11  133
1       0   16
contingency table: 
 test1   0    1
test2         
0      12  131
1       0   17
contingency table: 
 test1   0    1
test2         
0      12  132
1       0   16
contingency table: 
 test1   0    1
test2         
0      11  132
1       0   17
contingency table: 
 test1  

In [14]:
# Per-subject statistics, followed by their column-wise mean and standard error.
g2_arr = np.array(g2_stats)
print(g2_arr)
print(g2_arr.mean(axis=0))
print(sp.stats.sem(g2_arr, axis=0))

[[0.1        0.875      0.2        0.68913858]
 [0.13125    0.8875     0.2        0.73500545]
 [0.10625    0.9375     0.1625     0.46706587]
 [0.0875     0.93125    0.1875     0.42217484]
 [0.1125     0.89375    0.1625     0.67529107]
 [0.08125    0.9        0.11875    0.54419411]
 [0.11875    0.9        0.1375     0.67360208]
 [0.1125     0.90625    0.10625    0.63623395]
 [0.1        0.93125    0.11875    0.47953216]
 [0.10625    0.925      0.1125     0.53778559]
 [0.1        0.925      0.11875    0.51376147]
 [0.10625    0.93125    0.15625    0.5046729 ]
 [0.08125    0.88125    0.15       0.60763359]
 [0.0625     0.88125    0.10625    0.51386322]
 [0.0625     0.93125    0.1375     0.26771654]
 [0.10625    0.9375     0.175      0.46706587]
 [0.11875    0.9625     0.10625    0.3033419 ]
 [0.075      0.91875    0.1875     0.42706131]
 [0.16875    0.90625    0.10625    0.75592173]
 [0.14375    0.89375    0.15       0.74443266]
 [0.075      0.93125    0.225      0.35294118]
 [0.15       

### Item - CR (Group3)

In [15]:
# Simulate the Item - CR group (group 3) and display the resulting trials.
tag = "Item-CR"
df_simu_g3 = simu_success(tag=tag, params=params)
df_simu_g3

CMR2 Time: 57.06072688102722


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,0,319,-1,1.0,0.682557,0.425964,1,CLAW,,1,0,23,0,3,-1.000000,-1.000000
1,0,0,1415,-1,0.0,3.943095,0.338270,1,SUSPECT,,0,-1,-1,0,3,-1.000000,-1.000000
2,0,0,486,-1,0.0,1.696589,0.380438,1,DOOR,,0,-1,-1,0,3,-1.000000,-1.000000
3,0,0,541,-1,0.0,10.771103,0.288025,1,EUROPE,,1,0,28,0,3,-1.000000,-1.000000
4,0,0,48,-1,0.0,114.332039,0.169913,1,ASSISTANT,,0,-1,-1,0,3,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35995,99,2,75,-1,842.0,1860.000000,0.592872,2,BALLOON,,1284,0,123,99,3,-0.429716,-0.340385
35996,99,2,470,-1,224.0,1910.000000,0.640273,2,DIPLOMA,,224,0,119,99,3,-0.332994,-0.362632
35997,99,2,143,-1,301.0,2090.000000,0.546108,2,BOARD,,1526,1,126,99,3,-3.313138,-0.392735
35998,99,2,478,-1,592.0,1950.000000,0.544814,2,DOCTOR,,350,0,102,99,3,-0.557575,-0.371547


In [16]:
# Collect the anal_perform statistics of every simulated subject in group 3.
subjects = np.unique(df_simu_g3["subject"])
g3_stats = []
for subj in subjects:
    df_subj = df_simu_g3.loc[df_simu_g3["subject"] == subj].copy()
    g3_stats.append([*anal_perform(df_subj)])

contingency table: 
 test1   0   1
test2        
0      17  81
1       0  22
contingency table: 
 test1   0   1
test2        
0      28  66
1       0  26
contingency table: 
 test1   0   1
test2        
0      20  74
1       0  26
contingency table: 
 test1   0   1
test2        
0      38  62
1       0  20
contingency table: 
 test1   0   1
test2        
0      25  78
1       1  16
contingency table: 
 test1   0   1
test2        
0      29  66
1       0  25
contingency table: 
 test1   0   1
test2        
0      28  72
1       1  19
contingency table: 
 test1   0   1
test2        
0      24  66
1       2  28
contingency table: 
 test1   0   1
test2        
0      27  64
1       1  28
contingency table: 
 test1   0   1
test2        
0      34  60
1       0  26
contingency table: 
 test1   0   1
test2        
0      28  65
1       0  27
contingency table: 
 test1   0   1
test2        
0      23  82
1       0  15
contingency table: 
 test1   0   1
test2        
0      38  60
1       1  21

In [17]:
# Per-subject statistics, followed by their column-wise mean and standard error.
g3_arr = np.array(g3_stats)
print(g3_arr)
print(g3_arr.mean(axis=0))
print(sp.stats.sem(g3_arr, axis=0))

[[0.18333333 0.85833333 0.34166667 0.81242808]
 [0.21666667 0.76666667 0.33333333 0.91566265]
 [0.21666667 0.83333333 0.29166667 0.87166236]
 [0.16666667 0.68333333 0.325      0.92382693]
 [0.14166667 0.78333333 0.28333333 0.56267409]
 [0.20833333 0.75833333 0.3        0.91534055]
 [0.16666667 0.75833333 0.34166667 0.67268623]
 [0.25       0.78333333 0.21666667 0.61538462]
 [0.24166667 0.76666667 0.375      0.7802385 ]
 [0.21666667 0.71666667 0.24166667 0.93594494]
 [0.225      0.76666667 0.35833333 0.91977955]
 [0.125      0.80833333 0.33333333 0.79654747]
 [0.18333333 0.675      0.28333333 0.80239521]
 [0.225      0.78333333 0.3        0.4       ]
 [0.10833333 0.7        0.28333333 0.86471145]
 [0.19166667 0.8        0.3        0.65105386]
 [0.175      0.84166667 0.34166667 0.51246261]
 [0.2        0.76666667 0.34166667 0.90648464]
 [0.25833333 0.71666667 0.25       0.95020188]
 [0.225      0.75833333 0.325      0.76977153]
 [0.15       0.8        0.31666667 0.84060914]
 [0.16666667 

In [18]:
# One row per group (g1, g2, g3): mean of each statistic across subjects.
stats = np.array([
    list(np.mean(np.array(group_stats), axis=0))
    for group_stats in (g1_stats, g2_stats, g3_stats)
])
stats

array([[0.125     , 0.792     , 0.2728    , 0.72914212],
       [0.1064375 , 0.911125  , 0.1478125 , 0.56935   ],
       [0.19141667, 0.77158333, 0.30525   , 0.82099368]])

In [19]:
# Reference values, one row per group; columns: p_rc, hr, far, q
ground_truth = np.array([
    [0.42, 0.72, 0.22, 0.81],
    [0.30, 0.80, 0.12, 0.71],
    [0.19, 0.67, 0.15, 0.57],
])
# Mean squared difference between simulated and reference statistics.
residual = stats - ground_truth
err = np.power(residual, 2).mean()
err

0.02244396444094571