In [27]:
import numpy as np
import pickle
import math
import matplotlib.pyplot as plt
import pandas as pd
import time
import pprint
import seaborn as sns
import CMR_IA as cmr
import scipy as sp
# Plot styling: use seaborn's 'paper' context for figures in this notebook.
sns.set_context('paper')
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# pd.set_option('display.max_columns', None)

In [28]:
def simu_success(tag, params):
    """Run the S1 simulation for one task and attach per-trial f_in summaries.

    Parameters
    ----------
    tag : str
        Task name; one of "Asso-CR", "Pair-CR" or "Item-CR". It selects the
        group id used to filter the design tables (1, 2 or 3) and the number
        of test-1 trials per list (40, 80 or 80).
    params : dict
        Model parameters passed to ``cmr.run_success_multi_sess``. The dict is
        copied before ``nitems_in_accumulator`` is overridden, so the caller's
        object is left unchanged.

    Returns
    -------
    pandas.DataFrame
        The simulation output merged with the test design on
        session/list/test/test_itemno1/test_itemno2, plus two columns:
        ``corr_fin`` (the f_in_dif entry at the position of the correct
        answer) and ``omax_fin`` (the largest f_in_dif entry among all other
        positions). Both are -1 for the first ``test1_num`` rows of each list.

    Raises
    ------
    ValueError
        If ``tag`` is not one of the three supported task names.
    """
    # Per-task settings: (test-1 trials per list, group id in the design file).
    task_settings = {
        "Asso-CR": (40, 1),
        "Pair-CR": (80, 2),
        "Item-CR": (80, 3),
    }
    if tag not in task_settings:
        raise ValueError(
            f"Unknown task tag {tag!r}; expected one of {sorted(task_settings)}"
        )
    test1_num, group = task_settings[tag]

    # All three tasks share one design file and one accumulator size.
    # (The original code carried trailing notes 96 / 176 / 136 next to the
    # per-task nitems values; their meaning is not documented here.)
    path = "../Data/simuS1_design.pkl"
    nitems = 48 * 4

    # load stimuli
    # NOTE(review): pickle.load can execute arbitrary code; only open trusted design files.
    with open(path, 'rb') as inp:
        df_study = pickle.load(inp)
        df_test = pickle.load(inp)
    df_study = df_study.query(f"group == {group}")
    df_test = df_test.query(f"group == {group}")

    # load semantic matrix
    s_mat = np.load('../Data/wordpools/ltp_FR_similarity_matrix.npy')

    # Override the accumulator size on a copy so the caller's dict keeps its
    # own values (avoids hidden state between notebook cells).
    sim_params = params.copy()
    sim_params.update(nitems_in_accumulator=nitems)

    # run CMR
    df_simu, _f_in_acc, f_in_dif = cmr.run_success_multi_sess(sim_params, df_study, df_test, s_mat)
    df_simu['test'] = df_test['test']
    df_simu = df_simu.merge(df_test, on=['session', 'list', 'test', 'test_itemno1', 'test_itemno2'])

    # get f_in
    sessions = np.unique(df_simu.session)
    nlists = len(np.unique(df_simu.list))  # same for every session, so computed once
    # Presumably f_in_dif[sess] holds test1_num / 2 test-2 rows per list, in
    # list order — TODO confirm against cmr.run_success_multi_sess.
    rows_per_list = int(test1_num / 2)
    test1_padding = [-1] * test1_num
    all_corr_fin = []
    all_omax_fin = []

    for sess in sessions:
        study_sess = df_study.loc[df_study.session == sess]
        test_sess = df_test.loc[df_test.session == sess]
        # Sorted, de-duplicated item numbers seen in this session (negative
        # test item numbers are excluded); positions in this array are used
        # as indices into the f_in_dif rows.
        session_items = np.unique(np.concatenate((
            study_sess.study_itemno1.to_numpy(),
            study_sess.study_itemno2.to_numpy(),
            test_sess.test_itemno1[test_sess.test_itemno1 >= 0].to_numpy(),
            test_sess.test_itemno2[test_sess.test_itemno2 >= 0].to_numpy(),
        )))

        for lst in range(nlists):
            # Rows after the first test1_num of each list are the ones scored here.
            correct_items = df_simu.query(f"session == {sess} and list == {lst}")["correct_ans"].iloc[test1_num:]
            correct_pos = np.searchsorted(session_items, correct_items)

            corr_fin = []
            omax_fin = []
            for trial, pos in enumerate(correct_pos):
                f_in_row = f_in_dif[sess][lst * rows_per_list + trial]
                corr_fin.append(f_in_row[pos])
                omax_fin.append(np.max(np.delete(f_in_row, pos)))

            # Pad the first test1_num rows of the list with -1. This assumes
            # df_simu is ordered by session, then list, with those rows first.
            all_corr_fin.extend(test1_padding)
            all_corr_fin.extend(corr_fin)
            all_omax_fin.extend(test1_padding)
            all_omax_fin.extend(omax_fin)

    df_simu['corr_fin'] = all_corr_fin
    df_simu['omax_fin'] = all_omax_fin

    # optimal threshold for test1 (only when manually!!)
    # csim_two = df_simu.query("test==1").groupby("correct_ans").csim.mean()
    # opt_thresh = np.mean(csim_two)
    # df_simu['s_resp'] = df_simu.apply(lambda x: (1 if x['csim'] > opt_thresh else 0) if x['test'] == 1 else x['s_resp'], axis=1)

    return df_simu

In [29]:
# Import the S1 analysis routine from the fitting code and expose it as `anal_perform`
# (per the original note, this is the version used by the PSO fitting).
import sys
# The fitting modules are not installed as a package, so their folder is added to the import path.
sys.path.append("../Modeling/CMR_IA/fitting")
from object_funcs import anal_perform_S1 as anal_perform

In [30]:
# def anal_perform(df_simu):

#     # get correctness
#     df_simu['correct'] = df_simu.s_resp == df_simu.correct_ans

#     # recognition performance
#     df_recog = df_simu.query("test==1")
#     hr_far = df_recog.groupby("correct_ans")["s_resp"].mean().to_frame(name="Yes rate")
#     hr = hr_far['Yes rate'][1]
#     far = hr_far['Yes rate'][0]
#     z_hr_far = sp.stats.norm.ppf(hr_far)
#     d_prime = z_hr_far[1].item() - z_hr_far[0].item()
#     print("recognition: \n", hr_far)
#     print("d_prime: ", d_prime)

#     # cued recall performance
#     df_cr = df_simu.query("test==2")
#     p_rc = df_cr.correct.mean()
#     print("cued recall: \n", p_rc)

#     # analyze pair
#     def get_pair(df_tmp):
#         df_tmp_pair = pd.pivot_table(df_tmp,index="pair_idx",columns="test",values="correct")
#         df_tmp_pair.columns = ["test1","test2"]
#         df_tmp_pair.reset_index(inplace=True)
#         return df_tmp_pair

#     df_simu_p = df_simu.query("pair_idx >= 0")
#     df_pair = df_simu_p.groupby("session").apply(get_pair).reset_index()
#     test2_rsp = pd.Categorical(df_pair.test2, categories=[0,1])
#     test1_rsp = pd.Categorical(df_pair.test1, categories=[0,1])
#     df_tab = pd.crosstab(index=test2_rsp,columns=test1_rsp, rownames=['test2'], colnames=['test1'], normalize=False, dropna=False)
#     print("contingency table: \n", df_tab)
#     # print("contingency table norm: \n", pd.crosstab(index=df_pair.test2,columns=df_pair.test1,normalize='all'))

#     # compute Q
#     def Yule_Q(A, B, C, D):
#         return (A * D - B * C) / (A * D + B * C)
#     q = Yule_Q(df_tab[1][1]+0.5,df_tab[0][1]+0.5,df_tab[1][0]+0.5,df_tab[0][0]+0.5)  # add 0.5
#     print("Q: ", q)

#     return p_rc, hr, far, q

### Study Params

In [31]:
# Start from the model's default parameter set; later cells overwrite entries in this dict.
params = cmr.make_default_params()
# Author's notes:
# - learn while retrieving
# - multiple lists are simulated, so beta_rec_post is relevant
# - correct session num - done
# - the size of the pool of all potential words seems to matter: the larger it is,
#   the worse cued recall (CR) gets. RISK IT!

In [32]:
# # manually adjusted params
# # study params should be the same across three groups
# params.update(
#     beta_enc = 0.3,
#     gamma_fc = 0.2,
#     gamma_cf = 0.2,
#     s_fc = 0.2,
#     s_cf = 0.2,
#     phi_s = 1,
#     phi_d = 0.6,
#     d_ass = 1,
#     learn_while_retrieving = False,
#     var_enc = 1,
#     bad_enc_ratio = 1,
# )
# params.update(
#     beta_cue = 0.4,
#     beta_rec = 0.1,  # beta for retrieved item
#     beta_rec_post = 1,
#     beta_distract = 0.1,
#     # beta_rec_post = 0.1,
#     c_thresh_itm = 0.01,
#     c_thresh_ass = 0.01,
#     c_thresh = 0.01,
#     kappa = 0.01,
#     lamb = 0.002,
#     eta = 0.002,
#     omega = 3,
#     alpha = 1
# )
# params

In [33]:
# pso results: overlay the fitted S1 parameter values on top of `params`
import sys

fitting_dir = "../Modeling/CMR_IA/fitting"
# Guard the append so that re-running this cell does not stack duplicate path entries.
if fitting_dir not in sys.path:
    sys.path.append(fitting_dir)
from optimization_utils import make_boundary

# The third value returned by make_boundary lists the names of the fitted parameters.
_, _, what_to_fit = make_boundary(sim_name='S1')
# atleast_1d keeps a single-value file iterable (loadtxt would return a 0-d array).
optim_params = np.atleast_1d(
    np.loadtxt("../Modeling/CMR_IA/fitting/outparams_backup/S1_230805_100-100.txt")
)
# zip() would silently drop trailing entries on a mismatch and leave some
# parameters at their previous values, so fail loudly instead.
if len(what_to_fit) != len(optim_params):
    raise ValueError(
        f"Parameter file holds {len(optim_params)} values but make_boundary "
        f"lists {len(what_to_fit)} fitted parameters"
    )
for pname, pvalue in zip(what_to_fit, optim_params):
    params[pname] = pvalue
# params.update(beta_rec_post=1)
params.update(learn_while_retrieving=True, rec_time_limit=10000)
params

{'beta_enc': 0.15921,
 'beta_rec': 0.939965,
 'beta_cue': 0.705133,
 'beta_rec_post': 0.738495,
 'beta_distract': 0.710264,
 'phi_s': 5.396206,
 'phi_d': 0.017273,
 's_cf': 0.456621,
 's_fc': 0.276619,
 'kappa': 0.414287,
 'eta': 0.044371,
 'omega': 8.299276,
 'alpha': 0.707874,
 'c_thresh': 0.326692,
 'c_thresh_itm': 0.454423,
 'c_thresh_ass': 0.593675,
 'd_ass': 0.638351,
 'lamb': 0.131342,
 'rec_time_limit': 10000,
 'dt': 10,
 'nitems_in_accumulator': 50,
 'max_recalls': 50,
 'learn_while_retrieving': True,
 'a': 2800,
 'b': 20,
 'm': 0,
 'n': 1,
 'c1': 0,
 'No_recall': None,
 'var_enc': 1,
 'bad_enc_ratio': 1,
 'gamma_fc': 0.219794,
 'gamma_cf': 0.803381}

### Association - CR (Group1)

In [34]:
# Simulate the "Asso-CR" task (design group 1) and display the resulting trial table.
tag = "Asso-CR"
df_simu_g1 = simu_success(tag, params)
df_simu_g1

CMR2 Time: 104.5850191116333


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,0,1213,926,1.0,152.140854,0.600051,1,ROUGE,MOUTH,0,1,-1,0,1,-1.000000,-1.000000
1,0,0,830,717,0.0,275.939301,0.570283,1,LIST,HORIZON,1,1,34,0,1,-1.000000,-1.000000
2,0,0,577,1183,0.0,1737.835449,0.478272,1,FLEA,RELISH,1,1,0,0,1,-1.000000,-1.000000
3,0,0,588,439,1.0,57.900455,0.648355,1,FOLDER,DANDRUFF,1,0,25,0,1,-1.000000,-1.000000
4,0,0,687,1311,1.0,35.250328,0.673168,1,HANGER,SIRLOIN,1,1,22,0,1,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,99,4,1169,-1,1263.0,310.000000,0.726477,2,RAT,-1,331,0,226,99,1,2.641442,9.897938
29996,99,4,1349,-1,1436.0,510.000000,0.583148,2,SPHINX,-1,848,1,219,99,1,4.444906,8.169336
29997,99,4,1307,-1,1149.0,270.000000,0.747532,2,SILK,-1,72,0,194,99,1,4.631679,7.503875
29998,99,4,984,-1,1583.0,340.000000,0.600293,2,ORGAN,-1,899,1,196,99,1,5.183475,7.919909


In [35]:
# df_simu_g1.query("test==2 and session == 0 and list == 0")

In [36]:
# Collect one list of performance statistics per simulated subject of group 1.
subjects = np.unique(df_simu_g1.subject)
g1_stats = []
# groupby visits the subjects in sorted order, i.e. the same order as np.unique.
for subj, df_subj in df_simu_g1.groupby("subject"):
    df_subj = df_subj.copy()  # hand anal_perform its own frame, separate from df_simu_g1
    g1_stats.append(list(anal_perform(df_subj)))

In [37]:
# Group 1 summary: the per-subject statistics (one row per subject), then the
# across-subject mean and the standard error of the mean for each column.
# Columns are presumably p_rc, hr, far, q — confirm against anal_perform_S1.
print(np.array(g1_stats))
print(np.mean(np.array(g1_stats),axis=0))
print(sp.stats.sem(np.array(g1_stats),axis=0))

[[0.28       0.81       0.23       0.90815451]
 [0.39       0.68       0.22       0.80890382]
 [0.32       0.76       0.26       0.94563225]
 [0.31       0.69       0.23       0.79155944]
 [0.45       0.69       0.13       0.7373533 ]
 [0.37       0.71       0.23       0.66277372]
 [0.33       0.74       0.18       0.17376491]
 [0.4        0.75       0.2        0.81221833]
 [0.4        0.75       0.15       0.81221833]
 [0.33       0.69       0.29       0.22608696]
 [0.41       0.82       0.18       0.71400394]
 [0.38       0.77       0.24       0.76646033]
 [0.41       0.67       0.19       0.83835616]
 [0.51       0.79       0.21       0.78114144]
 [0.38       0.8        0.2        0.83615819]
 [0.4        0.75       0.29       0.81221833]
 [0.31       0.69       0.18       0.79155944]
 [0.47       0.74       0.18       0.978389  ]
 [0.52       0.82       0.27       0.900369  ]
 [0.44       0.74       0.18       0.91828794]
 [0.35       0.75       0.33       0.54358974]
 [0.36       

### Pair - CR (Group2)

In [38]:
# Simulate the "Pair-CR" task (design group 2) and display the resulting trial table.
tag = "Pair-CR"
df_simu_g2 = simu_success(tag, params)
df_simu_g2

CMR2 Time: 102.60784888267517


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,0,192,1402,0.0,268.583893,0.571634,1,BULB,SUIT,1,1,13,0,2,-1.000000,-1.000000
1,0,0,810,1481,0.0,442.109833,0.546714,1,LEMON,TOOTH,0,-1,-1,0,2,-1.000000,-1.000000
2,0,0,384,686,1.0,10.523116,0.733613,1,COTTAGE,HANDKERCHIEF,1,1,8,0,2,-1.000000,-1.000000
3,0,0,1317,1137,0.0,19.354403,0.205700,1,SKILLET,PUBLISHER,0,-1,-1,0,2,-1.000000,-1.000000
4,0,0,506,977,0.0,1140.165649,0.499345,1,DUNE,ONION,0,-1,-1,0,2,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35995,99,2,120,-1,1226.0,240.000000,0.645008,2,BELT,-1,917,0,97,99,2,5.202969,9.418498
35996,99,2,328,-1,251.0,240.000000,0.702934,2,CLOUD,-1,869,0,113,99,2,3.528714,9.467577
35997,99,2,456,-1,762.0,230.000000,0.696957,2,DIAGRAM,-1,1582,0,110,99,2,5.303322,10.842830
35998,99,2,655,-1,598.0,210.000000,0.773843,2,GRASS,-1,1520,0,114,99,2,5.166933,12.452250


In [39]:
# Collect one list of performance statistics per simulated subject of group 2.
subjects = np.unique(df_simu_g2.subject)
g2_stats = []
# groupby visits the subjects in sorted order, i.e. the same order as np.unique.
for subj, df_subj in df_simu_g2.groupby("subject"):
    df_subj = df_subj.copy()  # hand anal_perform its own frame, separate from df_simu_g2
    g2_stats.append(list(anal_perform(df_subj)))

In [40]:
# Group 2 summary: the per-subject statistics (one row per subject), then the
# across-subject mean and the standard error of the mean for each column.
# Columns are presumably p_rc, hr, far, q — confirm against anal_perform_S1.
print(np.array(g2_stats))
print(np.mean(np.array(g2_stats),axis=0))
print(sp.stats.sem(np.array(g2_stats),axis=0))

[[0.29166667 0.75       0.025      0.73827947]
 [0.35       0.75       0.00833333 0.88563727]
 [0.26666667 0.66666667 0.05833333 0.80584391]
 [0.29166667 0.68333333 0.         0.8935236 ]
 [0.31666667 0.76666667 0.00833333 0.85060565]
 [0.41666667 0.775      0.01666667 0.83154387]
 [0.31666667 0.78333333 0.06666667 0.71888515]
 [0.26666667 0.70833333 0.025      0.95468022]
 [0.35       0.76666667 0.01666667 0.78206725]
 [0.34166667 0.73333333 0.01666667 0.55319149]
 [0.225      0.7        0.04166667 0.82936315]
 [0.275      0.79166667 0.05       0.7863152 ]
 [0.28333333 0.74166667 0.025      0.95020188]
 [0.23333333 0.68333333 0.00833333 0.95153402]
 [0.24166667 0.75       0.         0.52697748]
 [0.225      0.675      0.01666667 0.74259681]
 [0.24166667 0.70833333 0.00833333 0.94746629]
 [0.175      0.75       0.05       0.8993483 ]
 [0.25       0.675      0.05833333 0.870317  ]
 [0.35       0.75       0.025      0.96327149]
 [0.275      0.69166667 0.01666667 0.96059306]
 [0.275      

### Item - CR (Group3)

In [41]:
# Simulate the "Item-CR" task (design group 3) and display the resulting trial table.
tag = "Item-CR"
df_simu_g3 = simu_success(tag, params)
df_simu_g3

CMR2 Time: 128.40015125274658


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,0,609,-1,1.0,2217.196289,0.466092,1,FROST,,1,0,14,0,3,-1.000000,-1.000000
1,0,0,101,-1,0.0,402.078583,0.357387,1,BATTERY,,0,-1,-1,0,3,-1.000000,-1.000000
2,0,0,927,-1,1.0,2485.594971,0.460378,1,MOVIE,,1,1,13,0,3,-1.000000,-1.000000
3,0,0,1303,-1,1.0,324.708893,0.562145,1,SIDEWALK,,1,0,31,0,3,-1.000000,-1.000000
4,0,0,945,-1,1.0,478.252441,0.542785,1,NEST,,1,0,2,0,3,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,99,3,509,-1,1442.0,220.000000,0.728876,2,DUSTPAN,,128,1,149,99,3,3.932299,14.749866
47996,99,3,2,-1,771.0,210.000000,0.689670,2,ACID,,970,1,163,99,3,4.086551,14.537923
47997,99,3,1463,-1,730.0,190.000000,0.601258,2,TICK,,1255,0,171,99,3,3.736512,14.427834
47998,99,3,1615,-1,563.0,210.000000,0.546254,2,WINNER,,667,1,157,99,3,4.174815,12.438631


In [42]:
# Collect one list of performance statistics per simulated subject of group 3.
subjects = np.unique(df_simu_g3.subject)
g3_stats = []
# groupby visits the subjects in sorted order, i.e. the same order as np.unique.
for subj, df_subj in df_simu_g3.groupby("subject"):
    df_subj = df_subj.copy()  # hand anal_perform its own frame, separate from df_simu_g3
    g3_stats.append(list(anal_perform(df_subj)))

In [43]:
# Group 3 summary: the per-subject statistics (one row per subject), then the
# across-subject mean and the standard error of the mean for each column.
# Columns are presumably p_rc, hr, far, q — confirm against anal_perform_S1.
print(np.array(g3_stats))
print(np.mean(np.array(g3_stats),axis=0))
print(sp.stats.sem(np.array(g3_stats),axis=0))

[[ 0.15625     0.7375      0.45        0.91729323]
 [ 0.125       0.76875     0.39375     0.63903282]
 [ 0.11875     0.79375     0.29375    -0.05085625]
 [ 0.11875     0.8         0.4375      0.55711423]
 [ 0.1125      0.76875     0.45625     0.85867381]
 [ 0.15625     0.73125     0.4         0.06635071]
 [ 0.1375      0.675       0.38125     0.78805006]
 [ 0.11875     0.7125      0.43125     0.70653827]
 [ 0.09375     0.675       0.325       0.2937794 ]
 [ 0.1875      0.75        0.375       0.51821862]
 [ 0.13125     0.825       0.4625      0.53924915]
 [ 0.10625     0.725       0.35625     0.65284974]
 [ 0.0875      0.74375     0.35625     0.5477707 ]
 [ 0.16875     0.725       0.3875      0.26198083]
 [ 0.15625     0.75625     0.2875      0.73152822]
 [ 0.09375     0.79375     0.4125      0.80451781]
 [ 0.10625     0.76875     0.4625      0.33962264]
 [ 0.10625     0.75625     0.5         0.60113422]
 [ 0.15        0.78125     0.35        0.31282292]
 [ 0.1125      0.75        0.41

In [44]:
# One row per group (g1, g2, g3): the across-subject mean of every statistic.
stats = np.array([
    list(np.mean(np.array(group_stats), axis=0))
    for group_stats in (g1_stats, g2_stats, g3_stats)
])
stats

array([[0.3733    , 0.7395    , 0.2099    , 0.78391467],
       [0.28325   , 0.735     , 0.02525   , 0.80690073],
       [0.127125  , 0.7518125 , 0.403875  , 0.50537087]])

In [45]:
# Target values compared against the simulated means. Rows line up with the rows
# of `stats`; columns are p_rc, hr, far, q.
# NOTE(review): the source of these target numbers is not stated here — presumably
# the empirical results being fitted; confirm.
ground_truth = np.array([
    [0.42, 0.72, 0.22, 0.81],
    [0.30, 0.80, 0.12, 0.71],
    [0.19, 0.67, 0.15, 0.57],
])
# Mean squared difference over all twelve entries.
err = np.power(stats - ground_truth, 2).mean()
err

0.008791038362573568