In [1]:
import numpy as np
import pickle
import math
import matplotlib.pyplot as plt
import pandas as pd
import time
import pprint
import seaborn as sns
import CMR_IA as cmr
import scipy as sp
# use seaborn's 'paper' plotting context for figures in this notebook
sns.set_context('paper')
# print floats in fixed-point form rather than scientific notation
np.set_printoptions(suppress=True)
# pd.set_option('display.max_columns', None)

In [2]:
def simu_success(tag, params):
    """Run the CMR simulation for one task group of the S1 design.

    Parameters
    ----------
    tag : str
        Task label: "Asso-CR" (group 1), "Pair-CR" (group 2) or
        "Item-CR" (group 3).
    params : dict
        Model parameters. The dict is NOT modified; a copy with
        ``nitems_in_accumulator`` set for the selected task is handed to
        the model.

    Returns
    -------
    pandas.DataFrame
        Simulation output merged with the test design, with two extra
        columns ``corr_fin`` and ``omax_fin`` (both -1 for the first
        ``test1_num`` trials of every session) and ``s_resp`` recomputed
        for ``test == 1`` rows from a threshold on ``csim``.

    Raises
    ------
    ValueError
        If ``tag`` is not one of the known task labels.
    """
    # which task: tag -> (nitems, test1_num, group number in the design file)
    task_design = {
        "Asso-CR": (96, 40, 1),
        "Pair-CR": (176, 80, 2),
        "Item-CR": (136, 80, 3),
    }
    if tag not in task_design:
        # fail before any file is read or any model run is started
        raise ValueError(
            f"unknown task tag {tag!r}; expected one of {sorted(task_design)}"
        )
    nitems, test1_num, group = task_design[tag]
    path = "../Data/simuS1_design.pkl"

    # load stimuli
    # NOTE: pickle.load can execute arbitrary code stored in the file; only
    # use this with trusted project data.
    with open(path, 'rb') as inp:
        df_study = pickle.load(inp)
        df_test = pickle.load(inp)
    df_study = df_study.query(f"group == {group}")
    df_test = df_test.query(f"group == {group}")

    # load semantic matrix
    s_mat = np.load('../Data/wordpools/ltp_FR_similarity_matrix.npy')

    # set n_item to fit the study on a copy, so the parameter dict shared by
    # the caller across groups stays untouched
    sim_params = dict(params, nitems_in_accumulator=nitems)

    # run CMR
    df_simu, _f_in_acc, f_in_dif = cmr.run_success_multi_sess(sim_params, df_study, df_test, s_mat)
    # NOTE(review): this assignment aligns on the index; it assumes df_simu
    # carries the same index labels as the filtered df_test -- confirm.
    df_simu['test'] = df_test['test']
    df_simu = df_simu.merge(df_test, on=['session', 'test', 'test_itemno1', 'test_itemno2'])

    # get f_in
    corr_fin_all = []
    omax_fin_all = []
    for sess in np.unique(df_simu.session):
        study_sess = df_study.loc[df_study.session == sess]
        test_sess = df_test.loc[df_test.session == sess]

        # sorted unique item numbers seen in this session (negative test
        # item numbers are excluded)
        item_pool = np.unique(np.concatenate((
            study_sess.study_itemno1.to_numpy(),
            study_sess.study_itemno2.to_numpy(),
            test_sess.test_itemno1[test_sess.test_itemno1 >= 0].to_numpy(),
            test_sess.test_itemno2[test_sess.test_itemno2 >= 0].to_numpy(),
        )))

        # correct answers of the trials after the first test1_num trials
        # (positional slice), mapped to their position in the sorted pool
        targets = df_simu.loc[df_simu.session == sess, "correct_ans"].iloc[test1_num:]
        target_pos = np.searchsorted(item_pool, targets)

        # presumably f_in_dif[sess][k] is indexed by pool position -- verify
        # against cmr.run_success_multi_sess
        sess_f_in = f_in_dif[sess]
        corr_fin = [sess_f_in[k][pos] for k, pos in enumerate(target_pos)]
        omax_fin = [np.max(np.delete(sess_f_in[k], pos)) for k, pos in enumerate(target_pos)]

        # -1 placeholders for the first test1_num trials of the session
        corr_fin_all.extend([-1] * test1_num)
        corr_fin_all.extend(corr_fin)
        omax_fin_all.extend([-1] * test1_num)
        omax_fin_all.extend(omax_fin)

    df_simu['corr_fin'] = corr_fin_all
    df_simu['omax_fin'] = omax_fin_all

    # optimal threshold for test1, only when manually:
    # the mean of the per-correct_ans mean csim on test 1
    csim_two = df_simu.query("test==1").groupby("correct_ans").csim.mean()
    opt_thresh = np.mean(csim_two)
    # test 1 rows get 1/0 from the threshold; all other rows keep s_resp
    df_simu['s_resp'] = np.where(
        df_simu['test'] == 1,
        (df_simu['csim'] > opt_thresh).astype(float),
        df_simu['s_resp'],
    )

    return df_simu

In [3]:
# anal_perform used by pso
import sys
sys.path.append("../Modeling/CMR_IA/fitting")
from object_funcs import anal_perform_S1 as anal_perform

In [4]:
# def anal_perform(df_simu):

#     # get correctness
#     df_simu['correct'] = df_simu.s_resp == df_simu.correct_ans

#     # recognition performance
#     df_recog = df_simu.query("test==1")
#     hr_far = df_recog.groupby("correct_ans")["s_resp"].mean().to_frame(name="Yes rate")
#     hr = hr_far['Yes rate'][1]
#     far = hr_far['Yes rate'][0]
#     z_hr_far = sp.stats.norm.ppf(hr_far)
#     d_prime = z_hr_far[1].item() - z_hr_far[0].item()
#     print("recognition: \n", hr_far)
#     print("d_prime: ", d_prime)

#     # cued recall performance
#     df_cr = df_simu.query("test==2")
#     p_rc = df_cr.correct.mean()
#     print("cued recall: \n", p_rc)

#     # analyze pair
#     def get_pair(df_tmp):
#         df_tmp_pair = pd.pivot_table(df_tmp,index="pair_idx",columns="test",values="correct")
#         df_tmp_pair.columns = ["test1","test2"]
#         df_tmp_pair.reset_index(inplace=True)
#         return df_tmp_pair

#     df_simu_p = df_simu.query("pair_idx >= 0")
#     df_pair = df_simu_p.groupby("session").apply(get_pair).reset_index()
#     test2_rsp = pd.Categorical(df_pair.test2, categories=[0,1])
#     test1_rsp = pd.Categorical(df_pair.test1, categories=[0,1])
#     df_tab = pd.crosstab(index=test2_rsp,columns=test1_rsp, rownames=['test2'], colnames=['test1'], normalize=False, dropna=False)
#     print("contingency table: \n", df_tab)
#     # print("contingency table norm: \n", pd.crosstab(index=df_pair.test2,columns=df_pair.test1,normalize='all'))

#     # compute Yule's Q
#     def Yule_Q(A, B, C, D):
#         return (A * D - B * C) / (A * D + B * C)
#     q = Yule_Q(df_tab[1][1]+0.5,df_tab[0][1]+0.5,df_tab[1][0]+0.5,df_tab[0][0]+0.5)  # add 0.5
#     print("Q: ", q)

#     return p_rc, hr, far, q

### Study Params

In [5]:
# start from the parameter dict returned by the model's default factory
params = cmr.make_default_params()

In [6]:
# Manually adjusted params, written over the defaults in two batches.
# Study params should be the same across three groups.
params.update({
    'beta_enc': 0.3,
    'gamma_fc': 0.2,
    'gamma_cf': 0.2,
    's_fc': 0.2,
    's_cf': 0.2,
    'phi_s': 1,
    'phi_d': 0.6,
    'd_ass': 1,
    'learn_while_retrieving': False,
    'var_enc': 1,
    'bad_enc_ratio': 1,
})
params.update({
    'beta_cue': 0.4,
    'beta_rec': 0.1,  # beta for retrieved item
    'beta_distract': 0.1,
    # 'beta_rec_post': 0.1,
    'c_thresh_itm': 0.01,
    'c_thresh_ass': 0.01,
    'c_thresh': 0.01,
    'kappa': 0.01,
    'lamb': 0.002,
    'eta': 0.002,
    'omega': 3,
    'alpha': 1,
})
# show the full parameter set as the cell output
params

{'beta_enc': 0.3,
 'beta_rec': 0.1,
 'beta_cue': 0.4,
 'beta_rec_post': 0.5,
 'beta_distract': 0.1,
 'phi_s': 1,
 'phi_d': 0.6,
 's_cf': 0.2,
 's_fc': 0.2,
 'kappa': 0.01,
 'eta': 0.002,
 'omega': 3,
 'alpha': 1,
 'c_thresh': 0.01,
 'c_thresh_itm': 0.01,
 'c_thresh_ass': 0.01,
 'd_ass': 1,
 'lamb': 0.002,
 'rec_time_limit': 60000.0,
 'dt': 10,
 'nitems_in_accumulator': 50,
 'max_recalls': 50,
 'learn_while_retrieving': False,
 'a': 2800,
 'b': 20,
 'm': 0,
 'n': 1,
 'c1': 0,
 'No_recall': None,
 'var_enc': 1,
 'bad_enc_ratio': 1,
 'gamma_fc': 0.2,
 'gamma_cf': 0.2}

In [7]:
# # pso results
# import sys
# sys.path.append("../Modeling/CMR_IA/fitting")
# from optimization_utils import make_boundary

# _,_,what_to_fit = make_boundary(sim_name='S1')
# optim_params = np.loadtxt("../Modeling/CMR_IA/fitting/outparams_backup/S1_230802_100-100.txt")
# for pname, pvalue in zip(what_to_fit, optim_params):
#     params[pname] = pvalue
# params

### Association - CR (Group1)

In [8]:
# Simulate the "Asso-CR" task (group 1 of the design) with the shared params;
# the resulting frame is shown as the cell output.
tag = "Asso-CR"
df_simu_g1 = simu_success(tag, params)
df_simu_g1

CMR2 Time: 19.92398738861084


Unnamed: 0,session,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,146,1542,1.0,0.010489,0.634740,1,BOLT,UTENSIL,1,0,34,1,1,-1.000000,-1.000000
1,0,147,1118,0.0,1.457309,0.388039,1,BOMB,PRAIRIE,1,1,0,1,1,-1.000000,-1.000000
2,0,1185,159,0.0,1.545035,0.385116,1,REPTILE,BOX,0,1,-1,1,1,-1.000000,-1.000000
3,0,358,1421,1.0,0.105787,0.519185,1,COMPASS,SWING,1,1,35,1,1,-1.000000,-1.000000
4,0,1201,1359,1.0,0.119928,0.512912,1,ROCK,SPRINKLE,1,0,12,1,1,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17995,299,886,-1,1336.0,2280.000000,0.534485,2,MEDICINE,-1,1336,1,22,100,1,-0.506431,-0.589019
17996,299,136,-1,1572.0,2880.000000,0.496333,2,BLENDER,-1,1572,1,19,100,1,-0.556753,-0.587156
17997,299,157,-1,1164.0,2500.000000,0.524669,2,BOUQUET,-1,1164,0,35,100,1,-0.519074,-0.601125
17998,299,1303,-1,622.0,3100.000000,0.400185,2,SIDEWALK,-1,346,1,1,100,1,-0.635673,-0.612145


In [9]:
# Score every simulated subject of group 1 separately; each entry of
# g1_stats is the list of values returned by anal_perform for one subject.
subjects = np.unique(df_simu_g1.subject)
g1_stats = []
for subj in subjects:
    # select this subject's rows and analyse an independent copy
    df_subj = df_simu_g1.loc[df_simu_g1.subject == subj].copy()
    g1_stats.append(list(anal_perform(df_subj)))

In [10]:
# per-subject statistics, then their column-wise mean and standard error
g1_arr = np.array(g1_stats)
print(g1_arr)
print(g1_arr.mean(axis=0))
print(sp.stats.sem(g1_arr, axis=0))

[[ 0.35        0.91666667  0.16666667  0.74538745]
 [ 0.46666667  0.85        0.18333333  0.72828096]
 [ 0.56666667  0.91666667  0.15        0.89276808]
 [ 0.43333333  0.86666667  0.11666667  0.88888889]
 [ 0.45        0.91666667  0.2         0.4587156 ]
 [ 0.4         0.95        0.16666667  0.67317073]
 [ 0.35        0.88333333  0.11666667  0.81690141]
 [ 0.45        0.81666667  0.28333333  0.77511962]
 [ 0.33333333  0.85        0.13333333  0.85035629]
 [ 0.43333333  0.85        0.1         0.45544554]
 [ 0.3         0.9         0.16666667  0.73646209]
 [ 0.45        0.91666667  0.18333333  0.07854985]
 [ 0.5         0.85        0.11666667  0.76274165]
 [ 0.43333333  0.9         0.2         0.84718499]
 [ 0.36666667  0.8         0.28333333  0.91001698]
 [ 0.36666667  0.86666667  0.28333333  0.54676259]
 [ 0.3         0.85        0.11666667  0.48379052]
 [ 0.43333333  0.86666667  0.11666667  0.64516129]
 [ 0.43333333  0.93333333  0.1         0.7732342 ]
 [ 0.41666667  0.9         0.15

### Pair - CR (Group2)

In [11]:
# Simulate the "Pair-CR" task (group 2 of the design) with the shared params;
# the resulting frame is shown as the cell output.
tag = "Pair-CR"
df_simu_g2 = simu_success(tag, params)
df_simu_g2

CMR2 Time: 81.59914779663086


Unnamed: 0,session,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,1109,898,1.0,0.028645,0.584507,1,PORPOISE,MINT,1,1,32,1,2,-1.000000,-1.000000
1,0,661,567,0.0,23.437260,0.249152,1,GRILL,FINGER,0,-1,-1,1,2,-1.000000,-1.000000
2,0,1086,732,1.0,0.082826,0.531419,1,PLATE,ICING,1,1,39,1,2,-1.000000,-1.000000
3,0,249,720,1.0,0.052403,0.554308,1,CARROT,HOSE,1,1,13,1,2,-1.000000,-1.000000
4,0,1385,340,1.0,0.093647,0.525280,1,STONE,COD,1,0,28,1,2,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,499,562,-1,334.0,2950.000000,0.492958,2,FIELD,-1,334,1,0,100,2,-0.475192,-0.580118
59996,499,1047,-1,112.0,2610.000000,0.440358,2,PEPPER,-1,296,0,27,100,2,-3.402697,-0.552321
59997,499,1527,-1,821.0,2920.000000,0.490133,2,TWIG,-1,112,0,30,100,2,-3.375248,-0.518277
59998,499,574,-1,1461.0,2650.000000,0.521709,2,FLANNEL,-1,1461,0,24,100,2,-0.521370,-0.557040


In [12]:
# Score every simulated subject of group 2 separately; each entry of
# g2_stats is the list of values returned by anal_perform for one subject.
subjects = np.unique(df_simu_g2.subject)
g2_stats = []
for subj in subjects:
    # select this subject's rows and analyse an independent copy
    df_subj = df_simu_g2.loc[df_simu_g2.subject == subj].copy()
    g2_stats.append(list(anal_perform(df_subj)))

In [13]:
# per-subject statistics, then their column-wise mean and standard error
g2_arr = np.array(g2_stats)
print(g2_arr)
print(g2_arr.mean(axis=0))
print(sp.stats.sem(g2_arr, axis=0))

[[ 0.385       0.975       0.02        0.31518625]
 [ 0.445       0.965       0.05        0.85556323]
 [ 0.39        0.98        0.02        0.71272727]
 [ 0.44        0.975       0.03        0.80111008]
 [ 0.345       0.995       0.015       0.2300885 ]
 [ 0.365       0.97        0.015       0.77437326]
 [ 0.41        0.985       0.025       0.66666667]
 [ 0.41        0.965       0.015       0.83469236]
 [ 0.395       0.955       0.02        0.59343284]
 [ 0.405       0.98        0.025       0.23439211]
 [ 0.365       0.97        0.02        0.77437326]
 [ 0.405       0.96        0.02        0.8510354 ]
 [ 0.375       0.98        0.035       0.69662921]
 [ 0.43        0.98        0.01        0.75140607]
 [ 0.45        0.995       0.03        0.42519685]
 [ 0.35        0.97        0.01        0.76080692]
 [ 0.375       0.98        0.02        0.17322835]
 [ 0.38        0.975       0.015       0.75130073]
 [ 0.39        0.97        0.01        0.79507476]
 [ 0.34        0.99        0.01

### Item - CR (Group3)

In [14]:
# Simulate the "Item-CR" task (group 3 of the design) with the shared params;
# the resulting frame is shown as the cell output.
tag = "Item-CR"
df_simu_g3 = simu_success(tag, params)
df_simu_g3

CMR2 Time: 49.75562119483948


Unnamed: 0,session,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,test_item2,correct_ans,order,pair_idx,subject,group,corr_fin,omax_fin
0,0,1292,-1,1.0,0.350559,0.459280,1,SHORTCAKE,,1,0,14,1,3,-1.000000,-1.000000
1,0,1052,-1,1.0,0.283493,0.469897,1,PERSON,,1,0,39,1,3,-1.000000,-1.000000
2,0,135,-1,1.0,0.871508,0.413745,1,BLANKET,,1,0,19,1,3,-1.000000,-1.000000
3,0,1365,-1,0.0,354.443207,0.113341,1,STAFF,,0,-1,-1,1,3,-1.000000,-1.000000
4,0,760,-1,0.0,148.892548,0.156707,1,JOURNAL,,0,-1,-1,1,3,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,399,454,-1,1497.0,3340.000000,0.419670,2,DETECTIVE,,37,0,10,100,3,-0.599770,-0.587448
47996,399,449,-1,1242.0,2430.000000,0.535716,2,DEPARTMENT,,1242,0,34,100,3,-0.489377,-0.582558
47997,399,859,-1,942.0,2380.000000,0.529333,2,MAJOR,,942,1,20,100,3,-0.500488,-0.612095
47998,399,1584,-1,1178.0,2840.000000,0.495540,2,WALNUT,,1178,1,21,100,3,-0.550774,-0.593766


In [15]:
# Score every simulated subject of group 3 separately; each entry of
# g3_stats is the list of values returned by anal_perform for one subject.
subjects = np.unique(df_simu_g3.subject)
g3_stats = []
for subj in subjects:
    # select this subject's rows and analyse an independent copy
    df_subj = df_simu_g3.loc[df_simu_g3.subject == subj].copy()
    g3_stats.append(list(anal_perform(df_subj)))

In [16]:
# per-subject statistics, then their column-wise mean and standard error
g3_arr = np.array(g3_stats)
print(g3_arr)
print(g3_arr.mean(axis=0))
print(sp.stats.sem(g3_arr, axis=0))

[[0.4125     0.83125    0.19375    0.64060318]
 [0.4375     0.83125    0.2625     0.59281275]
 [0.58125    0.85       0.125      0.44843793]
 [0.46875    0.80625    0.19375    0.56882449]
 [0.475      0.85       0.2        0.50215952]
 [0.48125    0.79375    0.24375    0.74256799]
 [0.425      0.90625    0.2375     0.33829499]
 [0.44375    0.80625    0.19375    0.73684211]
 [0.4        0.8375     0.18125    0.69779736]
 [0.33125    0.7875     0.21875    0.62469734]
 [0.4375     0.83125    0.1875     0.59281275]
 [0.45       0.81875    0.18125    0.64675131]
 [0.3625     0.78125    0.11875    0.28697572]
 [0.45       0.8375     0.16875    0.50760299]
 [0.41875    0.81875    0.2        0.83100381]
 [0.4625     0.79375    0.24375    0.45985899]
 [0.45       0.85       0.2125     0.54989697]
 [0.43125    0.8125     0.2375     0.7802385 ]
 [0.375      0.85       0.225      0.632552  ]
 [0.475      0.86875    0.2375     0.70769231]
 [0.35625    0.83125    0.2375     0.34742927]
 [0.4875     

In [17]:
# One row per group (g1, g2, g3): column-wise mean of the per-subject statistics.
stats = np.array([
    np.mean(np.array(group_stats), axis=0)
    for group_stats in (g1_stats, g2_stats, g3_stats)
])
stats

array([[0.4135    , 0.88366667, 0.15633333, 0.71231838],
       [0.388     , 0.9761    , 0.02175   , 0.6337646 ],
       [0.4314375 , 0.832625  , 0.1944375 , 0.60041117]])

In [18]:
# Target values compared against `stats`, one row per group.
# Columns: p_rc, hr, far, q
ground_truth = np.array([
    [0.42, 0.72, 0.22, 0.81],
    [0.30, 0.80, 0.12, 0.71],
    [0.19, 0.67, 0.15, 0.57],
])
# mean squared difference between the simulated statistics and the targets
err = np.mean((stats - ground_truth) ** 2)
err

0.015190230591417024