In [1]:
import numpy as np
import pickle
import math
import matplotlib.pyplot as plt
import pandas as pd
import time
import pprint
import seaborn as sns
import CMR_IA as cmr
import scipy as sp
import pingouin as pg
import sys

sns.set_context("paper")
np.set_printoptions(suppress=True)
# pd.set_option('display.max_columns', None)

### Study Params

In [2]:
# # manually adjusted params
# # study params should be the same across three groups
# params.update(
#     beta_enc = 0.3,
#     gamma_fc = 0.2,
#     gamma_cf = 0.2,
#     s_fc = 0.2,
#     s_cf = 0.2,
#     phi_s = 1,
#     phi_d = 0.6,
#     d_ass = 1,
#     learn_while_retrieving = False,
#     var_enc = 1,
#     bad_enc_ratio = 1,
# )
# params.update(
#     beta_cue = 0.4,
#     beta_rec = 0.1,  # beta for retrieved item
#     beta_rec_post = 1,
#     beta_distract = 0.1,
#     # beta_rec_post = 0.1,
#     c_thresh_itm = 0.01,
#     c_thresh_ass = 0.01,
#     c_thresh = 0.01,
#     kappa = 0.01,
#     lamb = 0.002,
#     eta = 0.002,
#     omega = 3,
#     alpha = 1
# )
# params

In [3]:
sys.path.append("../../Modeling/CMR_IA/fitting")
from optimization_utils import make_boundary

# read pso results
_, _, what_to_fit = make_boundary(sim_name="S1")
optim_params = np.loadtxt("../../Modeling/CMR_IA/fitting/outparams_backup/S1_240726_2-2.txt")

# get params
# it seems that the size of all potential words matters, the more, the worse CR. RISK IT!
params = cmr.make_default_params()
for pname, pvalue in zip(what_to_fit, optim_params):
    params[pname] = pvalue
params.update(learn_while_retrieving=True, rec_time_limit=10000, use_new_context=True, d_ass=0)
params

{'beta_enc': 0.264174,
 'beta_rec': 0.238173,
 'beta_cue': 0.74472,
 'beta_rec_post': 0.937649,
 'beta_distract': 0.459556,
 'phi_s': 3.237309,
 'phi_d': 3.281996,
 's_cf': 0.957884,
 's_fc': 0.048382,
 'kappa': 0.438911,
 'eta': 0.112155,
 'omega': 5.864869,
 'alpha': 0.76891,
 'c_thresh': 0.441438,
 'c_thresh_itm': 0.538089,
 'c_thresh_ass': 0.466213,
 'd_ass': 0,
 'lamb': 0.091887,
 'rec_time_limit': 10000,
 'dt': 10,
 'nitems_in_accumulator': 50,
 'max_recalls': 50,
 'learn_while_retrieving': True,
 'a': 2800,
 'b': 20,
 'm': 0,
 'n': 1,
 'c1': 0,
 'thresh_sigma': 0,
 'No_recall': None,
 'var_enc': 1,
 'bad_enc_ratio': 1,
 'gamma_fc': 0.008891,
 'gamma_cf': 0.793915,
 'use_new_context': True}

### Simulation Function

In [4]:
def simu_success(tag, params):
    # which task
    path = "simuS1_data/simuS1_design.pkl"
    if tag == "Item-CR":
        # path = "../Data/simuS1_group3_design.pkl"
        nitems = 48 * 4  # 136
        test1_num = 80
        i = 1
    elif tag == "Pair-CR":
        # path = "../Data/simuS1_group2_design.pkl"
        nitems = 48 * 4  # 176
        test1_num = 80
        i = 2
    elif tag == "Asso-CR":
        # path = "../Data/simuS1_group1_design.pkl"
        nitems = 48 * 4  # 96
        test1_num = 40
        i = 3

    # load stimuli
    with open(path, "rb") as inp:
        df_study = pickle.load(inp)
        df_test = pickle.load(inp)
    df_study = df_study.query(f"group == {i}")
    df_test = df_test.query(f"group == {i}")

    # load semantic matrix
    s_mat = np.load("../../Data/wordpools/ltp_FR_similarity_matrix.npy")

    # update n_item in params to fit the study
    params.update(nitems_in_accumulator=nitems)
    # print(params)

    # run CMR
    df_simu, f_in_acc, f_in_dif = cmr.run_success_multi_sess(params, df_study, df_test, s_mat)
    df_simu["test"] = df_test["test"]
    df_simu = df_simu.merge(df_test, on=["session", "list", "test", "test_itemno1", "test_itemno2"])

    # get f_in
    sessions = np.unique(df_simu.session)
    tmp_corr_fin = []
    tmp_omax_fin = []

    for sess in sessions:
        df_tmp = df_study.loc[df_study.session == sess]
        tmp1 = df_tmp.study_itemno1.to_numpy()
        tmp2 = df_tmp.study_itemno2.to_numpy()
        df_tmp2 = df_test.loc[df_test.session == sess]
        tmp3 = df_tmp2.test_itemno1[df_tmp2.test_itemno1 >= 0].to_numpy()
        tmp4 = df_tmp2.test_itemno2[df_tmp2.test_itemno2 >= 0].to_numpy()
        tmp = np.concatenate((tmp1, tmp2, tmp3, tmp4))
        tmp = np.unique(tmp)  # sort
        nlists = len(np.unique(df_simu.list))

        for lst in range(nlists):
            tmp_corr = df_simu.query(f"session == {sess} and list == {lst}")["correct_ans"][test1_num:]
            corrid = np.searchsorted(tmp, tmp_corr)
            corr_fin = [f_in_dif[sess][lst * int(test1_num / 2) + i][id] for i, id in enumerate(corrid)]
            omax_fin = [np.max(np.delete(f_in_dif[sess][lst * int(test1_num / 2) + i], id)) for i, id in enumerate(corrid)]

            tmp_corr_fin = tmp_corr_fin + [-1] * test1_num + corr_fin
            tmp_omax_fin = tmp_omax_fin + [-1] * test1_num + omax_fin

    df_simu["corr_fin"] = tmp_corr_fin
    df_simu["omax_fin"] = tmp_omax_fin

    # optimal threshold for test1 (only when manually!!)
    # csim_two = df_simu.query("test==1").groupby("correct_ans").csim.mean()
    # opt_thresh = np.mean(csim_two)
    # df_simu['s_resp'] = df_simu.apply(lambda x: (1 if x['csim'] > opt_thresh else 0) if x['test'] == 1 else x['s_resp'], axis=1)

    return df_simu

### Analysis Function

In [5]:
# anal_perform used by pso
sys.path.append("../../Modeling/CMR_IA/fitting")
from object_funcs import anal_perform_S1 as anal_perform

In [6]:
# def anal_perform(df_simu):
#     # Get correctness
#     df_simu["correct"] = df_simu.s_resp == df_simu.correct_ans

#     # Recognition performance
#     df_recog = df_simu.query("test==1")
#     recog_resp = df_recog["s_resp"].to_numpy()
#     is_old = df_recog["correct_ans"].to_numpy()
#     is_new = 1 - is_old
#     old_num = np.sum(is_old)
#     new_num = np.sum(is_new)
#     hr = np.sum(recog_resp * is_old) / old_num
#     far = np.sum(recog_resp * is_new) / new_num

#     # Cued recall performance
#     df_cr = df_simu.query("test==2")
#     cr_resp = df_cr["s_resp"].to_numpy()
#     cr_truth = df_cr["correct_ans"].to_numpy()
#     p_rc = np.mean(cr_resp == cr_truth)

#     # successive test performance and calculate Q
#     df_simu_study = df_simu.query("pair_idx >= 0")
#     df_pair = pd.pivot_table(df_simu_study, index="pair_idx", columns="test", values="correct")
#     test1_resp = df_pair[1].to_numpy(dtype=int)
#     test2_resp = df_pair[2].to_numpy(dtype=int)
#     A = np.sum((test1_resp == 1) & (test2_resp == 1)) + 0.5
#     B = np.sum((test1_resp == 0) & (test2_resp == 1)) + 0.5
#     C = np.sum((test1_resp == 1) & (test2_resp == 0)) + 0.5
#     D = np.sum((test1_resp == 0) & (test2_resp == 0)) + 0.5
#     q = (A * D - B * C) / (A * D + B * C)

#     return p_rc, hr, far, q

### Item - CR (Group1)

In [7]:
tag = "Item-CR"
df_simu_g1 = simu_success(tag, params)
df_simu_g1

CMR2 Time: 75.26610207557678


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,correct_ans,order,pair_idx,subject,group,test_item2,corr_fin,omax_fin
0,0,0,609,-1,0.0,0.701214,0.123473,1,FROST,1,0,14,0,1,,-1.000000,-1.000000
1,0,0,101,-1,0.0,0.213678,0.064056,1,BATTERY,0,-1,-1,0,1,,-1.000000,-1.000000
2,0,0,927,-1,0.0,0.210802,0.063378,1,MOVIE,1,1,13,0,1,,-1.000000,-1.000000
3,0,0,1303,-1,0.0,0.284026,0.078286,1,SIDEWALK,1,0,31,0,1,,-1.000000,-1.000000
4,0,0,945,-1,0.0,0.239791,0.069821,1,NEST,1,0,2,0,1,,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,99,3,509,-1,-1.0,-1.000000,-1.000000,2,DUSTPAN,128,1,149,99,1,,-0.622198,-0.395961
47996,99,3,2,-1,-1.0,-1.000000,-1.000000,2,ACID,970,1,163,99,1,,-0.467549,-0.440031
47997,99,3,1463,-1,-1.0,-1.000000,-1.000000,2,TICK,1255,0,171,99,1,,-0.397174,-0.379490
47998,99,3,1615,-1,-1.0,-1.000000,-1.000000,2,WINNER,667,1,157,99,1,,-0.644177,-0.468187


In [8]:
subjects = np.unique(df_simu_g1.subject)
g1_stats = []
for subj in subjects:
    df_subj = df_simu_g1.query(f"subject == {subj}").copy()
    g1_stats.append(list(anal_perform(df_subj)))
g1_stats = np.array(g1_stats)

In [9]:
print(g1_stats)
print("mean:")
print(np.mean(g1_stats, axis=0))
print("se:")
print(sp.stats.sem(g1_stats, axis=0))

[[0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         0.         0.         0.99378882]
 [0.         

In [10]:
# calculate d prime
g1_ds = sp.stats.norm.ppf(g1_stats[:, 1]) - sp.stats.norm.ppf(g1_stats[:, 2])
print(np.mean(g1_ds))
print(sp.stats.sem(g1_ds))

nan
nan


  g1_ds = sp.stats.norm.ppf(g1_stats[:, 1]) - sp.stats.norm.ppf(g1_stats[:, 2])


### Pair - CR (Group2)

In [11]:
tag = "Pair-CR"
df_simu_g2 = simu_success(tag, params)
df_simu_g2

CMR2 Time: 55.69653296470642


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,correct_ans,order,pair_idx,subject,group,test_item2,corr_fin,omax_fin
0,0,0,192,1402,0.0,0.214567,0.064264,1,BULB,1,1,13,0,2,SUIT,-1.000000,-1.000000
1,0,0,810,1481,0.0,0.350211,0.088759,1,LEMON,0,-1,-1,0,2,TOOTH,-1.000000,-1.000000
2,0,0,384,686,0.0,0.387828,0.093860,1,COTTAGE,1,1,8,0,2,HANDKERCHIEF,-1.000000,-1.000000
3,0,0,1317,1137,0.0,0.101749,0.026958,1,SKILLET,0,-1,-1,0,2,PUBLISHER,-1.000000,-1.000000
4,0,0,506,977,0.0,0.255158,0.072927,1,DUNE,0,-1,-1,0,2,ONION,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35995,99,2,120,-1,-1.0,-1.000000,-1.000000,2,BELT,917,0,97,99,2,-1,-0.672426,-0.464047
35996,99,2,328,-1,-1.0,-1.000000,-1.000000,2,CLOUD,869,0,113,99,2,-1,-0.598614,-0.484627
35997,99,2,456,-1,-1.0,-1.000000,-1.000000,2,DIAGRAM,1582,0,110,99,2,-1,-0.531210,-0.470419
35998,99,2,655,-1,-1.0,-1.000000,-1.000000,2,GRASS,1520,0,114,99,2,-1,-0.368879,-0.373860


In [12]:
df_simu_g2.query("session == 0 and list == 0 and test == 1")

Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,correct_ans,order,pair_idx,subject,group,test_item2,corr_fin,omax_fin
0,0,0,192,1402,0.0,0.214567,0.064264,1,BULB,1,1,13,0,2,SUIT,-1.0,-1.0
1,0,0,810,1481,0.0,0.350211,0.088759,1,LEMON,0,-1,-1,0,2,TOOTH,-1.0,-1.0
2,0,0,384,686,0.0,0.387828,0.093860,1,COTTAGE,1,1,8,0,2,HANDKERCHIEF,-1.0,-1.0
3,0,0,1317,1137,0.0,0.101749,0.026958,1,SKILLET,0,-1,-1,0,2,PUBLISHER,-1.0,-1.0
4,0,0,506,977,0.0,0.255158,0.072927,1,DUNE,0,-1,-1,0,2,ONION,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,0,0,1635,1509,0.0,0.160984,0.049898,1,ZEBRA,1,1,2,0,2,TROPHY,-1.0,-1.0
76,0,0,1490,1612,0.0,0.206237,0.062284,1,TOWER,1,1,34,0,2,WINDSHIELD,-1.0,-1.0
77,0,0,1153,1314,0.0,0.315521,0.083544,1,RACCOON,1,1,35,0,2,SKELETON,-1.0,-1.0
78,0,0,172,579,0.0,0.272457,0.076206,1,BRICK,0,-1,-1,0,2,FLESH,-1.0,-1.0


In [37]:
subjects = np.unique(df_simu_g2.subject)
g2_stats = []
for subj in subjects:
    df_subj = df_simu_g2.query(f"subject == {subj}").copy()
    g2_stats.append(list(anal_perform(df_subj)))
g2_stats = np.array(g2_stats)

In [14]:
print(g2_stats)
print("mean:")
print(np.mean(g2_stats, axis=0))
print("se:")
print(sp.stats.sem(g2_stats, axis=0))

[[0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         0.         0.         0.99173554]
 [0.         

In [15]:
# hack hr and far
old_num = 40
new_num = 40
g2_stats_hack = np.array(g2_stats)
hacked_hr = (g2_stats_hack[:, 1] * old_num + 0.5) / (old_num + 1)
hacked_far = (g2_stats_hack[:, 2] * new_num + 0.5) / (new_num + 1)
g2_stats_hack[:, 1] = hacked_hr
g2_stats_hack[:, 2] = hacked_far
g2_stats_hack

array([[0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512, 0.01219512, 0.99173554],
       [0.        , 0.01219512,

In [16]:
g2_stats = np.array(g2_stats)
g2_stats = g2_stats_hack
g2_ds = sp.stats.norm.ppf(g2_stats[:, 1]) - sp.stats.norm.ppf(g2_stats[:, 2])
print(np.mean(g2_ds))
print(sp.stats.sem(g2_ds))

0.0
0.0


### Association - CR (Group3)

In [17]:
tag = "Asso-CR"
df_simu_g3 = simu_success(tag, params)
df_simu_g3

CMR2 Time: 54.718082666397095


Unnamed: 0,session,list,test_itemno1,test_itemno2,s_resp,s_rt,csim,test,test_item1,correct_ans,order,pair_idx,subject,group,test_item2,corr_fin,omax_fin
0,0,0,1213,926,0.0,0.401325,0.095571,1,ROUGE,0,1,-1,0,3,MOUTH,-1.000000,-1.000000
1,0,0,830,717,0.0,0.177674,0.054830,1,LIST,1,1,34,0,3,HORIZON,-1.000000,-1.000000
2,0,0,577,1183,0.0,0.127938,0.038410,1,FLEA,1,1,0,0,3,RELISH,-1.000000,-1.000000
3,0,0,588,439,0.0,0.186849,0.057347,1,FOLDER,1,0,25,0,3,DANDRUFF,-1.000000,-1.000000
4,0,0,687,1311,0.0,0.219244,0.065342,1,HANGER,1,1,22,0,3,SIRLOIN,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,99,4,1169,-1,-1.0,-1.000000,-1.000000,2,RAT,331,0,226,99,3,-1,-0.490387,-0.365532
29996,99,4,1349,-1,-1.0,-1.000000,-1.000000,2,SPHINX,848,1,219,99,3,-1,-0.496436,-0.381463
29997,99,4,1307,-1,-1.0,-1.000000,-1.000000,2,SILK,72,0,194,99,3,-1,-0.622874,-0.392926
29998,99,4,984,-1,-1.0,-1.000000,-1.000000,2,ORGAN,899,1,196,99,3,-1,-0.562728,-0.464199


In [18]:
subjects = np.unique(df_simu_g3.subject)
g3_stats = []
for subj in subjects:
    df_subj = df_simu_g3.query(f"subject == {subj}").copy()
    # df_subj = df_simu_g3.query(f"subject == {subj} and list == 0").copy()
    g3_stats.append(list(anal_perform(df_subj)))
g3_stats = np.array(g3_stats)

In [19]:
print(g3_stats)
print("mean:")
print(np.mean(g3_stats, axis=0))
print("se:")
print(sp.stats.sem(g3_stats, axis=0))

[[0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         0.         0.         0.99009901]
 [0.         

In [20]:
g3_ds = sp.stats.norm.ppf(g3_stats[:, 1]) - sp.stats.norm.ppf(g3_stats[:, 2])
print(np.mean(g3_ds[np.isfinite(g3_ds)]))
print(sp.stats.sem(g3_ds[np.isfinite(g3_ds)]))

nan
nan


  g3_ds = sp.stats.norm.ppf(g3_stats[:, 1]) - sp.stats.norm.ppf(g3_stats[:, 2])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


### Aggregate Three Groups

In [38]:
stats = []
stats.append(list(np.mean(np.array(g1_stats), axis=0)))
stats.append(list(np.mean(np.array(g2_stats), axis=0)))
stats.append(list(np.mean(np.array(g3_stats), axis=0)))
stats = np.array(stats)
stats

array([[0.        , 0.        , 0.        , 0.99378882],
       [0.        , 0.        , 0.        , 0.99173554],
       [0.        , 0.        , 0.        , 0.99009901]])

In [39]:
ground_truth = np.array([[0.19, 0.67, 0.15, 0.57], [0.30, 0.80, 0.12, 0.71], [0.42, 0.72, 0.22, 0.81]])  # p_rc, hr, far, q
err = np.mean(np.power(stats - ground_truth, 2))
err

0.19054229417790017

### Performance Analysis

In [23]:
def array2df(group_stats, group_num):
    group_stats = np.array(group_stats)
    df = pd.DataFrame(group_stats, columns=["PR", "HR", "FAR", "Q"])
    df["d"] = sp.stats.norm.ppf(np.array(group_stats[:, 1])) - sp.stats.norm.ppf(np.array(group_stats[:, 2]))
    df["subject"] = df.index
    df["group"] = group_num
    return df


df_group1 = array2df(g1_stats, 1)
df_group2 = array2df(g2_stats, 2)
df_group3 = array2df(g3_stats, 3)
df_groups = pd.concat([df_group1, df_group2, df_group3])
df_groups

  df["d"] = sp.stats.norm.ppf(np.array(group_stats[:, 1])) - sp.stats.norm.ppf(np.array(group_stats[:, 2]))
  df["d"] = sp.stats.norm.ppf(np.array(group_stats[:, 1])) - sp.stats.norm.ppf(np.array(group_stats[:, 2]))


Unnamed: 0,PR,HR,FAR,Q,d,subject,group
0,0.0,0.0,0.0,0.993789,,0,1
1,0.0,0.0,0.0,0.993789,,1,1
2,0.0,0.0,0.0,0.993789,,2,1
3,0.0,0.0,0.0,0.993789,,3,1
4,0.0,0.0,0.0,0.993789,,4,1
...,...,...,...,...,...,...,...
95,0.0,0.0,0.0,0.990099,,95,3
96,0.0,0.0,0.0,0.990099,,96,3
97,0.0,0.0,0.0,0.990099,,97,3
98,0.0,0.0,0.0,0.990099,,98,3


In [24]:
pg.anova(data=df_groups, dv="PR", between="group", detailed=True)

  fval = msbetween / mserror
  np2 = ssbetween / (ssbetween + sserror)  # = ssbetween / sstotal


Unnamed: 0,Source,SS,DF,MS
0,group,0.0,2,0.0
1,Within,0.0,297,0.0


In [25]:
pg.pairwise_tukey(data=df_groups, dv="PR", between="group")

  fval = msbetween / mserror
  np2 = ssbetween / (ssbetween + sserror)  # = ssbetween / sstotal
  tval = mn / se
  d = (x.mean() - y.mean()) / poolsd


Unnamed: 0,A,B,mean(A),mean(B),diff,se,T,p-tukey,hedges
0,1,2,0.0,0.0,0.0,0.0,,,
1,1,3,0.0,0.0,0.0,0.0,,,
2,2,3,0.0,0.0,0.0,0.0,,,


In [26]:
pg.anova(data=df_groups.loc[np.isfinite(df_groups.d)], dv="d", between="group", detailed=True)

  msbetween = ssbetween / ddof1
  np2 = ssbetween / (ssbetween + sserror)  # = ssbetween / sstotal


Unnamed: 0,Source,SS,DF,MS
0,group,0.0,0,
1,Within,0.0,99,0.0


In [27]:
pg.anova(data=df_groups, dv="Q", between="group", detailed=True)

Unnamed: 0,Source,SS,DF,MS,F,p-unc,np2
0,group,0.0006836296,2,0.0003418148,1.64724e+28,0.0,1.0
1,Within,6.1629759999999994e-30,297,2.075076e-32,,,


In [28]:
pg.pairwise_tukey(data=df_groups, dv="Q", between="group")

Unnamed: 0,A,B,mean(A),mean(B),diff,se,T,p-tukey,hedges
0,1,2,0.993789,0.991736,0.002053,2.037192e-17,100789800000000.0,0.0,25925120000000.0
1,1,3,0.993789,0.990099,0.00369,2.037192e-17,181122300000000.0,0.0,20834880000000.0
2,2,3,0.991736,0.990099,0.001637,2.037192e-17,80332490000000.0,0.0,10331550000000.0


### Symmetry Analysis

In [29]:
def test_sym(df_simu, testnum):
    df = df_simu.query(f"test == {testnum} and order >= 0").copy()
    df["correct"] = df["s_resp"] == df["correct_ans"]
    df_order = df.groupby(["order", "subject"]).correct.mean().to_frame(name="p_correct").reset_index()
    return df_order

In [30]:
df_simu_all = pd.concat([df_simu_g1, df_simu_g2, df_simu_g3])
df_order_recog = test_sym(df_simu_all, 1)
print(df_order_recog.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_recog, dv="p_correct", subject="subject", within="order")

order
0    0.208750
1    0.207917
Name: p_correct, dtype: float64


Unnamed: 0,Contrast,A,B,Paired,Parametric,T,dof,alternative,p-unc,BF10,hedges
0,order,0,1,True,True,0.286632,99.0,two-sided,0.774993,0.115,0.057109


In [31]:
df_order_g1_recog = test_sym(df_simu_g1, 1)
print(df_order_g1_recog.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_g1_recog, dv="p_correct", subject="subject", within="order")

order
0    0.0
1    0.0
Name: p_correct, dtype: float64


  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)
  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)


Unnamed: 0,Contrast,A,B,Paired,Parametric,dof,alternative,BF10
0,order,0,1,True,True,99.0,two-sided,


In [32]:
df_order_g1_cr = test_sym(df_simu_g1, 2)
print(df_order_g1_cr.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_g1_cr, dv="p_correct", subject="subject", within="order")

order
0    0.0
1    0.0
Name: p_correct, dtype: float64


  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)
  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)


Unnamed: 0,Contrast,A,B,Paired,Parametric,dof,alternative,BF10
0,order,0,1,True,True,99.0,two-sided,


In [33]:
df_order_g2_recog = test_sym(df_simu_g2, 1)
print(df_order_g2_recog.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_g2_recog, dv="p_correct", subject="subject", within="order")

order
0    0.0
1    0.0
Name: p_correct, dtype: float64


  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)
  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)


Unnamed: 0,Contrast,A,B,Paired,Parametric,dof,alternative,BF10
0,order,0,1,True,True,99.0,two-sided,


In [34]:
df_order_g2_cr = test_sym(df_simu_g2, 2)
print(df_order_g2_cr.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_g2_cr, dv="p_correct", subject="subject", within="order")

order
0    0.0
1    0.0
Name: p_correct, dtype: float64


  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)
  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)


Unnamed: 0,Contrast,A,B,Paired,Parametric,dof,alternative,BF10
0,order,0,1,True,True,99.0,two-sided,


In [35]:
df_order_g3_recog = test_sym(df_simu_g3, 1)
print(df_order_g3_recog.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_g3_recog, dv="p_correct", subject="subject", within="order")

order
0    0.501
1    0.499
Name: p_correct, dtype: float64


Unnamed: 0,Contrast,A,B,Paired,Parametric,T,dof,alternative,p-unc,BF10,hedges
0,order,0,1,True,True,0.286632,99.0,two-sided,0.774993,0.115,0.057109


In [36]:
df_order_g3_cr = test_sym(df_simu_g3, 2)
print(df_order_g3_cr.groupby("order").p_correct.mean())
pg.pairwise_tests(df_order_g3_cr, dv="p_correct", subject="subject", within="order")

order
0    0.0
1    0.0
Name: p_correct, dtype: float64


  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)
  d = (x.mean() - y.mean()) / np.sqrt((x.var(ddof=1) + y.var(ddof=1)) / 2)


Unnamed: 0,Contrast,A,B,Paired,Parametric,dof,alternative,BF10
0,order,0,1,True,True,99.0,two-sided,
