In [44]:
import pandas as pd
import numpy as np
import pickle

In [45]:
# Word pool: one word per line (1638 words); items[k-1] is the word for item number k.
with open("../Data/wordpools/wasnorm_wordpool.txt") as f:
    # Strip the newline from every line and keep the words as a NumPy array so
    # they can be looked up with integer index arrays later on.
    items = np.array([line.replace("\n", "") for line in f])

In [46]:
# Design constants shared by all three groups.
simu_sess_num = 1000  # number of simulated sessions per group
n = 48                # study pairs per session
v = n - 8             # tested pairs: the last 8 studied pairs are not tested
# 1-based item numbers 1..1638 from which each session's words are drawn.
wordpool = np.arange(1638) + 1

### Group 1: Association Recognition, then Cued Recall (CR)

In [47]:
# Group 1 design: association recognition (test 1) followed by cued recall (test 2).
#
# Each session draws 2*n distinct item numbers from `wordpool`, arranges them into
# n study pairs and tests only the first v pairs. Half of the tested pairs are shown
# intact in test 1 and are later cued in test 2; the other half are re-paired.
#
# NOTE: rearranged pairs are now redrawn until no word keeps its studied partner.
# This consumes extra random numbers, so designs generated by this cell differ from
# designs produced by the earlier version with the same seed.
rng = np.random.default_rng(seed=42)
half = v // 2  # number of intact pairs == number of rearranged pairs

if half == 1:
    # A single pair cannot be re-paired with anything but itself.
    raise ValueError("need at least two rearranged pairs to build a rearranged test set")

# Collect per-session frames and concatenate once at the end; growing a DataFrame
# with pd.concat inside the loop is quadratic in the number of sessions.
study_frames = []
test_frames = []

for sess in range(simu_sess_num):
    # study
    pres_words = rng.choice(wordpool, 2*n, replace=False).reshape(n, 2)
    pair_order = rng.permutation([0]*half + [1]*half)  # 0: A-B, 1: B-A (during CR)
    # Untested trailing pairs get order -1; pad by n - v rather than a literal 8.
    untested_order = np.full(n - v, -1, dtype=pair_order.dtype)
    study_frames.append(pd.DataFrame({
        'study_itemno1': pres_words[:, 0],
        'study_itemno2': pres_words[:, 1],
        'study_item1': items[pres_words[:, 0] - 1],
        'study_item2': items[pres_words[:, 1] - 1],
        'session': sess,
        'order': np.concatenate((pair_order, untested_order)),
        'pair_idx': np.arange(n),
    }))
    valid_words = pres_words[:v]

    # test1: association recognition
    intact_idx = rng.permutation([True]*half + [False]*half)
    intact_pair_idx = np.arange(v)[intact_idx]
    intact_pairs_raw = valid_words[intact_idx]  # studied order, reused for cued recall
    intact_order = pair_order[intact_idx]       # 0:A-B, 1:B-A
    intact_pairs = intact_pairs_raw.copy()
    flip_intact = intact_order == 1
    intact_pairs[flip_intact] = intact_pairs[flip_intact][:, ::-1]

    rearrange_idx = ~intact_idx
    rearrange_pair_idx = np.array([-1]*half)    # rearranged probes map to no studied pair
    rearrange_pairs = valid_words[rearrange_idx]
    studied_partners = rearrange_pairs[:, 1].copy()
    # A plain permutation can leave a word with its studied partner, which would
    # present an intact pair labelled as rearranged. Redraw until every second
    # word has moved (all words are distinct, so value equality == same position).
    shuffled_partners = rng.permutation(studied_partners)
    while np.any(shuffled_partners == studied_partners):
        shuffled_partners = rng.permutation(studied_partners)
    rearrange_pairs[:, 1] = shuffled_partners
    rearrange_order = pair_order[rearrange_idx]  # 0:A-B, 1:B-A
    flip_rearranged = rearrange_order == 1
    rearrange_pairs[flip_rearranged] = rearrange_pairs[flip_rearranged][:, ::-1]

    test1_pairs = np.concatenate((intact_pairs, rearrange_pairs), axis=0)
    tmp_df_test1 = pd.DataFrame({
        'test_itemno1': test1_pairs[:, 0],
        'test_itemno2': test1_pairs[:, 1],
        'test_item1': items[test1_pairs[:, 0] - 1],
        'test_item2': items[test1_pairs[:, 1] - 1],
        'correct_ans': np.array([1]*half + [0]*half),  # 1:intact, 0:rearranged
        'order': np.concatenate((intact_order, rearrange_order), axis=0),
        'pair_idx': np.concatenate((intact_pair_idx, rearrange_pair_idx), axis=0),
        'test': 1,
        'session': sess,
    })
    # Randomise the presentation order of the test-1 trials.
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall (only the pairs that were shown intact in test 1)
    cued_order = intact_order  # 0:A-B, 1:B-A
    row = np.arange(half)
    test2_probe = intact_pairs_raw[row, cued_order]
    test2_ans = intact_pairs_raw[row, 1 - cued_order]
    tmp_df_test2 = pd.DataFrame({
        'test_itemno1': test2_probe,
        'test_itemno2': -1,
        'test_item1': items[test2_probe - 1],
        'test_item2': -1,
        'correct_ans': test2_ans,
        'order': cued_order,
        'pair_idx': intact_pair_idx,
        'test': 2,
        'session': sess,
    })
    # Randomise the presentation order of the test-2 trials.
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    test_frames.extend((tmp_df_test1, tmp_df_test2))

# One concat per table; fall back to an empty frame when no session was simulated.
df_g1_study = pd.concat(study_frames, ignore_index=True) if study_frames else pd.DataFrame()
df_g1_test = pd.concat(test_frames, ignore_index=True) if test_frames else pd.DataFrame()

In [48]:
# df_g1_study.query("session==0")

In [49]:
# df_g1_test.query("session==0")

In [50]:
# Persist the Group 1 design: the study table is pickled first, then the test
# table, back-to-back in the same file (load them with two pickle.load calls).
with open('../Data/simuS1_group1_design.pkl', 'wb') as outp:
    for design_table in (df_g1_study, df_g1_test):
        pickle.dump(design_table, outp, protocol=pickle.HIGHEST_PROTOCOL)

### Group 2: Pair Recognition, then Cued Recall (CR)

In [51]:
# Group 2 design: pair recognition (test 1) followed by cued recall (test 2).
#
# Each session draws 2*(n+v) distinct item numbers: the first 2*n form the n study
# pairs, the remaining 2*v form v never-studied "new" pairs for test 1. Only the
# first v study pairs are tested.
rng = np.random.default_rng(seed=42)
half = v // 2

# Collect per-session frames and concatenate once at the end; growing a DataFrame
# with pd.concat inside the loop is quadratic in the number of sessions.
study_frames = []
test_frames = []

for sess in range(simu_sess_num):
    # study
    all_words = rng.choice(wordpool, 2*(n+v), replace=False)
    pres_words = all_words[:2*n].reshape(n, 2)
    pair_order = rng.permutation([0]*half + [1]*half)  # 0: A-B, 1: B-A (during CR)
    # Untested trailing pairs get order -1; pad by n - v rather than a literal 8.
    untested_order = np.full(n - v, -1, dtype=pair_order.dtype)
    study_frames.append(pd.DataFrame({
        'study_itemno1': pres_words[:, 0],
        'study_itemno2': pres_words[:, 1],
        'study_item1': items[pres_words[:, 0] - 1],
        'study_item2': items[pres_words[:, 1] - 1],
        'session': sess,
        'order': np.concatenate((pair_order, untested_order)),
        'pair_idx': np.arange(n),
    }))
    valid_words = pres_words[:v]

    # test1: pair recognition
    old_order = pair_order  # 0:A-B, 1:B-A
    old_pairs = valid_words.copy()  # copy: valid_words is reused unflipped for test 2
    flip_old = old_order == 1
    old_pairs[flip_old] = old_pairs[flip_old][:, ::-1]
    old_pair_idx = np.arange(v)

    new_pairs = all_words[2*n:].reshape(v, 2)
    new_order = [-1] * v               # new pairs have no presentation order
    new_pair_idx = np.array([-1] * v)  # ...and map to no studied pair

    test1_pairs = np.concatenate((old_pairs, new_pairs), axis=0)
    tmp_df_test1 = pd.DataFrame({
        'test_itemno1': test1_pairs[:, 0],
        'test_itemno2': test1_pairs[:, 1],
        'test_item1': items[test1_pairs[:, 0] - 1],
        'test_item2': items[test1_pairs[:, 1] - 1],
        'correct_ans': np.array([1]*v + [0]*v),  # 1:old, 0:new
        'order': np.concatenate((old_order, new_order), axis=0),
        'pair_idx': np.concatenate((old_pair_idx, new_pair_idx), axis=0),
        'test': 1,
        'session': sess,
    })
    # Randomise the presentation order of the test-1 trials.
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall (all v tested pairs)
    cued_order = pair_order  # 0:A-B, 1:B-A
    row = np.arange(v)
    test2_probe = valid_words[row, cued_order]
    test2_ans = valid_words[row, 1 - cued_order]
    tmp_df_test2 = pd.DataFrame({
        'test_itemno1': test2_probe,
        'test_itemno2': -1,
        'test_item1': items[test2_probe - 1],
        'test_item2': -1,
        'correct_ans': test2_ans,
        'order': cued_order,
        'pair_idx': np.arange(v),
        'test': 2,
        'session': sess,
    })
    # Randomise the presentation order of the test-2 trials.
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    test_frames.extend((tmp_df_test1, tmp_df_test2))

# One concat per table; fall back to an empty frame when no session was simulated.
df_g2_study = pd.concat(study_frames, ignore_index=True) if study_frames else pd.DataFrame()
df_g2_test = pd.concat(test_frames, ignore_index=True) if test_frames else pd.DataFrame()

In [52]:
# df_g2_study.query("session==0")

In [53]:
# df_g2_test.iloc[60:120]

In [54]:
# Persist the Group 2 design: the study table is pickled first, then the test
# table, back-to-back in the same file (load them with two pickle.load calls).
with open('../Data/simuS1_group2_design.pkl', 'wb') as outp:
    for design_table in (df_g2_study, df_g2_test):
        pickle.dump(design_table, outp, protocol=pickle.HIGHEST_PROTOCOL)

### Group 3: Item Recognition, then Cued Recall (CR)

In [55]:
# Group 3 design: item recognition (test 1) followed by cued recall (test 2).
#
# Each session draws 2*n + v distinct item numbers: the first 2*n form the n study
# pairs, the remaining v are never-studied "new" single-item probes for test 1.
# Only the first v study pairs are tested.
rng = np.random.default_rng(seed=42)
half = v // 2

# Collect per-session frames and concatenate once at the end; growing a DataFrame
# with pd.concat inside the loop is quadratic in the number of sessions.
study_frames = []
test_frames = []

for sess in range(simu_sess_num):
    # study
    all_words = rng.choice(wordpool, 2*n + v, replace=False)
    pres_words = all_words[:2*n].reshape(n, 2)
    pair_order = rng.permutation([0]*half + [1]*half)  # 0: A-B, 1: B-A (during CR)
    # Untested trailing pairs get order -1; pad by n - v rather than a literal 8.
    untested_order = np.full(n - v, -1, dtype=pair_order.dtype)
    study_frames.append(pd.DataFrame({
        'study_itemno1': pres_words[:, 0],
        'study_itemno2': pres_words[:, 1],
        'study_item1': items[pres_words[:, 0] - 1],
        'study_item2': items[pres_words[:, 1] - 1],
        'session': sess,
        'order': np.concatenate((pair_order, untested_order)),
        'pair_idx': np.arange(n),
    }))
    valid_words = pres_words[:v]
    row = np.arange(v)

    # test1: item recognition
    # The old probe is the pair member at column 1 - order, i.e. the same member
    # that is the expected answer in the cued-recall test below.
    old_order = pair_order  # 0:B, 1:A
    old_probe = valid_words[row, 1 - old_order]
    old_pair_idx = np.arange(v)
    new_probe = all_words[2*n:]
    new_order = [-1] * v               # new items have no presentation order
    new_pair_idx = np.array([-1] * v)  # ...and map to no studied pair

    test1_probe = np.concatenate((old_probe, new_probe), axis=0)
    tmp_df_test1 = pd.DataFrame({
        'test_itemno1': test1_probe,
        'test_itemno2': -1,
        'test_item1': items[test1_probe - 1],
        'correct_ans': np.array([1]*v + [0]*v),  # 1:old, 0:new
        'order': np.concatenate((old_order, new_order), axis=0),
        'pair_idx': np.concatenate((old_pair_idx, new_pair_idx), axis=0),
        'test': 1,
        'session': sess,
    })
    # Randomise the presentation order of the test-1 trials.
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall (all v tested pairs)
    cued_order = pair_order  # 0:A-B, 1:B-A
    test2_probe = valid_words[row, cued_order]
    test2_ans = valid_words[row, 1 - cued_order]
    tmp_df_test2 = pd.DataFrame({
        'test_itemno1': test2_probe,
        'test_itemno2': -1,
        'test_item1': items[test2_probe - 1],
        'correct_ans': test2_ans,
        'order': cued_order,
        'pair_idx': np.arange(v),
        'test': 2,
        'session': sess,
    })
    # Randomise the presentation order of the test-2 trials.
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    test_frames.extend((tmp_df_test1, tmp_df_test2))

# One concat per table; fall back to an empty frame when no session was simulated.
df_g3_study = pd.concat(study_frames, ignore_index=True) if study_frames else pd.DataFrame()
df_g3_test = pd.concat(test_frames, ignore_index=True) if test_frames else pd.DataFrame()

In [56]:
# Sanity check: show the Group 3 study pairs generated for the first session (session 0).
df_g3_study.query("session==0")

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,order,pair_idx
0,186,279,BUFFALO,CHAMPION,0,1,0
1,255,776,CASKET,KIDNEY,0,1,1
2,938,1259,NAVIGATOR,SEA,0,1,2
3,582,1115,FLORIDA,POTATO,0,0,3
4,224,694,CAMERA,HAYSTACK,0,0,4
5,1343,461,SOFTBALL,DICE,0,0,5
6,742,898,INTESTINE,MINT,0,1,6
7,1165,619,RAM,GANG,0,1,7
8,1321,1478,SKUNK,TOMBSTONE,0,1,8
9,1489,566,TOWEL,FIN,0,1,9


In [57]:
# Sanity check: show rows 60-119 (by position) of the Group 3 test table.
df_g3_test.iloc[60:120]

Unnamed: 0,test_itemno1,test_itemno2,test_item1,correct_ans,order,pair_idx,test,session
60,616,-1,FURNITURE,1,0,37,1,0
61,694,-1,HAYSTACK,1,0,4,1,0
62,299,-1,CHILD,0,-1,-1,1,0
63,1385,-1,STONE,1,0,29,1,0
64,742,-1,INTESTINE,1,1,6,1,0
65,321,-1,CLERK,0,-1,-1,1,0
66,852,-1,MACHINE,1,1,12,1,0
67,398,-1,CRACKER,0,-1,-1,1,0
68,1054,-1,PHILOSOPHER,0,-1,-1,1,0
69,1516,-1,TUBE,1,0,30,1,0


In [58]:
# Persist the Group 3 design: the study table is pickled first, then the test
# table, back-to-back in the same file (load them with two pickle.load calls).
with open('../Data/simuS1_group3_design.pkl', 'wb') as outp:
    for design_table in (df_g3_study, df_g3_test):
        pickle.dump(design_table, outp, protocol=pickle.HIGHEST_PROTOCOL)