In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# 1638 words
with open("../Data/wordpools/wasnorm_wordpool.txt") as f:
    items = f.readlines()
items = [i.replace("\n","") for i in items]
items = np.array(items)

In [3]:
simu_sess_num = 1000
n = 48
v = n-8  # last 8 not tested
wordpool = np.arange(1,1639)

### Group1: Association Recog - CR

In [4]:
df_g1_study = pd.DataFrame()
df_g1_test = pd.DataFrame()
rng = np.random.default_rng(seed=42)

for sess in range(simu_sess_num):
    tmp_df_study = pd.DataFrame()
    tmp_df_test1 = pd.DataFrame()
    tmp_df_test2 = pd.DataFrame()

    # study
    pres_words = rng.choice(wordpool, 2*n, replace = False)
    pres_words = np.reshape(pres_words, (n,2))
    tmp_df_study['study_itemno1'] = pres_words[:,0]
    tmp_df_study['study_itemno2'] = pres_words[:,1]
    tmp_df_study['study_item1'] = items[pres_words[:,0]-1]
    tmp_df_study['study_item2'] = items[pres_words[:,1]-1]
    tmp_df_study['session'] = sess
    tmp_df_study['pair_idx'] = np.arange(n)
    df_g1_study = pd.concat([df_g1_study,tmp_df_study])
    valid_words = pres_words[0:v,:]

    # test1: association recognition
    intact_idx = rng.permutation([True]*int(v/2) + [False]*int(v/2))
    intact_pair_idx = np.arange(v)[intact_idx]
    intact_pairs_raw = valid_words[intact_idx,:]
    intact_pairs = valid_words[intact_idx,:]
    intact_order = rng.permutation([0]*int(v/4) + [1]*int(v/4)) # 0:A-B, 1:B-A
    for i in range(intact_pairs.shape[0]):
        if intact_order[i] == 1:
            intact_pairs[i] = np.flip(intact_pairs[i])

    rearrange_idx = ~intact_idx
    rearrange_pair_idx = np.array([-1]*int(v/2))
    rearrange_pairs = valid_words[rearrange_idx,:]
    rearrange_pairs[:,1] = rng.permutation(rearrange_pairs[:,1])
    rearrange_order = rng.permutation([0]*int(v/4) + [1]*int(v/4)) # 0:A-B, 1:B-A
    for i in range(rearrange_pairs.shape[0]):
        if rearrange_order[i] == 1:
            rearrange_pairs[i] = np.flip(rearrange_pairs[i])

    test1_pairs = np.concatenate((intact_pairs,rearrange_pairs),axis=0)
    test1_pair_idx = np.concatenate((intact_pair_idx,rearrange_pair_idx),axis=0)
    test1_type = np.array([1]*int(v/2)+[0]*int(v/2))  #1:intact, 0:rearranged
    test1_order = np.concatenate((intact_order,rearrange_order),axis=0)
    tmp_df_test1['test_itemno1'] = test1_pairs[:,0]
    tmp_df_test1['test_itemno2'] = test1_pairs[:,1]
    tmp_df_test1['test_item1'] = items[test1_pairs[:,0]-1]
    tmp_df_test1['test_item2'] = items[test1_pairs[:,1]-1]
    tmp_df_test1['correct_ans'] = test1_type
    tmp_df_test1['order'] = test1_order
    tmp_df_test1['pair_idx'] = test1_pair_idx
    tmp_df_test1['test'] = 1
    tmp_df_test1['session'] = sess
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)
    
    # test2: cued recall
    cued_order = rng.permutation([0]*int(v/4) + [1]*int(v/4)) # 0:A-B, 1:B-A
    test2_probe = np.array([intact_pairs_raw[i,cued_order[i]] for i in range(int(v/2))])
    test2_ans = np.array([intact_pairs_raw[i,1-cued_order[i]] for i in range(int(v/2))])
    tmp_df_test2['test_itemno1'] = test2_probe
    tmp_df_test2['test_itemno2'] = -1
    tmp_df_test2['test_item1'] = items[test2_probe-1]
    tmp_df_test2['test_item2'] = -1
    tmp_df_test2['correct_ans'] = test2_ans
    tmp_df_test2['order'] = cued_order
    tmp_df_test2['pair_idx'] = intact_pair_idx
    tmp_df_test2['test'] = 2
    tmp_df_test2['session'] = sess
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    df_g1_test = pd.concat([df_g1_test,tmp_df_test1,tmp_df_test2])

df_g1_study.reset_index(inplace = True, drop=True)
df_g1_test.reset_index(inplace = True, drop=True)  

In [5]:
df_g1_study.query("session==0")

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,pair_idx
0,1118,147,PRAIRIE,BOMB,0,0
1,581,634,FLOOR,GHOST,0,1
2,1087,710,PLAYGROUND,HIP,0,2
3,1458,679,THIMBLE,HAMBURGER,0,3
4,745,589,ISLAND,FOLLOWER,0,4
5,709,1401,HINGE,SUGAR,0,5
6,134,70,BLADE,BAIT,0,6
7,632,1292,GERM,SHORTCAKE,0,7
8,1081,698,PLANE,HEATER,0,8
9,1471,1225,TOAST,SALESMAN,0,9


In [6]:
df_g1_test.query("session==0")

Unnamed: 0,test_itemno1,test_itemno2,test_item1,test_item2,correct_ans,order,pair_idx,test,session
0,1354,563,SPIT,FIG,1,1,38,1,0
1,634,581,GHOST,FLOOR,1,1,1,1,0
2,262,1330,CAULIFLOWER,SMOG,0,0,-1,1,0
3,108,1356,BEAST,SPOOL,1,1,39,1,0
4,247,1315,CARPET,SKETCH,1,0,19,1,0
5,1625,1308,WRENCH,SILVERWARE,1,1,33,1,0
6,146,1542,BOLT,UTENSIL,1,0,34,1,0
7,510,200,DWARF,BURGLAR,0,1,-1,1,0
8,857,1452,MAILBOX,TERRITORY,0,1,-1,1,0
9,783,1185,KNAPSACK,REPTILE,1,0,24,1,0


In [7]:
# save
with open('../Data/simuS1_group1_design.pkl', 'wb') as outp:
    pickle.dump(df_g1_study, outp, pickle.HIGHEST_PROTOCOL)
    pickle.dump(df_g1_test, outp, pickle.HIGHEST_PROTOCOL)

### Group2: Pair Recog - CR

In [8]:
df_g2_study = pd.DataFrame()
df_g2_test = pd.DataFrame()
rng = np.random.default_rng(seed=42)

for sess in range(simu_sess_num):
    tmp_df_study = pd.DataFrame()
    tmp_df_test1 = pd.DataFrame()
    tmp_df_test2 = pd.DataFrame()

    # study
    all_words = rng.choice(wordpool, 2*(n+v), replace=False)
    pres_words = all_words[0:2*n]
    pres_words = np.reshape(pres_words, (n,2))
    tmp_df_study['study_itemno1'] = pres_words[:,0]
    tmp_df_study['study_itemno2'] = pres_words[:,1]
    tmp_df_study['study_item1'] = items[pres_words[:,0]-1]
    tmp_df_study['study_item2'] = items[pres_words[:,1]-1]
    tmp_df_study['session'] = sess
    tmp_df_study['pair_idx'] = np.arange(n)
    df_g2_study = pd.concat([df_g2_study,tmp_df_study])
    valid_words = pres_words[0:v,:]

    # test1: pair recognition
    old_pairs = valid_words.copy()
    old_order = rng.permutation([0]*int(v/2) + [1]*int(v/2)) # 0:A-B, 1:B-A
    for i in range(old_pairs.shape[0]):
        if old_order[i] == 1:
            old_pairs[i] = np.flip(old_pairs[i])
    old_pair_idx = np.arange(v)

    new_words = all_words[2*n:]
    new_pairs = np.reshape(new_words, (v,2))
    new_order = [-1] * v
    new_pair_idx = np.array([-1] * v)

    test1_pairs = np.concatenate((old_pairs,new_pairs),axis=0)
    test1_type = np.array([1]*v+[0]*v)  #1:old, 0:new
    test1_order = np.concatenate((old_order,new_order),axis=0)
    test1_pair_idx = np.concatenate((old_pair_idx,new_pair_idx),axis=0)
    tmp_df_test1['test_itemno1'] = test1_pairs[:,0]
    tmp_df_test1['test_itemno2'] = test1_pairs[:,1]
    tmp_df_test1['test_item1'] = items[test1_pairs[:,0]-1]
    tmp_df_test1['test_item2'] = items[test1_pairs[:,1]-1]
    tmp_df_test1['correct_ans'] = test1_type
    tmp_df_test1['order'] = test1_order
    tmp_df_test1['pair_idx'] = test1_pair_idx
    tmp_df_test1['test'] = 1
    tmp_df_test1['session'] = sess
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall
    cued_order = rng.permutation([0]*int(v/2) + [1]*int(v/2)) # 0:A-B, 1:B-A
    test2_probe = np.array([valid_words[i,cued_order[i]] for i in range(v)])
    test2_ans = np.array([valid_words[i,1-cued_order[i]] for i in range(v)])
    tmp_df_test2['test_itemno1'] = test2_probe
    tmp_df_test2['test_itemno2'] = -1
    tmp_df_test2['test_item1'] = items[test2_probe-1]
    tmp_df_test2['test_item2'] = -1
    tmp_df_test2['correct_ans'] = test2_ans
    tmp_df_test2['order'] = cued_order
    tmp_df_test2['pair_idx'] = np.arange(v)
    tmp_df_test2['test'] = 2
    tmp_df_test2['session'] = sess
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    df_g2_test = pd.concat([df_g2_test,tmp_df_test1,tmp_df_test2])

df_g2_study.reset_index(inplace = True, drop=True)
df_g2_test.reset_index(inplace = True, drop=True)  

In [9]:
df_g2_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,pair_idx
0,1406,1272,SUN,SERVER,0,0
1,117,1287,BEGGAR,SHIRT,0,1
2,190,1261,BUILDER,SEAGULL,0,2
3,475,1439,DISHWASHER,TEA,0,3
4,374,94,COOKIE,BASEMENT,0,4
...,...,...,...,...,...,...
47995,1196,705,ROAD,HERD,999,43
47996,853,1122,MAGAZINE,PRIEST,999,44
47997,969,670,OBOE,GUITAR,999,45
47998,1393,1378,STREET,STEREO,999,46


In [10]:
df_g2_test

Unnamed: 0,test_itemno1,test_itemno2,test_item1,test_item2,correct_ans,order,pair_idx,test,session
0,1109,898,PORPOISE,MINT,1,1,32,1,0
1,661,567,GRILL,FINGER,0,-1,-1,1,0
2,1086,732,PLATE,ICING,1,1,39,1,0
3,249,720,CARROT,HOSE,1,1,13,1,0
4,1385,340,STONE,COD,1,0,28,1,0
...,...,...,...,...,...,...,...,...,...
119995,165,-1,BRAIN,-1,983,1,8,2,999
119996,270,-1,CENT,-1,344,1,19,2,999
119997,1416,-1,SWAMP,-1,1289,1,4,2,999
119998,993,-1,OWNER,-1,1552,1,14,2,999


In [11]:
# save
with open('../Data/simuS1_group2_design.pkl', 'wb') as outp:
    pickle.dump(df_g2_study, outp, pickle.HIGHEST_PROTOCOL)
    pickle.dump(df_g2_test, outp, pickle.HIGHEST_PROTOCOL)

### Group3: Item Recog - CR

In [12]:
df_g3_study = pd.DataFrame()
df_g3_test = pd.DataFrame()
rng = np.random.default_rng(seed=42)

for sess in range(simu_sess_num):
    tmp_df_study = pd.DataFrame()
    tmp_df_test1 = pd.DataFrame()
    tmp_df_test2 = pd.DataFrame()

    # study
    all_words = rng.choice(wordpool, 2*n+v, replace=False)
    pres_words = all_words[0:2*n]
    pres_words = np.reshape(pres_words, (n,2))
    tmp_df_study['study_itemno1'] = pres_words[:,0]
    tmp_df_study['study_itemno2'] = pres_words[:,1]
    tmp_df_study['study_item1'] = items[pres_words[:,0]-1]
    tmp_df_study['study_item2'] = items[pres_words[:,1]-1]
    tmp_df_study['session'] = sess
    tmp_df_study['pair_idx'] = np.arange(n)
    df_g3_study = pd.concat([df_g3_study,tmp_df_study])
    valid_words = pres_words[0:v,:]

    # test1: item recognition
    old_order = rng.permutation([0]*int(v/2) + [1]*int(v/2)) # 0:A, 1:B
    old_probe = np.array([valid_words[i,old_order[i]] for i in range(v)])
    old_pair_idx = np.arange(v)
    new_probe = all_words[2*n:]
    new_order = [-1] * v
    new_pair_idx = np.array([-1] * v)

    test1_probe = np.concatenate((old_probe,new_probe),axis=0)
    test1_type = np.array([1]*v+[0]*v)  #1:old, 0:new
    test1_order = np.concatenate((old_order,new_order),axis=0)
    test1_pair_idx = np.concatenate((old_pair_idx,new_pair_idx),axis=0)
    tmp_df_test1['test_itemno1'] = test1_probe
    tmp_df_test1['test_itemno2'] = -1
    tmp_df_test1['test_item1'] = items[test1_probe-1]
    tmp_df_test1['correct_ans'] = test1_type
    tmp_df_test1['order'] = test1_order
    tmp_df_test1['pair_idx'] = test1_pair_idx
    tmp_df_test1['test'] = 1
    tmp_df_test1['session'] = sess
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall
    cued_order = rng.permutation([0]*int(v/2) + [1]*int(v/2)) # 0:A-B, 1:B-A
    test2_probe = np.array([valid_words[i,cued_order[i]] for i in range(v)])
    test2_ans = np.array([valid_words[i,1-cued_order[i]] for i in range(v)])
    tmp_df_test2['test_itemno1'] = test2_probe
    tmp_df_test2['test_itemno2'] = -1
    tmp_df_test2['test_item1'] = items[test2_probe-1]
    tmp_df_test2['correct_ans'] = test2_ans
    tmp_df_test2['order'] = cued_order
    tmp_df_test2['pair_idx'] = np.arange(v)
    tmp_df_test2['test'] = 2
    tmp_df_test2['session'] = sess
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    df_g3_test = pd.concat([df_g3_test,tmp_df_test1,tmp_df_test2])

df_g3_study.reset_index(inplace = True, drop=True)
df_g3_test.reset_index(inplace = True, drop=True)  

In [13]:
df_g3_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,pair_idx
0,186,279,BUFFALO,CHAMPION,0,0
1,255,776,CASKET,KIDNEY,0,1
2,938,1259,NAVIGATOR,SEA,0,2
3,582,1115,FLORIDA,POTATO,0,3
4,224,694,CAMERA,HAYSTACK,0,4
...,...,...,...,...,...,...
47995,327,316,CLOTHES,CLAMP,999,43
47996,812,1086,LEOPARD,PLATE,999,44
47997,967,344,OATMEAL,COLESLAW,999,45
47998,467,1048,DINER,PERCH,999,46


In [14]:
df_g3_test

Unnamed: 0,test_itemno1,test_itemno2,test_item1,correct_ans,order,pair_idx,test,session
0,691,-1,HATCHET,1,0,14,1,0
1,924,-1,MOUNTAIN,1,0,39,1,0
2,1392,-1,STREAM,1,0,19,1,0
3,1365,-1,STAFF,0,-1,-1,1,0
4,760,-1,JOURNAL,0,-1,-1,1,0
...,...,...,...,...,...,...,...,...
119995,34,-1,APPLICATION,83,0,0,2,999
119996,1308,-1,SILVERWARE,1272,0,16,2,999
119997,923,-1,MOTORCYCLE,804,0,23,2,999
119998,1133,-1,PROTRACTOR,1412,1,29,2,999


In [15]:
# save
with open('../Data/simuS1_group3_design.pkl', 'wb') as outp:
    pickle.dump(df_g3_study, outp, pickle.HIGHEST_PROTOCOL)
    pickle.dump(df_g3_test, outp, pickle.HIGHEST_PROTOCOL)