In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# 1638 words
with open("../Data/wordpools/wasnorm_wordpool.txt") as f:
    items = f.readlines()
items = [i.replace("\n","") for i in items]
items = np.array(items)

In [3]:
n = 48
v = n-8  # last 8 not tested
wordpool = np.arange(1,1639)

nsubj = 100
g1_per = 3
g2_per = 5
g3_per = 4
g1_sessnum = nsubj * g1_per
g2_sessnum = nsubj * g2_per
g3_sessnum = nsubj * g3_per

### Group1: Association Recog - CR

In [4]:
df_g1_study = pd.DataFrame()
df_g1_test = pd.DataFrame()
rng = np.random.default_rng(seed=42)

subjidx = 1
subjsess = 0

for sess in range(g1_sessnum):
    tmp_df_study = pd.DataFrame()
    tmp_df_test1 = pd.DataFrame()
    tmp_df_test2 = pd.DataFrame()

    # study
    pres_words = rng.choice(wordpool, 2*n, replace = False)
    pres_words = np.reshape(pres_words, (n,2))
    pair_order = rng.permutation([0]*int(v/2) + [1]*int(v/2))  # 0: A-B, 1: B-A (during CR)
    tmp_df_study['study_itemno1'] = pres_words[:,0]
    tmp_df_study['study_itemno2'] = pres_words[:,1]
    tmp_df_study['study_item1'] = items[pres_words[:,0]-1]
    tmp_df_study['study_item2'] = items[pres_words[:,1]-1]
    tmp_df_study['session'] = sess
    tmp_df_study['order'] = np.concatenate((pair_order, [-1]*8))
    tmp_df_study['pair_idx'] = np.arange(n)
    tmp_df_study['subject'] = subjidx
    df_g1_study = pd.concat([df_g1_study,tmp_df_study])
    valid_words = pres_words[0:v,:]

    # test1: association recognition
    intact_idx = rng.permutation([True]*int(v/2) + [False]*int(v/2))
    intact_pair_idx = np.arange(v)[intact_idx]
    intact_pairs_raw = valid_words[intact_idx,:]
    intact_pairs = valid_words[intact_idx,:]
    intact_order = pair_order[intact_idx] # 0:A-B, 1:B-A
    for i in range(intact_pairs.shape[0]):
        if intact_order[i] == 1:
            intact_pairs[i] = np.flip(intact_pairs[i])

    rearrange_idx = ~intact_idx
    rearrange_pair_idx = np.array([-1]*int(v/2))
    rearrange_pairs = valid_words[rearrange_idx,:]
    rearrange_pairs[:,1] = rng.permutation(rearrange_pairs[:,1])
    rearrange_order = pair_order[rearrange_idx] # 0:A-B, 1:B-A
    for i in range(rearrange_pairs.shape[0]):
        if rearrange_order[i] == 1:
            rearrange_pairs[i] = np.flip(rearrange_pairs[i])

    test1_pairs = np.concatenate((intact_pairs,rearrange_pairs),axis=0)
    test1_pair_idx = np.concatenate((intact_pair_idx,rearrange_pair_idx),axis=0)
    test1_type = np.array([1]*int(v/2)+[0]*int(v/2))  #1:intact, 0:rearranged
    test1_order = np.concatenate((intact_order,rearrange_order),axis=0)
    tmp_df_test1['test_itemno1'] = test1_pairs[:,0]
    tmp_df_test1['test_itemno2'] = test1_pairs[:,1]
    tmp_df_test1['test_item1'] = items[test1_pairs[:,0]-1]
    tmp_df_test1['test_item2'] = items[test1_pairs[:,1]-1]
    tmp_df_test1['correct_ans'] = test1_type
    tmp_df_test1['order'] = test1_order
    tmp_df_test1['pair_idx'] = test1_pair_idx
    tmp_df_test1['test'] = 1
    tmp_df_test1['session'] = sess
    tmp_df_test1['subject'] = subjidx
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)
    
    # test2: cued recall
    cued_order = pair_order[intact_idx] # 0:A-B, 1:B-A
    test2_probe = np.array([intact_pairs_raw[i,cued_order[i]] for i in range(int(v/2))])
    test2_ans = np.array([intact_pairs_raw[i,1-cued_order[i]] for i in range(int(v/2))])
    tmp_df_test2['test_itemno1'] = test2_probe
    tmp_df_test2['test_itemno2'] = -1
    tmp_df_test2['test_item1'] = items[test2_probe-1]
    tmp_df_test2['test_item2'] = -1
    tmp_df_test2['correct_ans'] = test2_ans
    tmp_df_test2['order'] = cued_order
    tmp_df_test2['pair_idx'] = intact_pair_idx
    tmp_df_test2['test'] = 2
    tmp_df_test2['session'] = sess
    tmp_df_test2['subject'] = subjidx
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    df_g1_test = pd.concat([df_g1_test,tmp_df_test1,tmp_df_test2])

    # update subject
    subjsess += 1
    if subjsess % g1_per == 0:
        subjidx += 1
        subjsess = 0

df_g1_study.reset_index(inplace = True, drop=True)
df_g1_test.reset_index(inplace = True, drop=True)
df_g1_study['group'] = 1
df_g1_test['group'] = 1

In [5]:
df_g1_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,order,pair_idx,subject,group
0,1118,147,PRAIRIE,BOMB,0,1,0,1,1
1,581,634,FLOOR,GHOST,0,0,1,1,1
2,1087,710,PLAYGROUND,HIP,0,1,2,1,1
3,1458,679,THIMBLE,HAMBURGER,0,0,3,1,1
4,745,589,ISLAND,FOLLOWER,0,0,4,1,1
...,...,...,...,...,...,...,...,...,...
14395,21,838,ANCHOR,LOCK,299,-1,43,100,1
14396,622,439,GARBAGE,DANDRUFF,299,-1,44,100,1
14397,1131,47,PROFESSOR,ASPIRIN,299,-1,45,100,1
14398,122,1347,BERRY,SPARROW,299,-1,46,100,1


In [6]:
df_g1_test

Unnamed: 0,test_itemno1,test_itemno2,test_item1,test_item2,correct_ans,order,pair_idx,test,session,subject,group
0,146,1542,BOLT,UTENSIL,1,0,34,1,0,1,1
1,147,1118,BOMB,PRAIRIE,1,1,0,1,0,1,1
2,1185,159,REPTILE,BOX,0,1,-1,1,0,1,1
3,358,1421,COMPASS,SWING,1,1,35,1,0,1,1
4,1201,1359,ROCK,SPRINKLE,1,0,12,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...
17995,886,-1,MEDICINE,-1,1336,1,22,2,299,100,1
17996,136,-1,BLENDER,-1,1572,1,19,2,299,100,1
17997,157,-1,BOUQUET,-1,1164,0,35,2,299,100,1
17998,1303,-1,SIDEWALK,-1,346,1,1,2,299,100,1


### Group2: Pair Recog - CR

In [7]:
df_g2_study = pd.DataFrame()
df_g2_test = pd.DataFrame()
rng = np.random.default_rng(seed=42)

subjidx = 1
subjsess = 0

for sess in range(g2_sessnum):
    tmp_df_study = pd.DataFrame()
    tmp_df_test1 = pd.DataFrame()
    tmp_df_test2 = pd.DataFrame()

    # study
    all_words = rng.choice(wordpool, 2*(n+v), replace=False)
    pres_words = all_words[0:2*n]
    pres_words = np.reshape(pres_words, (n,2))
    pair_order = rng.permutation([0]*int(v/2) + [1]*int(v/2))  # 0: A-B, 1: B-A (during CR)
    tmp_df_study['study_itemno1'] = pres_words[:,0]
    tmp_df_study['study_itemno2'] = pres_words[:,1]
    tmp_df_study['study_item1'] = items[pres_words[:,0]-1]
    tmp_df_study['study_item2'] = items[pres_words[:,1]-1]
    tmp_df_study['session'] = sess
    tmp_df_study['order'] = np.concatenate((pair_order, [-1]*8))
    tmp_df_study['pair_idx'] = np.arange(n)
    tmp_df_study['subject'] = subjidx
    df_g2_study = pd.concat([df_g2_study,tmp_df_study])
    valid_words = pres_words[0:v,:]

    # test1: pair recognition
    old_pairs = valid_words.copy()
    old_order = pair_order # 0:A-B, 1:B-A
    for i in range(old_pairs.shape[0]):
        if old_order[i] == 1:
            old_pairs[i] = np.flip(old_pairs[i])
    old_pair_idx = np.arange(v)

    new_words = all_words[2*n:]
    new_pairs = np.reshape(new_words, (v,2))
    new_order = [-1] * v
    new_pair_idx = np.array([-1] * v)

    test1_pairs = np.concatenate((old_pairs,new_pairs),axis=0)
    test1_type = np.array([1]*v+[0]*v)  #1:old, 0:new
    test1_order = np.concatenate((old_order,new_order),axis=0)
    test1_pair_idx = np.concatenate((old_pair_idx,new_pair_idx),axis=0)
    tmp_df_test1['test_itemno1'] = test1_pairs[:,0]
    tmp_df_test1['test_itemno2'] = test1_pairs[:,1]
    tmp_df_test1['test_item1'] = items[test1_pairs[:,0]-1]
    tmp_df_test1['test_item2'] = items[test1_pairs[:,1]-1]
    tmp_df_test1['correct_ans'] = test1_type
    tmp_df_test1['order'] = test1_order
    tmp_df_test1['pair_idx'] = test1_pair_idx
    tmp_df_test1['test'] = 1
    tmp_df_test1['session'] = sess
    tmp_df_test1['subject'] = subjidx
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall
    cued_order = pair_order # 0:A-B, 1:B-A
    test2_probe = np.array([valid_words[i,cued_order[i]] for i in range(v)])
    test2_ans = np.array([valid_words[i,1-cued_order[i]] for i in range(v)])
    tmp_df_test2['test_itemno1'] = test2_probe
    tmp_df_test2['test_itemno2'] = -1
    tmp_df_test2['test_item1'] = items[test2_probe-1]
    tmp_df_test2['test_item2'] = -1
    tmp_df_test2['correct_ans'] = test2_ans
    tmp_df_test2['order'] = cued_order
    tmp_df_test2['pair_idx'] = np.arange(v)
    tmp_df_test2['test'] = 2
    tmp_df_test2['session'] = sess
    tmp_df_test2['subject'] = subjidx
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    df_g2_test = pd.concat([df_g2_test,tmp_df_test1,tmp_df_test2])

    # update subject
    subjsess += 1
    if subjsess % g2_per == 0:
        subjidx += 1
        subjsess = 0

df_g2_study.reset_index(inplace = True, drop=True)
df_g2_test.reset_index(inplace = True, drop=True)
df_g2_study['group'] = 2
df_g2_test['group'] = 2

In [8]:
df_g2_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,order,pair_idx,subject,group
0,1406,1272,SUN,SERVER,0,1,0,1,2
1,117,1287,BEGGAR,SHIRT,0,0,1,1,2
2,190,1261,BUILDER,SEAGULL,0,1,2,1,2
3,475,1439,DISHWASHER,TEA,0,1,3,1,2
4,374,94,COOKIE,BASEMENT,0,0,4,1,2
...,...,...,...,...,...,...,...,...,...
23995,778,1251,KITCHEN,SCOTCH,499,-1,43,100,2
23996,488,9,DORM,AFRICA,499,-1,44,100,2
23997,600,902,FRAME,MITTEN,499,-1,45,100,2
23998,821,398,LILY,CRACKER,499,-1,46,100,2


In [9]:
df_g2_test

Unnamed: 0,test_itemno1,test_itemno2,test_item1,test_item2,correct_ans,order,pair_idx,test,session,subject,group
0,1109,898,PORPOISE,MINT,1,1,32,1,0,1,2
1,661,567,GRILL,FINGER,0,-1,-1,1,0,1,2
2,1086,732,PLATE,ICING,1,1,39,1,0,1,2
3,249,720,CARROT,HOSE,1,1,13,1,0,1,2
4,1385,340,STONE,COD,1,0,28,1,0,1,2
...,...,...,...,...,...,...,...,...,...,...,...
59995,562,-1,FIELD,-1,334,1,0,2,499,100,2
59996,1047,-1,PEPPER,-1,296,0,27,2,499,100,2
59997,1527,-1,TWIG,-1,112,0,30,2,499,100,2
59998,574,-1,FLANNEL,-1,1461,0,24,2,499,100,2


### Group3: Item Recog - CR

In [10]:
df_g3_study = pd.DataFrame()
df_g3_test = pd.DataFrame()
rng = np.random.default_rng(seed=42)

subjidx = 1
subjsess = 0

for sess in range(g3_sessnum):
    tmp_df_study = pd.DataFrame()
    tmp_df_test1 = pd.DataFrame()
    tmp_df_test2 = pd.DataFrame()

    # study
    all_words = rng.choice(wordpool, 2*n+v, replace=False)
    pres_words = all_words[0:2*n]
    pres_words = np.reshape(pres_words, (n,2))
    pair_order = rng.permutation([0]*int(v/2) + [1]*int(v/2)) # 0: A-B, 1: B-A (during CR)
    tmp_df_study['study_itemno1'] = pres_words[:,0]
    tmp_df_study['study_itemno2'] = pres_words[:,1]
    tmp_df_study['study_item1'] = items[pres_words[:,0]-1]
    tmp_df_study['study_item2'] = items[pres_words[:,1]-1]
    tmp_df_study['session'] = sess
    tmp_df_study['order'] = np.concatenate((pair_order, [-1]*8))
    tmp_df_study['pair_idx'] = np.arange(n)
    tmp_df_study['subject'] = subjidx
    df_g3_study = pd.concat([df_g3_study,tmp_df_study])
    valid_words = pres_words[0:v,:]

    # test1: item recognition
    old_order = pair_order # 0:B, 1:A
    old_probe = np.array([valid_words[i,1-old_order[i]] for i in range(v)])
    old_pair_idx = np.arange(v)
    new_probe = all_words[2*n:]
    new_order = [-1] * v
    new_pair_idx = np.array([-1] * v)

    test1_probe = np.concatenate((old_probe,new_probe),axis=0)
    test1_type = np.array([1]*v+[0]*v)  #1:old, 0:new
    test1_order = np.concatenate((old_order,new_order),axis=0)
    test1_pair_idx = np.concatenate((old_pair_idx,new_pair_idx),axis=0)
    tmp_df_test1['test_itemno1'] = test1_probe
    tmp_df_test1['test_itemno2'] = -1
    tmp_df_test1['test_item1'] = items[test1_probe-1]
    tmp_df_test1['correct_ans'] = test1_type
    tmp_df_test1['order'] = test1_order
    tmp_df_test1['pair_idx'] = test1_pair_idx
    tmp_df_test1['test'] = 1
    tmp_df_test1['session'] = sess
    tmp_df_test1['subject'] = subjidx
    tmp_df_test1 = tmp_df_test1.iloc[rng.permutation(tmp_df_test1.index)].reset_index(drop=True)

    # test2: cued recall
    cued_order = pair_order # 0:A-B, 1:B-A
    test2_probe = np.array([valid_words[i,cued_order[i]] for i in range(v)])
    test2_ans = np.array([valid_words[i,1-cued_order[i]] for i in range(v)])
    tmp_df_test2['test_itemno1'] = test2_probe
    tmp_df_test2['test_itemno2'] = -1
    tmp_df_test2['test_item1'] = items[test2_probe-1]
    tmp_df_test2['correct_ans'] = test2_ans
    tmp_df_test2['order'] = cued_order
    tmp_df_test2['pair_idx'] = np.arange(v)
    tmp_df_test2['test'] = 2
    tmp_df_test2['session'] = sess
    tmp_df_test2['subject'] = subjidx
    tmp_df_test2 = tmp_df_test2.iloc[rng.permutation(tmp_df_test2.index)].reset_index(drop=True)
    df_g3_test = pd.concat([df_g3_test,tmp_df_test1,tmp_df_test2])

    # update subject
    subjsess += 1
    if subjsess % g3_per == 0:
        subjidx += 1
        subjsess = 0
        
df_g3_study.reset_index(inplace = True, drop=True)
df_g3_test.reset_index(inplace = True, drop=True)
df_g3_study['group'] = 3
df_g3_test['group'] = 3

In [11]:
df_g3_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,order,pair_idx,subject,group
0,186,279,BUFFALO,CHAMPION,0,1,0,1,3
1,255,776,CASKET,KIDNEY,0,1,1,1,3
2,938,1259,NAVIGATOR,SEA,0,1,2,1,3
3,582,1115,FLORIDA,POTATO,0,0,3,1,3
4,224,694,CAMERA,HAYSTACK,0,0,4,1,3
...,...,...,...,...,...,...,...,...,...
19195,500,1135,DRIVEWAY,PUB,399,-1,43,100,3
19196,932,953,MUMMY,NIGHTGOWN,399,-1,44,100,3
19197,60,1559,AUNT,VEST,399,-1,45,100,3
19198,1278,1448,SHEEP,TEMPLE,399,-1,46,100,3


In [12]:
df_g3_test

Unnamed: 0,test_itemno1,test_itemno2,test_item1,correct_ans,order,pair_idx,test,session,subject,group
0,1292,-1,SHORTCAKE,1,0,14,1,0,1,3
1,1052,-1,PERSON,1,0,39,1,0,1,3
2,135,-1,BLANKET,1,0,19,1,0,1,3
3,1365,-1,STAFF,0,-1,-1,1,0,1,3
4,760,-1,JOURNAL,0,-1,-1,1,0,1,3
...,...,...,...,...,...,...,...,...,...,...
47995,454,-1,DETECTIVE,37,0,10,2,399,100,3
47996,449,-1,DEPARTMENT,1242,0,34,2,399,100,3
47997,859,-1,MAJOR,942,1,20,2,399,100,3
47998,1584,-1,WALNUT,1178,1,21,2,399,100,3


In [13]:
df_study = pd.concat([df_g1_study,df_g2_study,df_g3_study])
df_test = pd.concat([df_g1_test,df_g2_test,df_g3_test])

In [14]:
df_study

Unnamed: 0,study_itemno1,study_itemno2,study_item1,study_item2,session,order,pair_idx,subject,group
0,1118,147,PRAIRIE,BOMB,0,1,0,1,1
1,581,634,FLOOR,GHOST,0,0,1,1,1
2,1087,710,PLAYGROUND,HIP,0,1,2,1,1
3,1458,679,THIMBLE,HAMBURGER,0,0,3,1,1
4,745,589,ISLAND,FOLLOWER,0,0,4,1,1
...,...,...,...,...,...,...,...,...,...
19195,500,1135,DRIVEWAY,PUB,399,-1,43,100,3
19196,932,953,MUMMY,NIGHTGOWN,399,-1,44,100,3
19197,60,1559,AUNT,VEST,399,-1,45,100,3
19198,1278,1448,SHEEP,TEMPLE,399,-1,46,100,3


In [15]:
df_test

Unnamed: 0,test_itemno1,test_itemno2,test_item1,test_item2,correct_ans,order,pair_idx,test,session,subject,group
0,146,1542,BOLT,UTENSIL,1,0,34,1,0,1,1
1,147,1118,BOMB,PRAIRIE,1,1,0,1,0,1,1
2,1185,159,REPTILE,BOX,0,1,-1,1,0,1,1
3,358,1421,COMPASS,SWING,1,1,35,1,0,1,1
4,1201,1359,ROCK,SPRINKLE,1,0,12,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...
47995,454,-1,DETECTIVE,,37,0,10,2,399,100,3
47996,449,-1,DEPARTMENT,,1242,0,34,2,399,100,3
47997,859,-1,MAJOR,,942,1,20,2,399,100,3
47998,1584,-1,WALNUT,,1178,1,21,2,399,100,3


In [16]:
# save
with open('../Data/simuS1_design.pkl', 'wb') as outp:
    pickle.dump(df_study, outp, pickle.HIGHEST_PROTOCOL)
    pickle.dump(df_test, outp, pickle.HIGHEST_PROTOCOL)