In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
# Read the whitespace-delimited recognition data file; column names are
# supplied explicitly through `names=`.
df = pd.read_table(
    "../Data/RN2_Pix.dat",
    # Raw string: "\s" inside a plain string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    sep=r"\s+",
    names=[
        'subject', 'session', 'list', 'recog_pos', 'picture', 'category',
        'study_pos', 'old_lag', 'study_lag', 'confidence', 'rt',
    ],
)
df

Unnamed: 0,subject,session,list,recog_pos,picture,category,study_pos,old_lag,study_lag,confidence,rt
0,103,1,0,1,VA084,_OLD_,6,-999,59,6,705
1,103,1,0,2,GBEN253,_OLD_,5,-1,61,6,936
2,103,1,0,3,ONTO011,_OLD_,3,-2,64,6,678
3,103,1,0,4,ISBO1,_NEW_,-99,-999,-9999,4,1461
4,103,1,0,5,PAGETT11,_OLD_,38,-999,31,5,1799
...,...,...,...,...,...,...,...,...,...,...,...
69883,99,1,5,124,CHN639,_NEW_,-99,-999,-9999,4,1460
69884,99,1,5,125,CT173,_OLD_,39,-999,150,2,1040
69885,99,1,5,126,GRTO2,_NEW_,-99,-999,-9999,2,1269
69886,99,1,5,127,GR022,_OLD_,46,-999,145,1,887


In [3]:
# Map each distinct picture name to a 1-based integer id (ids follow the
# sorted order returned by np.unique) and flag the '_OLD_' rows.
pics = np.unique(df.picture)
pic2itemno = {pic: number for number, pic in enumerate(pics, start=1)}

# Look up the id of every row's picture, in row order.
itemnos = [pic2itemno[name] for name in df.picture]

df['itemno'] = itemnos
df['old'] = df.category == '_OLD_'

In [4]:
# unique list index: (rank of the subject among all subject ids) * 6 + list
# NOTE(review): the factor 6 presumably means every subject has lists 0-5,
# which is what makes the index unique -- confirm against the data.
lists_per_subject = 6

# np.unique already returns its values sorted, so no extra sort is needed.
subjectlist = np.unique(df.subject)

# Vectorized lookup of each row's subject position in the sorted id array;
# this replaces a row-wise apply that scanned `subjectlist` for every row.
subject_rank = np.searchsorted(subjectlist, df['subject'].to_numpy())
df['list_uni'] = subject_rank * lists_per_subject + df['list']

df = df.sort_values(by=['list_uni', 'recog_pos'])
df

Unnamed: 0,subject,session,list,recog_pos,picture,category,study_pos,old_lag,study_lag,confidence,rt,itemno,old,list_uni
36096,37,1,0,1,CHN211,_OLD_,9,-999,56,6,1053,195,True,0
36097,37,1,0,2,GBEN655,_OLD_,55,46,11,2,2306,493,True,0
36098,37,1,0,3,ECU104,_OLD_,52,-3,15,3,1457,376,True,0
36099,37,1,0,4,MXACP4,_OLD_,47,-5,21,1,2333,998,True,0
36100,37,1,0,5,CHN118,_NEW_,-99,-999,-9999,1,1704,184,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36091,172,1,5,124,JEROCTWL,_NEW_,-99,-999,-9999,2,1076,801,False,545
36092,172,1,5,125,THA064,_NEW_,-99,-999,-9999,6,808,1335,False,545
36093,172,1,5,126,RI003,_OLD_,28,-999,162,4,999,1245,True,545
36094,172,1,5,127,IND117,_NEW_,-99,-999,-9999,2,1114,747,False,545


In [5]:
# organize: discard the columns that are no longer needed, then put the
# remaining ones in a fixed order.
df = (
    df
    .drop(columns=['session','picture','category','confidence','rt','subject','list'])
    .loc[:, ['list_uni','recog_pos', 'itemno', 'old', 'old_lag', 'study_pos', 'study_lag']]
)
df

Unnamed: 0,list_uni,recog_pos,itemno,old,old_lag,study_pos,study_lag
36096,0,1,195,True,-999,9,56
36097,0,2,493,True,46,55,11
36098,0,3,376,True,-3,52,15
36099,0,4,998,True,-5,47,21
36100,0,5,184,False,-999,-99,-9999
...,...,...,...,...,...,...,...
36091,545,124,801,False,-999,-99,-9999
36092,545,125,1335,False,-999,-99,-9999
36093,545,126,1245,True,-999,28,162
36094,545,127,747,False,-999,-99,-9999


### Design: simulated study and test lists

In [3]:
# Simulation settings: number of simulated sessions and how many old
# (studied) and new items each session contains.
simu_sess_num = 1000
simu_old_num = simu_new_num = 64

# Seeded generator so the sampled design is reproducible.
rng = np.random.default_rng(42)

In [4]:
# Build the simulated design: for every session, sample distinct items, treat
# the first `simu_old_num` as studied ("old") and the rest as "new", and
# present all of them in a random order at test.
#
# NOTE(review): `itemno_list` is not defined in the visible notebook, and `df`
# is indexed by label through `.loc` here, so it is presumably an item pool
# indexed by item number -- confirm which cell provides both before rerunning.
study_frames = []
test_frames = []

for i in range(simu_sess_num):

    # Sampling without replacement keeps old and new items disjoint.
    words = rng.choice(itemno_list, simu_old_num + simu_new_num, replace=False)
    old_words = words[:simu_old_num]
    new_words = words[simu_old_num:simu_old_num + simu_new_num]
    test_words = rng.permutation(np.concatenate([old_words, new_words]))

    # `.assign` returns a new frame, so no column is written onto a slice of `df`.
    study_frames.append(df.loc[old_words].assign(session=i))
    test_frames.append(
        df.loc[test_words].assign(session=i, old=np.isin(test_words, old_words))
    )

# Concatenate once at the end: `DataFrame.append` was removed in pandas 2.0 and
# growing a frame inside the loop is quadratic. With zero sessions, fall back
# to an empty frame, as the original loop did.
df_study = (pd.concat(study_frames) if study_frames else pd.DataFrame()).reset_index()
df_test = (pd.concat(test_frames) if test_frames else pd.DataFrame()).reset_index()

In [5]:
# Display the study-list frame (pandas truncates the rendering of long frames).
df_study

Unnamed: 0,itemno,item,freq,quantile,session
0,552,FARM,1161,8,0
1,812,LEOPARD,118,3,0
2,741,INSTRUCTOR,78,2,0
3,153,BOTTLE,1479,8,0
4,150,BOOT,149,3,0
...,...,...,...,...,...
99995,1491,TOWN,3328,9,999
99996,1057,PIANO,466,6,999
99997,1398,SUBMARINE,154,3,999
99998,791,LABYRINTH,32,0,999


In [6]:
# Display the test-list frame (pandas truncates the rendering of long frames).
df_test

Unnamed: 0,itemno,item,freq,quantile,session,old
0,595,FORT,411,6,0,True
1,864,MANSION,156,3,0,True
2,121,BENCH,345,6,0,True
3,667,GUARD,658,7,0,False
4,112,BEE,185,4,0,True
...,...,...,...,...,...,...
199995,850,LUNG,169,4,999,True
199996,1485,TORNADO,34,0,999,False
199997,1553,VEHICLE,383,6,999,False
199998,1215,RUG,321,5,999,True


In [6]:
# Serialize `df` to disk with the highest available pickle protocol.
# NOTE(review): only `df` is written here; `df_study` / `df_test` are not
# saved by this cell -- confirm that this is the intended design object.
with open('../Data/simu2_design.pkl', 'wb') as design_file:
    pickle.dump(df, design_file, protocol=pickle.HIGHEST_PROTOCOL)