In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Simulation settings shared by the cells below.
simu_sess_num = 1_000  # number of simulated sessions to generate
n = 160  # study pairs (and therefore rows/positions) per session
wordpool = np.arange(400) + 1  # word ids 1..400
pos_lags = np.array((2, 4, 6, 8, 16))  # candidate study-to-test lags, counted in positions

In [3]:
# Build the simulated study/test design.  Every session contributes n rows
# (one per position); the per-session tables are collected in a list and
# concatenated once at the end into `df`.
rng = np.random.default_rng(seed=42)  # fixed seed so the design is reproducible
session_frames = []

for sess in range(simu_sess_num):
    # choose the studied words and their order: n pairs drawn without replacement
    pres_words = rng.choice(wordpool, 2*n, replace=False)
    pres_words = np.reshape(pres_words, (n, 2))

    # choose the test type of every studied pair (40 of each type, i.e. n/4 for n = 160)
    pres_type = [0, 1, 2, 3] * 40
    pres_type = rng.permutation(pres_type)
    while pres_type[0] == 3:
        # the first pair has no preceding pair to recombine with,
        # so it cannot be tested as "pair new": reshuffle until it is not
        pres_type = rng.permutation(pres_type)

    # build the test probe for every studied pair (row i matches pres_words[i] / pres_type[i]);
    # words that were not studied in this session are the pool for "single new" probes
    new_words = rng.permutation(wordpool[~np.isin(wordpool, pres_words)])
    newidx = 0
    test_words = []
    for i in range(n):
        t = pres_type[i]
        pair = pres_words[i]
        if t == 0:  # single new: an unstudied word; -1 marks the empty second slot
            test_words.append([new_words[newidx], -1])
            newidx += 1
        elif t == 1:  # single old: one randomly picked member of the studied pair
            pick = rng.choice([0, 1])
            test_words.append([pair[pick], -1])
        elif t == 2:  # pair old: the studied pair, unchanged
            test_words.append(pair.tolist())
        elif t == 3:  # pair new: recombine this pair with the previously studied pair
            prev_pair = pres_words[i-1].tolist()
            order = rng.permutation([pair, prev_pair])
            # first word of one pair joined with the second word of the other
            test_words.append([order[0][0], order[1][1]])
    test_words = np.array(test_words)

    # lag-fitting algorithm: place the probe of studied pair i at position i + lag
    presidx = np.arange(1, n+1, dtype=int)  # 1-based index into pres_words
    testidx = np.zeros(n, dtype=int)  # per test position: 1-based index of the probed pair
    tested = np.zeros(n, dtype=int)  # whether a studied pair already has a test slot
    test_lag = np.zeros(n, dtype=int)  # lag of the test presentation (0 where no lag was fitted)
    test_type = np.full(n, -1, dtype=int)  # -1: no fit; 0: single new; 1: single old; 2: pair old; 3: pair new
    lags = pos_lags.copy()
    while lags.size > 0:
        lag = rng.choice(lags)
        for i in range(n):
            # first untested pair whose slot at i + lag exists and is still free
            if tested[i] == 0 and i + lag <= n-1 and test_type[i+lag] == -1:
                testidx[i+lag] = presidx[i]
                test_type[i+lag] = pres_type[i]
                test_lag[i+lag] = lag
                tested[i] = 1
                break
        else:
            # no pair can be placed with this lag any more: stop drawing it
            lags = np.delete(lags, np.argwhere(lags == lag))

    # fill the remaining free test slots with the pairs that were not placed, in random order.
    # Every placed pair occupies exactly one slot, so the number of free slots equals
    # the number of unplaced pairs.  These rows keep type -1 and lag 0.
    filleridx = rng.permutation(presidx[tested == 0])
    testidx[test_type == -1] = filleridx
    test_seq = test_words[testidx - 1]  # test presentation sequence (testidx is 1-based)

    session_frames.append(pd.DataFrame({
        'position': presidx,
        'session': sess,
        'testidx': testidx,
        'lag': test_lag,
        'type': test_type,
        'study_itemno1': pres_words[:, 0],
        'study_itemno2': pres_words[:, 1],
        'test_itemno1': test_seq[:, 0],
        'test_itemno2': test_seq[:, 1],
    }))

# Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied the
# growing table on every call.  ignore_index=True yields a fresh 0..N-1 index,
# matching the former reset_index(drop=True).
df = pd.concat(session_frames, ignore_index=True) if session_frames else pd.DataFrame()

In [4]:
# Preview the generated design table (one row per position per session).
df

Unnamed: 0,position,session,testidx,lag,type,study_itemno1,study_itemno2,test_itemno1,test_itemno2
0,1,0,155,0,-1,365,97,309,-1
1,2,0,106,0,-1,260,204,74,-1
2,3,0,154,0,-1,238,353,135,-1
3,4,0,71,0,-1,375,126,274,-1
4,5,0,1,4,2,227,366,365,97
...,...,...,...,...,...,...,...,...,...
159995,156,999,154,2,1,201,365,389,-1
159996,157,999,151,6,2,316,342,356,361
159997,158,999,142,16,0,270,41,319,-1
159998,159,999,143,16,3,52,275,208,349


In [5]:
# Persist the design table to disk as a pickle, using the highest pickle protocol available.
with open('../Data/simu3_design.pkl', 'wb') as pkl_file:
    pickle.dump(df, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# df = pd.DataFrame()
# rng = np.random.default_rng(seed=42)

# for sess in range(simu_sess_num):
    
#     tmp_df = pd.DataFrame()
    
#     # algorithm fitting lags
#     presidx = np.arange(1,n+1, dtype=int)
#     testidx = np.zeros(n, dtype=int)
#     tested = np.zeros(n, dtype=int)
#     testlag = np.zeros(n, dtype=int)
#     testtype = np.zeros(n, dtype=int) # 0: single new; 1: single old; 2: pair old; 3: pair new;
#     lags = pos_lags.copy()
#     while True:
#         if lags.size == 0:
#             break
#         lag = rng.choice(lags)
#         for i in range(n):
#             if tested[i] == 0 and i + lag <= n-1 and testidx[i+lag] == 0:
#                 testidx[i+lag] = presidx[i]
#                 tested[i] = 1
#                 testlag[i+lag] = lag
#                 testtype[i+lag] = rng.choice([1,2,3]) if i != 0 else rng.choice([1,2])  # the first item could not be tested by pair new!
#                 break
#         else:
#             lags = np.delete(lags, np.argwhere(lags==lag))
    
#     # average types
#     num0 = sum(testtype == 0)
#     num1 = sum(testtype == 1)
#     num2 = sum(testtype == 2)
#     num3 = sum(testtype == 3)
#     nums = [num0, num1, num2, num3]
#     while nums[0] < n/4:
#         num = max(nums[1:4])
#         for i in range(1,4):
#             if nums[i] == num:
#                 idx = rng.choice(np.where(testtype == i)[0])
#                 testtype[idx] = 0
#                 testlag[idx] = 0
#                 testidx[idx] = 0
#                 nums[i] -= 1
#                 nums[0] += 1
#                 break
#     tmp_df['position'] = presidx
#     tmp_df['session'] = sess
#     tmp_df['testidx'] = testidx
#     tmp_df['lag'] = testlag
#     tmp_df['type'] = testtype

#     # choose present words
#     pres_words = rng.choice(wordpool, 2*n, replace = False)
#     pres_words = np.reshape(pres_words, (n,2))
#     tmp_df['study_itemno1'] = pres_words[:,0]
#     tmp_df['study_itemno2'] = pres_words[:,1]
    
#     # choose test words
#     new_words = rng.permutation(wordpool[~np.isin(wordpool, pres_words)])
#     newidx = 0
#     test_words = []
#     for i in range(n):
#         t = testtype[i]
#         pidx = testidx[i]
#         tmp = pres_words[pidx-1]
#         if t == 0:  # single new
#             test_words.append([new_words[newidx],-1])
#             newidx += 1
#         elif t == 1:  # single old
#             pick = rng.choice([0,1])
#             test_words.append([tmp[pick], -1])
#         elif t == 2:  # pair old
#             test_words.append(tmp.tolist())
#         elif t == 3:  # pair new
#             tmp_pre = pres_words[pidx-2].tolist()
#             order = rng.permutation([tmp, tmp_pre])
#             test_words.append([order[0][0], order[1][1]])
#     test_words = np.array(test_words)
#     tmp_df['test_itemno1'] = test_words[:,0]
#     tmp_df['test_itemno2'] = test_words[:,1]
#     df = df.append(tmp_df)

# df.reset_index(inplace = True, drop=True)