In [90]:
import numpy as np
import pandas as pd
from pathlib import Path


In [107]:
# Locate the FunctionalFusion data root: use the mounted volume when it is
# reachable, otherwise fall back to the server-side path.
base_dir = '/Volumes/diedrichsen_data$/data/FunctionalFusion'
base_dir = base_dir if Path(base_dir).exists() else '/srv/diedrichsen/data/FunctionalFusion'

# Derivatives folder of the Nishimoto 103-task dataset (kept as a plain string,
# since later cells build file paths from it by string concatenation).
data_dir = f'{base_dir}/Nishimoto_103Task/derivatives'


In [92]:
# subject numbers included in the dataset (1 through 6)
subj_id = list(range(1, 7))

In [108]:
# go through subjects and create a dataframe for all the subjects
def check_design(ses_id = "ses-01", division = 7, subj_list = None, root_dir = None):
    """Load and stack the per-subject regressor-info tables of one session.

    For every subject number in ``subj_list`` the file
    ``<root_dir>/sub-XX/estimates/<ses_id>/sub-XX_<ses_id>_reginfo.tsv`` is read
    (tab-separated) and the tables are concatenated in the order given. Each
    file's own row index is kept as-is, so index labels repeat across subjects.

    Args:
        ses_id: session label used in both the directory and the file name.
        division: run number at which the session is split; rows with
            ``run < division`` get ``half == 1.0``, all other rows ``0.0``.
        subj_list: iterable of integer subject numbers (formatted as two digits).
        root_dir: directory that contains the ``sub-XX`` folders. When omitted,
            the notebook-level ``data_dir`` is used.

    Returns:
        DataFrame holding the columns of the reginfo files plus two added float
        columns: ``present`` (1.0 where ``n_rep > 0``, else 0.0) and ``half``.

    Raises:
        ValueError: if ``subj_list`` is missing or empty.
    """
    from pathlib import Path

    # Materialise once so any iterable (list, tuple, numpy array, ...) is accepted
    # and the emptiness check below is unambiguous.
    subjects = list(subj_list) if subj_list is not None else []
    if not subjects:
        raise ValueError("subj_list must contain at least one subject number")

    if root_dir is None:
        # fall back to the derivatives directory defined in the config cell
        root_dir = data_dir
    root_dir = Path(root_dir)

    frames = []
    for s in subjects:
        sub = f'sub-{s:02d}'
        reginfo = root_dir / sub / 'estimates' / ses_id / f'{sub}_{ses_id}_reginfo.tsv'
        frames.append(pd.read_csv(reginfo, sep='\t'))

    df = pd.concat(frames)

    # add a column (0 if the task is not done at all, 1 if it was done at least once)
    # assigned as a plain array so values are set by position, not by index label
    df['present'] = (df["n_rep"] > 0).to_numpy(dtype=float)

    # add another column to represent the half (1s for the first half and 0s for the second half)
    df["half"] = (df["run"] < division).to_numpy(dtype=float)
    return df


In [109]:
# how many times in total a task was repeated in ses-01
dd = check_design(ses_id = "ses-01", division = 7, subj_list = subj_id)
# task x subject table of summed n_rep; the string "sum" is used instead of np.sum
# because recent pandas versions emit a FutureWarning for numpy callables as aggfunc
tabl = pd.crosstab(index = dd["task_name"], columns=dd["sn"], values=dd["n_rep"], aggfunc="sum")
# distinct totals occurring anywhere in the table (only the last expression of a
# cell is displayed, so the table itself is not echoed here)
np.unique(tabl)

array([  8, 132])

In [111]:
# how many times in total a task was DONE in ses-01: SOME TASKS ARE DONE MORE THAN ONCE IN A RUN
dd = check_design(ses_id = "ses-01", division = 7, subj_list = subj_id)
# task x subject table counting the rows in which the task was present (n_rep > 0);
# "sum" replaces np.sum to avoid the pandas FutureWarning for numpy callables
tabl = pd.crosstab(index = dd["task_name"], columns=dd["sn"], values=dd["present"], aggfunc="sum")
# distinct counts occurring anywhere in the table (only the last expression of a
# cell is displayed, so the table itself is not echoed here)
np.unique(tabl)

array([ 5.,  6.,  7.,  8., 12.])

In [104]:
# presence counts per task, split by subject and by session half (half == 1.0 marks
# runs before the division point); "sum" replaces np.sum to avoid the pandas
# FutureWarning for numpy callables passed as aggfunc
tabl2 = pd.crosstab(index = [dd["task_name"]], columns=[dd["sn"], dd["half"]], values=dd["present"], aggfunc="sum")
tabl2

sn,1,1,2,2,3,3,4,4,5,5,6,6
half,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
task_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
AnimalPhoto,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
AnimalVoice,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
CalcEasy,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
CalcHard,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
CategoryFluency,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...
TimeValue,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
TrafficSign,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
WordMeaning,2.0,3.0,2.0,3.0,2.0,3.0,2.0,3.0,2.0,3.0,2.0,3.0
WorldName,4.0,3.0,4.0,3.0,4.0,3.0,4.0,3.0,4.0,3.0,4.0,3.0


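Take WordMeaning as an example: in the table above it is present in only 5 runs (2 + 3), which is fewer than its total number of repetitions. Based on this and the previous table, a task like WorldName or WordMeaning was therefore repeated twice in one run.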

In [106]:
# same check for ses-02 (split point at run 4): total repetitions per task and subject
dd2 = check_design(ses_id = "ses-02", division = 4, subj_list=subj_id)
# "sum" replaces np.sum to avoid the pandas FutureWarning for numpy callables
tabl = pd.crosstab(index = dd2["task_name"], columns=dd2["sn"], values=dd2["n_rep"], aggfunc="sum")
tabl

sn,1,2,3,4,5,6
task_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AnimalPhoto,4,4,4,4,4,4
AnimalVoice,4,4,4,4,4,4
CalcEasy,4,4,4,4,4,4
CalcHard,4,4,4,4,4,4
CategoryFluency,4,4,4,4,4,4
...,...,...,...,...,...,...
TimeValue,4,4,4,4,4,4
TrafficSign,4,4,4,4,4,4
WordMeaning,4,4,4,4,4,4
WorldName,4,4,4,4,4,4


In [112]:
# ses-02 repetitions per task, split by subject and by session half;
# "sum" replaces np.sum to avoid the pandas FutureWarning for numpy callables
tabl2 = pd.crosstab(index = [dd2["task_name"]], columns=[dd2["sn"], dd2["half"]], values=dd2["n_rep"], aggfunc="sum")
# distinct totals occurring anywhere in the table (only the last expression of a
# cell is displayed, so the table itself is not echoed here)
np.unique(tabl2)

array([ 2, 33])