# Prepare behaviour data for both the BIN and HAD datasets
The resulting data contains the following columns:

- **sub**: subject ID  

- **session**: session ID  

- **run**: run ID 

- **stim**: stimulus name, identified by its ImageNet ID

- **category**: category of the stimulus, indexed from 0 to 999 as defined in ImageNet

- **label**: 1 means animate, -1 means inanimate  

- **fMRI_rt**: response time in the fMRI experiment 

- **fMRI_acc**: accuracy in the fMRI experiment. 1 means correct, 0 means wrong   

- **recog_rt**: response time in the recognition experiment, in which subjects report whether or not they remember the stimulus

- **recog_key**: response in the recognition experiment. 1 means the stimulus was remembered, 0 means it was not remembered  

In [1]:
# define path and params
import os
import numpy as np
import pandas as pd
from os.path import join as pjoin
from scipy.io import loadmat

# --- experiment layout -------------------------------------------------------
n_bin_run = 10     # fMRI runs per BIN (ImageNet) session
n_bin_stim = 100   # trials per BIN run
n_had_run = 12     # runs per HAD session
n_recog_run = 4    # runs of the recognition experiment

# --- data locations ----------------------------------------------------------
bin_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/NaturalObject/data/behavior/data/fmri/train'
had_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/action/data/behavior'
animate_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/Analysis_results/imagenet_decoding/imagenet_animate_or_not.mat'
action_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/Analysis_results/actionTaxonomy_raw.csv'

# --- label lookup tables -----------------------------------------------------
# Per-category animacy labels, later indexed with (category - 1).
animate_info = loadmat(animate_path)['animate_label'].squeeze()

# Per-action-class sports labels: +1 where the 4th csv column names the
# sports/exercise/recreation super-class, -1 everywhere else.
action_class = pd.read_csv(action_path)
is_sports = action_class.iloc[:, 3].eq('Participating_in_sports&Exercise&Recreation').to_numpy()
sports_info = np.where(is_sports, 1, -1)

In [7]:
# Build the BIN (ImageNet) behaviour table and write it to beh_imagenet.csv.
#
# Subject folders are processed in this order: core subjects numbered up to 10,
# then sub07, then the remaining 'subNN' folders with 10 <= NN <= 40.
bin_entries = os.listdir(bin_path)
core_sub = sorted(i for i in bin_entries if 'core' in i and int(i[-2:]) <= 10)
remain_sub = sorted(i for i in bin_entries
                    if 'core' not in i and 'sub' in i and 10 <= int(i[3:]) <= 40)
remain_sub.insert(0, 'sub07')
core_sub.extend(remain_sub)

# Final column layout of the csv, and the numeric subset held in `sess_info`
# ('stim' and 'category' are inserted afterwards, at positions 3 and 4).
info_columns = ['sub', 'session', 'run', 'stim', 'category', 'label',
                'fMRI_rt', 'fMRI_acc', 'recog_rt', 'recog_key']
numeric_columns = ['sub', 'session', 'run', 'label', 'fMRI_rt', 'fMRI_acc', 'recog_rt', 'recog_key']

# One DataFrame per session; concatenated once at the end instead of growing a
# frame inside the loop (which is quadratic and warns on recent pandas when the
# first operand is an empty frame).
sess_frames = []

# sub20 is the same person as sub07, so sub20's data is filed under sub07's
# subject index. Every subject processed after sub20 would otherwise carry an
# index that is one too high; this flag is set to 1 once sub20 is reached and
# is subtracted from the subject index from then on.
special_flag = 0

for sub_idx, sub_name in enumerate(core_sub):
    sub_path = pjoin(bin_path, sub_name)
    sess_names = sorted(i for i in os.listdir(sub_path) if 'ses' in i and int(i[-2:]) <= 4)
    for sess_idx, sess_name in enumerate(sess_names):
        sess_path = pjoin(sub_path, sess_name)
        design_path = pjoin(sess_path, '%s_%s_design.mat' % (sub_name, sess_name))
        design_info = loadmat(design_path)

        # Per-trial stimulus names and category ids for the whole session.
        stim_sess = [x[0] for x in design_info['sessStim'].flatten()]
        category_sess = design_info['sessPar'][:, :, 1].flatten()

        # Numeric part of the session table; recognition columns stay NaN when
        # no recognition data is found for this session.
        sess_info = np.full((n_bin_stim * n_bin_run, len(numeric_columns)), np.nan)

        if sub_name == 'sub20':
            # sub20 and sub07 are the same person, so sub20-sess01 is regarded
            # as sub07-sess03 in the fMRI part of the table.
            sub_idx, sess_idx, special_flag = 11, 2, 1

        for run_idx in range(1, n_bin_run + 1):
            # The first ten (core) subjects use 'run-XX' file names, the rest 'runXX'.
            if sub_idx < 10:
                run_name = '%s_%s_run-%02d.mat' % (sub_name, sess_name, run_idx)
            else:
                run_name = '%s_%s_run%02d.mat' % (sub_name, sess_name, run_idx)
            raw_info = loadmat(pjoin(sess_path, run_name))['trial']

            # Column 1 of `trial` is used as the 1-based category id, column 3
            # is compared against the animacy label, column 4 is stored as the
            # response time.
            run_animate = animate_info[(raw_info[:, 1] - 1).astype(int)]
            acc_info = run_animate == raw_info[:, 3]

            # Fill this run's rows in the order: sub, session, run, label, rt, acc.
            rows = slice(n_bin_stim * (run_idx - 1), n_bin_stim * run_idx)
            sess_info[rows, :3] = (int(sub_idx + 1 - special_flag), int(sess_idx + 1), run_idx)
            sess_info[rows, 3] = run_animate
            sess_info[rows, 4] = raw_info[:, 4]
            sess_info[rows, 5] = acc_info

        # The recognition files follow their own session numbering.
        if sub_name == 'sub20':
            # In the recognition experiment sub20-sess01 is NOT regarded as sub07-sess03.
            sess_idx = 0
        if sub_name == 'sub07':
            sess_idx += 2

        # Merge the recognition experiment into the session table if it exists.
        beh_prefix = 'sub%s_sess%02d' % (sub_name[-2:], sess_idx + 1)
        if os.path.exists(pjoin(sess_path, beh_prefix + '_run01_beh.mat')):
            recog_info = np.zeros((500, n_recog_run, 4))
            for run_recog_idx in range(n_recog_run):
                run_recog_path = pjoin(sess_path, '%s_run%02d_beh.mat' % (beh_prefix, run_recog_idx + 1))
                run_recog_info = loadmat(run_recog_path)['trial']
                recog_info[:, run_recog_idx] = run_recog_info[:, 1:]  # category, condition, response, rt
            # Re-order the recognition trials to match the fMRI trial order:
            # keep trials whose condition equals 1, sort them by category, then
            # look each fMRI trial up by its 1-based category id.
            recog_info = recog_info.reshape((-1, 4))
            recog_info = recog_info[recog_info[:, 1] == 1]
            recog_info = recog_info[recog_info[:, 0].argsort()]
            recog_info = recog_info[(category_sess - 1).astype(int)]
            sess_info[:, -2] = recog_info[:, -1]                      # recog_rt
            sess_info[:, -1] = (recog_info[:, -2] == 1).astype(int)   # recog_key
            print(f'Find beh info in {sub_name}_{sess_name}')
        else:
            print(f'Not Find beh info in {sub_name}_{sess_name}')

        # Assemble this session's frame in the final column order.
        sess_df = pd.DataFrame(sess_info, columns=numeric_columns)
        sess_df.insert(3, 'stim', stim_sess)
        sess_df.insert(4, 'category', category_sess)
        sess_frames.append(sess_df)

# A single concat; fall back to an empty, correctly-labelled frame when no
# session was found at all.
if sess_frames:
    info = pd.concat(sess_frames, axis=0)
else:
    info = pd.DataFrame(columns=info_columns)
info.to_csv(pjoin('/nfs/z1/userhome/ZhouMing/workingdir/BIN/action/utils/HAD/result', 'beh_imagenet.csv'), index=False)


Find beh info in sub-core01_ses-ImageNet01
Find beh info in sub-core01_ses-ImageNet02
Find beh info in sub-core01_ses-ImageNet03
Find beh info in sub-core01_ses-ImageNet04
Not Find beh info in sub-core02_ses-ImageNet01
Find beh info in sub-core02_ses-ImageNet02
Find beh info in sub-core02_ses-ImageNet03
Find beh info in sub-core02_ses-ImageNet04
Not Find beh info in sub-core03_ses-ImageNet01
Find beh info in sub-core03_ses-ImageNet02
Find beh info in sub-core03_ses-ImageNet03
Find beh info in sub-core03_ses-ImageNet04
Find beh info in sub-core04_ses-ImageNet01
Find beh info in sub-core04_ses-ImageNet02
Find beh info in sub-core04_ses-ImageNet03
Find beh info in sub-core04_ses-ImageNet04
Find beh info in sub-core05_ses-ImageNet01
Find beh info in sub-core05_ses-ImageNet02
Find beh info in sub-core05_ses-ImageNet03
Find beh info in sub-core05_ses-ImageNet04
Find beh info in sub-core06_ses-ImageNet01
Find beh info in sub-core06_ses-ImageNet02
Find beh info in sub-core06_ses-ImageNet03
Fin

In [32]:
# Sanity check: do the stimulus names of the recognition experiment line up
# with the trial order of the fMRI experiment?
# 'sub-core01' / 'ses-ImageNet01' serves as the example session.

sess_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/NaturalObject/data/behavior/data/fmri/train/sub-core01/ses-ImageNet01'
design_path = pjoin(sess_path, '%s_%s_design.mat'%('sub-core01', 'ses-ImageNet01'))
design_info = loadmat(design_path)
category_sess = design_info['sessPar'][:, :, 1].flatten()

# Stack the recognition runs: 500 trials x 4 runs x (category, condition, response, rt).
recog_info = np.zeros((500, 4, 4))
for run_recog_idx in range(n_recog_run):
    beh_name = 'sub%02d_sess%02d_run%02d_beh.mat'%(1, 1, run_recog_idx+1)
    run_recog_path = pjoin(sess_path, beh_name)
    recog_info[:, run_recog_idx] = loadmat(run_recog_path)['trial'][:, 1:]

# Same re-ordering as in the extraction cell: keep trials whose condition is 1,
# then rank them by category.
recog_info = recog_info.reshape((2000, 4))
present_idx = recog_info[:, 1] == 1
present_rows = recog_info[present_idx]
sort_idx = present_rows[:, 0].argsort()

# NOTE(review): 'exampleName' is read from the LAST run file only and reshaped
# to 2000 entries - presumably every run file stores the names of all runs; confirm.
stim_name = loadmat(run_recog_path)['exampleName'].reshape((2000))
stim_name = stim_name[present_idx][sort_idx][category_sess - 1]
# Next step: inspect the variables by hand to check whether stim_name matches
# the sub-core01_ses-ImageNet01 stimuli listed in the info csv.


## HAD behaviour part

In [24]:
# Build the HAD behaviour table and write it to beh_had.csv.
#
# Every 'subNN' folder with NN <= 30 is scanned for the runs of its single
# session ('sess01'); missing run files are reported and skipped.
sub_names = sorted(i for i in os.listdir(had_path) if 'sub' in i and int(i[-2:]) <= 30)

had_columns = ['label', 'rt', 'acc', 'sub', 'session', 'run']
n_had_trial = 60  # trials per HAD run

# One DataFrame per run; concatenated once at the end instead of growing a
# frame inside the loop (which is quadratic and warns on recent pandas when the
# first operand is an empty frame).
run_frames = []

for sub_idx, sub_name in enumerate(sub_names):
    sub_path = pjoin(had_path, sub_name)
    for run_idx in range(1, n_had_run + 1):
        run_name = '%s_sess01_run%02d.mat' % (sub_name, run_idx)
        run_path = pjoin(sub_path, 'sess01', run_name)
        if not os.path.exists(run_path):
            print(f'Find beh info missing in {run_name}')
            continue

        raw_info = loadmat(run_path)['trial']
        # Column 1 of `trial` is used as the 1-based action-class id, column 3
        # is compared against the sports label, column 4 is stored as the
        # response time.
        run_sports = sports_info[(raw_info[:, 1] - 1).astype(int)]

        # Columns in order: label, rt, acc, sub, session, run.
        run_info = np.zeros((n_had_trial, len(had_columns)))
        run_info[:, 0] = run_sports
        run_info[:, 1] = raw_info[:, 4]
        run_info[:, 2] = run_sports == raw_info[:, 3]
        run_info[:, 3:] = (int(sub_idx + 1), 1, run_idx)  # session is always 1

        run_frames.append(pd.DataFrame(run_info, columns=had_columns))
        print(f'Finish {run_name}')

# A single concat; fall back to an empty, correctly-labelled frame when no run
# file was found at all.
if run_frames:
    info = pd.concat(run_frames, axis=0)
else:
    info = pd.DataFrame(columns=had_columns)
info.to_csv(pjoin('/nfs/z1/userhome/ZhouMing/workingdir/BIN/action/utils/HAD/result', 'beh_had.csv'), index=False)


Finish sub01_sess01_run01.mat
Finish sub01_sess01_run02.mat
Finish sub01_sess01_run03.mat
Finish sub01_sess01_run04.mat
Finish sub01_sess01_run05.mat
Finish sub01_sess01_run06.mat
Finish sub01_sess01_run07.mat
Finish sub01_sess01_run08.mat
Finish sub01_sess01_run09.mat
Finish sub01_sess01_run10.mat
Finish sub01_sess01_run11.mat
Finish sub01_sess01_run12.mat
Finish sub02_sess01_run01.mat
Finish sub02_sess01_run02.mat
Finish sub02_sess01_run03.mat
Finish sub02_sess01_run04.mat
Finish sub02_sess01_run05.mat
Finish sub02_sess01_run06.mat
Finish sub02_sess01_run07.mat
Finish sub02_sess01_run08.mat
Finish sub02_sess01_run09.mat
Finish sub02_sess01_run10.mat
Finish sub02_sess01_run11.mat
Finish sub02_sess01_run12.mat
Finish sub03_sess01_run01.mat
Finish sub03_sess01_run02.mat
Finish sub03_sess01_run03.mat
Finish sub03_sess01_run04.mat
Finish sub03_sess01_run05.mat
Finish sub03_sess01_run06.mat
Finish sub03_sess01_run07.mat
Finish sub03_sess01_run08.mat
Finish sub03_sess01_run09.mat
Finish sub