# Prepare behaviour data for both the BIN and HAD datasets
The data contain the following columns:

- **label**: 1 means animate, -1 means inanimate  
- **rt**: response time in each trial    
- **acc**: accuracy. 1 means right, 0 means wrong    
- **sub**: subject ID  
- **session**: session ID  
- **run**: run ID 


In [23]:
# define path and params
import os
import numpy as np
import pandas as pd
from os.path import join as pjoin
from scipy.io import loadmat

# Input locations: BIN behaviour files, HAD behaviour files, and the two
# label tables used to score the responses.
bin_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/NaturalObject/data/behavior/data/fmri/train'
had_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/action/data/behavior'
animate_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/Analysis_results/imagenet_decoding/imagenet_animate_or_not.mat'
action_path = '/nfs/z1/userhome/ZhouMing/workingdir/BIN/Analysis_results/actionTaxonomy_raw.csv'

# Flat vector stored under 'animate_label'; later cells index it with the
# (1-based) value found in column 1 of each run's 'trial' matrix.
animate_info = loadmat(animate_path)['animate_label'].squeeze()

# One entry per row of the taxonomy table: 1 where the 4th column names the
# sports/exercise/recreation category, -1 everywhere else.
action_class = pd.read_csv(action_path)
is_sports = action_class.iloc[:, 3] == 'Participating_in_sports&Exercise&Recreation'
sports_info = np.where(is_sports.to_numpy(), 1, -1)

# Number of runs iterated per session for each dataset.
n_bin_run = 10
n_had_run = 12

In [11]:
# Column order shared by every per-run table and by the exported CSV.
beh_columns = ['label', 'rt', 'acc', 'sub', 'session', 'run']
# Rows allocated for each BIN run. A run whose 'trial' matrix has a different
# number of rows fails loudly with a NumPy broadcasting ValueError.
n_bin_trial = 100


def _load_bin_run(run_path: str, sub_id: int, sess_id: int, run_id: int) -> pd.DataFrame:
    """Build the behaviour table of a single BIN run.

    Parameters
    ----------
    run_path : str
        Path of the run's .mat file; its 'trial' variable is read.
    sub_id, sess_id, run_id : int
        Values written into the 'sub', 'session' and 'run' columns.

    Returns
    -------
    pandas.DataFrame
        ``n_bin_trial`` rows, float columns in ``beh_columns`` order.

    Raises
    ------
    ValueError
        If the 'trial' matrix does not have ``n_bin_trial`` rows.
    """
    raw_info = loadmat(run_path)['trial']
    # Column 1 of 'trial' is used as a 1-based index into animate_info.
    run_animate = animate_info[(raw_info[:, 1] - 1).astype(int)]

    run_info = np.zeros((n_bin_trial, len(beh_columns)))
    run_info[:, 0] = run_animate
    run_info[:, 1] = raw_info[:, 4]
    # A trial counts as correct when column 3 of 'trial' (presumably the
    # subject's response -- confirm against the task script) equals the label.
    run_info[:, 2] = run_animate == raw_info[:, 3]
    run_info[:, 3] = sub_id
    run_info[:, 4] = sess_id
    run_info[:, 5] = run_id
    return pd.DataFrame(run_info, columns=beh_columns)


core_sub = sorted([i for i in os.listdir(bin_path) if 'core' in i and int(i[-2:])<=10])
remain_sub = sorted([i for i in os.listdir(bin_path) if 'core' not in i and 'sub' in i and int(i[3:])<=40 and int(i[3:])>=10])

# Per-run tables are collected here and concatenated once at the end;
# concatenating inside the loop recopies the whole table on every run.
run_tables = []

# Core subjects: up to four sessions each, n_bin_run runs per session.
for sub_idx, sub_name in enumerate(core_sub):
    sub_path = pjoin(bin_path, sub_name)
    sess_names = sorted([i for i in os.listdir(sub_path) if 'ses' in i and int(i[-2:])<=4])
    for sess_idx, sess_name in enumerate(sess_names):
        sess_path = pjoin(sub_path, sess_name)
        for run_idx in range(1, n_bin_run + 1):
            run_name = '%s_%s_run-%02d.mat'%(sub_name, sess_name, run_idx)
            run_tables.append(
                _load_bin_run(pjoin(sess_path, run_name), sub_idx + 1, sess_idx + 1, run_idx)
            )
            print(f'Finish {run_name}')

# Remaining subjects: a single session ('sess01') each. Their IDs start at 11,
# presumably so they follow the core subjects' IDs.
# NOTE: per the original author, sub20 and sub07 are the same person, so
# sub20-sess01 could be regarded as sub07-sess03; that remapping is NOT
# applied here and sub20 is kept as its own subject.
for sub_idx, sub_name in enumerate(remain_sub):
    sub_path = pjoin(bin_path, sub_name)
    for run_idx in range(1, n_bin_run + 1):
        run_name = '%s_sess01_run%02d.mat'%(sub_name, run_idx)
        run_tables.append(
            _load_bin_run(pjoin(sub_path, 'sess01', run_name), sub_idx + 11, 1, run_idx)
        )
        print(f'Finish {run_name}')

# Fall back to an empty, correctly-labelled table when no run was found.
info = pd.concat(run_tables, axis=0) if run_tables else pd.DataFrame(columns=beh_columns)
info.to_csv(pjoin('/nfs/z1/userhome/ZhouMing/workingdir/BIN/action/utils/HAD/result', 'beh_imagenet.csv'), index=False)


Finish sub-core01_ses-ImageNet01_run-01.mat
Finish sub-core01_ses-ImageNet01_run-02.mat
Finish sub-core01_ses-ImageNet01_run-03.mat
Finish sub-core01_ses-ImageNet01_run-04.mat
Finish sub-core01_ses-ImageNet01_run-05.mat
Finish sub-core01_ses-ImageNet01_run-06.mat
Finish sub-core01_ses-ImageNet01_run-07.mat
Finish sub-core01_ses-ImageNet01_run-08.mat
Finish sub-core01_ses-ImageNet01_run-09.mat
Finish sub-core01_ses-ImageNet01_run-10.mat
Finish sub-core01_ses-ImageNet02_run-01.mat
Finish sub-core01_ses-ImageNet02_run-02.mat
Finish sub-core01_ses-ImageNet02_run-03.mat
Finish sub-core01_ses-ImageNet02_run-04.mat
Finish sub-core01_ses-ImageNet02_run-05.mat
Finish sub-core01_ses-ImageNet02_run-06.mat
Finish sub-core01_ses-ImageNet02_run-07.mat
Finish sub-core01_ses-ImageNet02_run-08.mat
Finish sub-core01_ses-ImageNet02_run-09.mat
Finish sub-core01_ses-ImageNet02_run-10.mat
Finish sub-core01_ses-ImageNet03_run-01.mat
Finish sub-core01_ses-ImageNet03_run-02.mat
Finish sub-core01_ses-ImageNet03

In [24]:
# Column order shared by every per-run table and by the exported CSV.
had_columns = ['label', 'rt', 'acc', 'sub', 'session', 'run']
# Rows allocated for each HAD run. A run whose 'trial' matrix has a different
# number of rows fails loudly with a NumPy broadcasting ValueError.
n_had_trial = 60


def _load_had_run(run_path: str, sub_id: int, sess_id: int, run_id: int) -> pd.DataFrame:
    """Build the behaviour table of a single HAD run.

    Parameters
    ----------
    run_path : str
        Path of the run's .mat file; its 'trial' variable is read.
    sub_id, sess_id, run_id : int
        Values written into the 'sub', 'session' and 'run' columns.

    Returns
    -------
    pandas.DataFrame
        ``n_had_trial`` rows, float columns in ``had_columns`` order.

    Raises
    ------
    ValueError
        If the 'trial' matrix does not have ``n_had_trial`` rows.
    """
    raw_info = loadmat(run_path)['trial']
    # Column 1 of 'trial' is used as a 1-based index into sports_info.
    run_sports = sports_info[(raw_info[:, 1] - 1).astype(int)]

    run_info = np.zeros((n_had_trial, len(had_columns)))
    run_info[:, 0] = run_sports
    run_info[:, 1] = raw_info[:, 4]
    # A trial counts as correct when column 3 of 'trial' (presumably the
    # subject's response -- confirm against the task script) equals the label.
    run_info[:, 2] = run_sports == raw_info[:, 3]
    run_info[:, 3] = sub_id
    run_info[:, 4] = sess_id
    run_info[:, 5] = run_id
    return pd.DataFrame(run_info, columns=had_columns)


sub_names = sorted([i for i in os.listdir(had_path) if 'sub' in i and int(i[-2:])<=30])

# Per-run tables are collected here and concatenated once at the end;
# concatenating inside the loop recopies the whole table on every run.
run_tables = []

# HAD subjects: a single session ('sess01') with up to n_had_run runs each.
# NOTE: per the original author, sub20 and sub07 are the same person, so
# sub20-sess01 could be regarded as sub07-sess03; that remapping is NOT
# applied here and sub20 is kept as its own subject.
for sub_idx, sub_name in enumerate(sub_names):
    sub_path = pjoin(had_path, sub_name)
    for run_idx in range(1, n_had_run + 1):
        run_name = '%s_sess01_run%02d.mat'%(sub_name, run_idx)
        run_path = pjoin(sub_path, 'sess01', run_name)
        # Runs without a behaviour file are reported and left out of the table.
        if not os.path.exists(run_path):
            print(f'Find beh info missing in {run_name}')
            continue
        run_tables.append(_load_had_run(run_path, sub_idx + 1, 1, run_idx))
        print(f'Finish {run_name}')

# Fall back to an empty, correctly-labelled table when no run was found.
info = pd.concat(run_tables, axis=0) if run_tables else pd.DataFrame(columns=had_columns)
info.to_csv(pjoin('/nfs/z1/userhome/ZhouMing/workingdir/BIN/action/utils/HAD/result', 'beh_had.csv'), index=False)


Finish sub01_sess01_run01.mat
Finish sub01_sess01_run02.mat
Finish sub01_sess01_run03.mat
Finish sub01_sess01_run04.mat
Finish sub01_sess01_run05.mat
Finish sub01_sess01_run06.mat
Finish sub01_sess01_run07.mat
Finish sub01_sess01_run08.mat
Finish sub01_sess01_run09.mat
Finish sub01_sess01_run10.mat
Finish sub01_sess01_run11.mat
Finish sub01_sess01_run12.mat
Finish sub02_sess01_run01.mat
Finish sub02_sess01_run02.mat
Finish sub02_sess01_run03.mat
Finish sub02_sess01_run04.mat
Finish sub02_sess01_run05.mat
Finish sub02_sess01_run06.mat
Finish sub02_sess01_run07.mat
Finish sub02_sess01_run08.mat
Finish sub02_sess01_run09.mat
Finish sub02_sess01_run10.mat
Finish sub02_sess01_run11.mat
Finish sub02_sess01_run12.mat
Finish sub03_sess01_run01.mat
Finish sub03_sess01_run02.mat
Finish sub03_sess01_run03.mat
Finish sub03_sess01_run04.mat
Finish sub03_sess01_run05.mat
Finish sub03_sess01_run06.mat
Finish sub03_sess01_run07.mat
Finish sub03_sess01_run08.mat
Finish sub03_sess01_run09.mat
Finish sub