In [1]:
# this notebook is meant to prepare the sync pipeline for neural analysis
# run this code after cloning the data set

In [1]:
import os
import glob
import csv

import datalad.api as dl
import pandas as pd
import numpy as np
import nibabel as nb

In [3]:
# make sure you install datalad!
# once you have it - run this box to get the backbone of the dataset (not downloaded)

# ! change user name in designated directory !
! datalad clone https://github.com/OpenNeuroDatasets/ds000113.git /home/anakin/Desktop/ds000113 

Clone attempt:   0%|              | 0.00/2.00 [00:00<?, ? Candidate locations/s]
Enumerating: 0.00 Objects [00:00, ? Objects/s][A
                                              [A
Counting:   0%|                               | 0.00/3.00 [00:00<?, ? Objects/s][A
                                                                                [A
Compressing:   0%|                            | 0.00/3.00 [00:00<?, ? Objects/s][A
                                                                                [A
Receiving:   0%|                             | 0.00/48.6k [00:00<?, ? Objects/s][A
Receiving:   1%|▏                     | 487/48.6k [00:00<00:20, 2.31k Objects/s][A
Receiving:   5%|█                   | 2.43k/48.6k [00:00<00:14, 3.11k Objects/s][A
Receiving:  22%|████▍               | 10.7k/48.6k [00:00<00:08, 4.35k Objects/s][A
Receiving:  43%|████████▌           | 20.9k/48.6k [00:00<00:04, 5.75k Objects/s][A
Receiving:  48%|█████████▌          | 23.3k/48.6k [00:03<00:11,

In [18]:
# define the path to which the the dataset was initialized
data_dir = r'/home/anakin/Desktop/ds000113'

# define sub list for download
# you can check the folder to understand how the dataset is orgenized (BIDS format)
# for the purpose of the workshop we will download 2 runs for 8 subjects: subs 1-6 + 9-10

last_sub = 10
excluded = [7,8]
sub_list = ["sub-{:02d}".format(i) for i in range(1,last_sub+1) if i not in excluded]

# creat a dataset object using datalad API
ds = dl.Dataset(data_dir)
ds

Dataset('/home/anakin/Desktop/ds000113')

In [5]:
# we will download the functional and anatomical data using a for loop
# this includes run-3 and run-7 functional files (and their json file) and the anatomical file

for sub in sub_list:
    ds.get(glob.glob(os.path.join(data_dir,sub,'ses-movie','func',f'{sub}_ses-movie_task-movie_run-3_bold*')))
    ds.get(glob.glob(os.path.join(data_dir,sub,'ses-movie','func',f'{sub}_ses-movie_task-movie_run-7_bold*')))
    ds.get(glob.glob(os.path.join(data_dir,sub,'ses-forrestgump','anat',f'{sub}_ses-forrestgump_T1w.nii.gz')))
    
# check the directory and make sure downloaded files are now marked with a lock and not with a cross sign

In [6]:
# we can also tcheck how much data we downloaded using
ds.status(annex='all')

4895 annex'd files (1.1 GB/421.4 GB present/total size)
nothing to save, working tree clean


[{'type': 'file',
  'gitshasum': '89e69c0490716d5ab0861f147e66a4d1df2f8df0',
  'bytesize': 59,
  'prev_gitshasum': '89e69c0490716d5ab0861f147e66a4d1df2f8df0',
  'state': 'clean',
  'path': '/home/anakin/Desktop/ds000113/.bidsignore',
  'parentds': '/home/anakin/Desktop/ds000113',
  'refds': '/home/anakin/Desktop/ds000113',
  'action': 'status',
  'status': 'ok'},
 {'type': 'file',
  'gitshasum': 'c144473713ce9fe7a4d10a31ae82b8b605e36cac',
  'bytesize': 132,
  'prev_gitshasum': 'c144473713ce9fe7a4d10a31ae82b8b605e36cac',
  'state': 'clean',
  'path': '/home/anakin/Desktop/ds000113/.datalad/.gitattributes',
  'parentds': '/home/anakin/Desktop/ds000113',
  'refds': '/home/anakin/Desktop/ds000113',
  'action': 'status',
  'status': 'ok'},
 {'type': 'file',
  'gitshasum': '54bb368b084d39cff76d50133576769e172745c3',
  'bytesize': 63,
  'prev_gitshasum': '54bb368b084d39cff76d50133576769e172745c3',
  'state': 'clean',
  'path': '/home/anakin/Desktop/ds000113/.datalad/config',
  'parentds': '/h

In [7]:
# let's get a few more files 
ds.get(glob.glob(os.path.join(data_dir,'stimuli','annotations','*')))
ds.get(glob.glob(os.path.join(data_dir,'task-movie_bold.json')))
ds.get(glob.glob(os.path.join(data_dir,'participants.tsv')))

[{'action': 'get',
  'path': '/home/anakin/Desktop/ds000113/participants.tsv',
  'type': 'file',
  'refds': '/home/anakin/Desktop/ds000113',
  'status': 'notneeded',
  'message': 'already present'}]

In [8]:
# we will make a renamed copy (task = 'sync') of the data we downloaded:
# - create a copy of functional data for selected runs (renamed to 'task-sync') for all subjects
# - create a copy of the anatomical file (renamed) in a new folder

segments = [3,7] # original run numbers
runs = [i for i in range(1,3)] # desired naming convention
end_str = r"_bold.nii.gz"

for sub in sub_list:
    
    # anatomical file
    os.makedirs(os.path.join(data_dir,sub,'ses-movie','anat')) # create a directory
    anat_in = os.path.join(data_dir,sub,'ses-forrestgump','anat',sub +'_ses-forrestgump_T1w.nii.gz')
    img = nb.load(anat_in)
    anat_out = os.path.join(data_dir,sub,'ses-movie','anat',sub +'_T1w.nii.gz')
    save = nb.save(img, anat_out)
    
    for seg,run in zip(segments,runs):         
    
    # functional files
        func_in = os.path.join(data_dir,sub,'ses-movie','func',sub +'_ses-movie_task-movie_run-'+str(seg)+end_str)
        img = nb.load(func_in)
        func_out = os.path.join(data_dir,sub,'ses-movie','func',sub +'_task-sync_run-'+str(run)+end_str)
        save = nb.save(img, func_out)

# note that the new files are not associated with the original datalad dataset (dont have a lock on their icon)
# which is desired if we want to manipulate the file. 
# for the purpose of this workshop we will not slice and dice the files themselves 
# but if you ever need to do this - know that it is now possible. 

In [22]:
# another thing to be done is to copy our event files of both runs to every subject functional folder
# since I modeled mutual events (percieved audiovisual features and social synchrony), 
# the event file is the same for all subjects.

# in case you are running your own paradigm - it is highly recommended taht you create a tsv file
# for the experimental events in the same format as displayed here 
# (4 columns named 'onset','duration','weight' and 'stimulus')

run1_data = pd.read_csv('data/sync_run-1_events.tsv',delimiter='\t')
run2_data = pd.read_csv('data/sync_run-2_events.tsv',delimiter='\t')
run1_data.head() # print the start of dataframe to see the format, we will drop the index column later when saving

headers =  ["onset", "duration", "weight", "stimulus"]

for sub in sub_list:
    # use BIDS format naming
    filename_run1 = os.path.join(data_dir,sub,'ses-movie','func',sub +'_task-sync_run-1_events.tsv')
    filename_run2 = os.path.join(data_dir,sub,'ses-movie','func',sub +'_task-sync_run-2_events.tsv')
    # save events
    run1_data.to_csv(filename_run1, sep='\t', index=False)
    run2_data.to_csv(filename_run2, sep='\t', index=False)

In [13]:
# lastly - we will use FSL BET to skull-strip our anatomical data - important for preprocessing stage!

import nipype.interfaces.fsl as fsl

bet = fsl.BET()
for sub in sub_list:
    bet.inputs.frac = 0.5 # fractional intensity threshold, 0.5 is default
    bet.inputs.in_file = os.path.join(data_dir,sub,'ses-movie','anat',sub +'_T1w.nii.gz')
    bet.inputs.out_file = os.path.join(data_dir,sub,'ses-movie','anat',sub +'_T1w_brain.nii.gz')
    result = bet.run()

In [11]:
print('all preperations are done!')

all preperations are done!
