In [34]:
import os
import pandas as pd
import numpy as np
import nibabel as nib

In [35]:
base_data_dir = "/Users/reedandreas/Desktop/academic_unsynched/3892"
TR = 2.0

In [36]:
# print the base data directory files
print(os.listdir(base_data_dir))

['.DS_Store', 'clean_data', 'split_data']


In [37]:
curr_sub_dir = '/clean_data/sub-01'
# for all the files that end in .gz we need to unzip them
for file in os.listdir(base_data_dir + curr_sub_dir):
    if file.endswith('.gz'):
        # unzip the file
        os.system('gunzip ' + base_data_dir + curr_sub_dir + '/' + file)

In [38]:
# open sub-01_ses-action01_task-action_run-01_events.tsv
events_file = base_data_dir + curr_sub_dir + '/sub-01_ses-action01_task-action_run-01_events.tsv'
# read the file into a pandas dataframe
events_df = pd.read_csv(events_file, sep='\t')

In [39]:
events_df

Unnamed: 0,onset,duration,trial_type,response_time,stim_file
0,0.121954,2.01039,46.0,1.377274,Drum corps/v_Drum corps_id_0Qo8FzNJ460_start_2...
1,4.034585,2.014053,104.0,2.441726,Playing kickball/v_Playing kickball_id_HByszN4...
2,8.032399,2.015562,58.0,2.403914,Hand car wash/v_Hand car wash_id_HZW1njYbYPI_s...
3,12.034439,2.013465,138.0,2.649863,Shuffleboard/v_Shuffleboard_id_iyz1BjhV1J4_sta...
4,16.032445,2.015296,32.0,2.443899,Croquet/v_Croquet_id_Q_rVIfFRxbk_start_12.0_la...
5,24.033977,2.012432,48.0,2.84238,Fixing bicycle/v_Fixing bicycle_id_9PhZthMj_do...
6,28.03031,2.01621,150.0,2.822163,Starting a campfire/v_Starting a campfire_id_4...
7,32.030169,2.016148,71.0,2.438269,Kayaking/v_Kayaking_id_fSNi_-ThHRk_start_17.0_...
8,36.03083,2.01523,59.0,2.253623,Hand washing clothes/v_Hand washing clothes_id...
9,40.034215,2.011353,38.0,2.42617,Doing crunches/v_Doing crunches_id_IGQusCP8OyA...


In [40]:
events_df['stim_file'] = events_df['stim_file'].str.split('/').str[0]

In [41]:
def split_nifti(input_file, output_file, start_time, end_time, tr):
    # Load the NIfTI file
    img = nib.load(input_file)
    data = img.get_fdata()
    
    # Calculate start and end volumes
    start_vol = int(start_time / tr)
    end_vol = int(end_time / tr)
    
    # Extract the desired time points
    split_data = data[:,:,:,start_vol:end_vol]
    
    # Create a new NIfTI image with the split data
    split_img = nib.Nifti1Image(split_data, img.affine, img.header)
    
    # Save the new image
    nib.save(split_img, output_file)
    print(f"Split NIfTI saved: {output_file}")

In [42]:
import nibabel as nib
import glob

first_nii = glob.glob(base_data_dir + curr_sub_dir + '/*.nii')

# Load the NIfTI file
img = nib.load(first_nii[0])

# Get the TR from the header
print(img.header)

<class 'nibabel.nifti1.Nifti1Header'> object, endian='<'
sizeof_hdr      : 348
data_type       : np.bytes_(b'')
db_name         : np.bytes_(b'')
extents         : 16384
session_error   : 0
regular         : np.bytes_(b'r')
dim_info        : 0
dim             : [  4  91 109  91 156   1   1   1]
intent_p1       : 0.0
intent_p2       : 0.0
intent_p3       : 0.0
intent_code     : none
datatype        : float64
bitpix          : 64
slice_start     : 0
pixdim          : [-1.  2.  2.  2.  0.  0.  0.  0.]
vox_offset      : 0.0
scl_slope       : nan
scl_inter       : nan
slice_end       : 0
slice_code      : unknown
xyzt_units      : 10
cal_max         : 0.0
cal_min         : 0.0
slice_duration  : 0.0
toffset         : 0.0
glmax           : 0
glmin           : 0
descrip         : np.bytes_(b'2203.11-dirty 2023-08-23T14:21:21+01:00')
aux_file        : np.bytes_(b'')
qform_code      : scanner
sform_code      : scanner
quatern_b       : 0.0
quatern_c       : 1.0
quatern_d       : 0.0
qoffset_x    

In [43]:
events_df['end_time'] = events_df['onset'] + events_df['duration']

In [44]:
events_df

Unnamed: 0,onset,duration,trial_type,response_time,stim_file,end_time
0,0.121954,2.01039,46.0,1.377274,Drum corps,2.132344
1,4.034585,2.014053,104.0,2.441726,Playing kickball,6.048638
2,8.032399,2.015562,58.0,2.403914,Hand car wash,10.047961
3,12.034439,2.013465,138.0,2.649863,Shuffleboard,14.047904
4,16.032445,2.015296,32.0,2.443899,Croquet,18.047741
5,24.033977,2.012432,48.0,2.84238,Fixing bicycle,26.046409
6,28.03031,2.01621,150.0,2.822163,Starting a campfire,30.04652
7,32.030169,2.016148,71.0,2.438269,Kayaking,34.046317
8,36.03083,2.01523,59.0,2.253623,Hand washing clothes,38.04606
9,40.034215,2.011353,38.0,2.42617,Doing crunches,42.045568


In [45]:
# load id_to_stim_file.json
import json
with open('mappings/id_to_stim_file.json', 'r') as f:
    id_to_stim_file = json.load(f)

stim_file_to_id = {}
for key, value in id_to_stim_file.items():
    stim_file_to_id[value] = key
# make a mapping of stim_file to id
# basically we need to get all the stim_file values in the df and assign a numerical value to them
# then we can use that mapping to map to the id

In [53]:
# make a mapping of stim_file to id
# basically we need to get all the stim_file values in the df and assign a numerical value to them
# then we can use that mapping to map to the id

events_df['id'] = None

for index, row in events_df.iterrows():
    # find the id in the filename
    key_word = row['stim_file'].split('/')[0]
    # if the key word is in the id_to_stim_file dict, then we can use that id
    if key_word in id_to_stim_file:
        events_df.at[index, 'id'] = id_to_stim_file[key_word]
    else:
        # generate a new random id and then add to both dicts
        new_id = np.random.randint(0, 1000000)
        id_to_stim_file[key_word] = new_id
        stim_file_to_id[row['stim_file']] = new_id
        events_df.at[index, 'id'] = new_id
# now we have a mapping of stim_file to id
# we can use this to create a new column in the df

print(events_df)


         onset  duration  trial_type  response_time  \
0     0.121954  2.010390        46.0       1.377274   
1     4.034585  2.014053       104.0       2.441726   
2     8.032399  2.015562        58.0       2.403914   
3    12.034439  2.013465       138.0       2.649863   
4    16.032445  2.015296        32.0       2.443899   
5    24.033977  2.012432        48.0       2.842380   
6    28.030310  2.016210       150.0       2.822163   
7    32.030169  2.016148        71.0       2.438269   
8    36.030830  2.015230        59.0       2.253623   
9    40.034215  2.011353        38.0       2.426170   
10   48.035028  2.009345        37.0       2.409387   
11   52.029505  2.014972       117.0       2.366887   
12   56.036046  2.024834        66.0       2.528334   
13   60.030038  2.013805        65.0       2.510370   
14   64.028541  2.015238        50.0       2.607882   
15   72.035488  2.023891       153.0       2.400898   
16   76.032950  2.009496        45.0       2.619499   
17   80.03

In [54]:
# for each id, create a folder
# inside base_data_dir + split_data
for id in events_df['id'].unique():
    os.makedirs(base_data_dir + '/split_data/' + str(id))

In [60]:
# now we need to split the nifti files
# we want to write the resulting split nifti files to the id's folder
# for each subject 01-30
for sub in range(1, 31):
    # need a stringed sub which formats to 01-30
    curr_sub_dir = '/clean_data/sub-' + str(sub).zfill(2)
    # go from 1 to 12 inclusive
    for run in range(1, 13):
        for index, row in events_df.iterrows():
            split_nifti(base_data_dir + curr_sub_dir + '/' + f'clean_sub-{str(sub).zfill(2)}_task-action_run-{run}_desc-preproc_bold.nii', base_data_dir + '/split_data/' + str(row['id']) + '/' + f'sub_{str(sub).zfill(2)}_run_{run}.nii', row['onset'], row['end_time'], TR)

Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/509738/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/949996/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/332793/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/506325/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/275113/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/296266/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/760889/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/326053/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_unsynched/3892/split_data/518569/sub_01_run_1.nii
Split NIfTI saved: /Users/reedandreas/Desktop/academic_

FileNotFoundError: No such file or no access: '/Users/reedandreas/Desktop/academic_unsynched/3892/clean_data/sub-02/clean_sub-02_task-action_run-1_desc-preproc_bold.nii'

In [56]:
# save both the id_to_stim_file and stim_file_to_id to the mappings folder
import json

with open('mappings/id_to_stim_file.json', 'w') as f:
    json.dump(id_to_stim_file, f)

with open('mappings/stim_file_to_id.json', 'w') as f:
    json.dump(stim_file_to_id, f)



# 