In [2]:
from cloudpathlib import S3Client, S3Path
from pathlib import Path

# Local directory handed to the S3 client as its download cache.
# parents=True / exist_ok=True make this cell safe to re-run and let it work
# when the parent directories do not exist yet.
cache_path = Path("/home/jovyan/cache/")
cache_path.mkdir(parents=True, exist_ok=True)

# Base path of the NSD bucket. The client is configured with
# no_sign_request=True (no credentials are supplied) and uses cache_path
# as its local cache directory.
nsd_base_path = S3Path(
    's3://natural-scenes-dataset/',
    client=S3Client(
        no_sign_request=True,
        local_cache_dir=cache_path))


In [3]:
import nibabel as nib
import numpy as np

# Subject and session to process
subject, session = 1, 1

# Beta file for the chosen subject/session inside the NSD bucket
betas_path = (
    nsd_base_path
    / f"nsddata_betas/ppdata/subj{subject:02d}/func1mm/betas_fithrf_GLMdenoise_RR/betas_session{session:02d}.nii.gz"
)

# Open the image and keep only its data object; no division by 300 happens
# here, the conversion to % signal change is applied where the data is read.
img = nib.load(betas_path.fspath)
betas_proxy = img.dataobj

# If memory permits, uncomment below (and stop using betas_proxy) to load the
# whole session at once, already converted to % signal change (divide by 300):
#betas = img.get_fdata(dtype=np.float32) / 300.0
#betas.shape

# Last expression of the cell: shown as the cell output
betas_proxy.shape


(145, 186, 148, 750)

In [4]:
# Load the MTL ROI mask image for the current subject.
# Nothing is masked in this cell; it only opens the image and shows its shape.
mtl_roi_file = f"nsddata/ppdata/subj{subject:02d}/func1mm/roi/MTL.nii.gz"
mtl_mask_path = nsd_base_path / mtl_roi_file
mtl_mask_img = nib.load(mtl_mask_path.fspath)

# Displayed so the spatial dimensions can be compared with the betas
mtl_mask_img.shape

(145, 186, 148)

In [9]:
# Divisor that converts the stored beta values to % signal change
BETA_SCALE = 300.0
# Number of trials read from the proxy per step. Larger values mean fewer
# reads but a bigger temporary (x, y, z, TRIAL_CHUNK) array in memory.
TRIAL_CHUNK = 25

mtl_mask = mtl_mask_img.get_fdata(dtype=np.float32, caching='unchanged')

# Boolean ROI volume: True wherever the mask is non-zero (same shape as spatial dims)
mtl_mask_volume = mtl_mask > 0

# Fail early with a clear message instead of an indexing error inside the loop
if mtl_mask_volume.shape != tuple(betas_proxy.shape[:3]):
    raise ValueError(
        f"mask shape {mtl_mask_volume.shape} does not match betas spatial shape "
        f"{tuple(betas_proxy.shape[:3])}"
    )

# Coordinates of the ROI voxels; column j of mtl_betas corresponds to
# (x_idx[j], y_idx[j], z_idx[j]) because np.where and boolean indexing
# both enumerate the True entries in the same (C) order.
x_idx, y_idx, z_idx = np.where(mtl_mask_volume)

n_trials = betas_proxy.shape[3]
mtl_betas = np.empty((n_trials, x_idx.size), dtype=np.float32)  # shape: trials x voxels

# Read blocks of whole trials instead of one voxel at a time: indexing the
# proxy once per voxel issues one scattered read per ROI voxel, which is what
# made the original loop too slow to finish. An empty mask simply yields a
# (n_trials, 0) array.
for start in range(0, n_trials, TRIAL_CHUNK):
    stop = min(start + TRIAL_CHUNK, n_trials)
    trial_block = np.asarray(betas_proxy[..., start:stop], dtype=np.float32)
    # trial_block[mask] has shape (voxels, trials_in_block); transpose to trials x voxels
    mtl_betas[start:stop] = trial_block[mtl_mask_volume].T / BETA_SCALE

mtl_betas.shape


KeyboardInterrupt: 

In [None]:
import h5py

# Combined HDF5 file for all subjects, opened in append mode ('a') so that
# each run adds to the same file instead of replacing it.
hdf5_path = cache_path / "all_subjects_mtl_betas.h5"
# Create the containing directory only. Calling mkdir on hdf5_path itself
# would create a directory at the file's location, so the file could not be opened.
hdf5_path.parent.mkdir(parents=True, exist_ok=True)

session_key = f"session{session:02d}"

with h5py.File(hdf5_path, 'a') as hf:
    # One group per subject; require_group returns the existing group or creates it
    subj_group = hf.require_group(f"subj{subject:02d}")

    # Overwrite the session dataset if it exists, since create_dataset
    # raises when the name is already taken
    if session_key in subj_group:
        del subj_group[session_key]

    subj_group.create_dataset(session_key, data=mtl_betas, compression='gzip')

In [1]:
import gc
# Run a full garbage collection pass; the return value (number of unreachable
# objects found) is shown as the cell output. Objects still bound to notebook
# variables are not freed by this call.
gc.collect()

37