In [1]:
# this code loads the data
# created by Lingwei Ouyang for Neurohackademy project on August 8th, 2025
# load nifti data and output tsv of masked MTL ROIs

In [1]:
# import packages for S3 object
from cloudpathlib import S3Path, S3Client
from pathlib import Path

In [2]:
from utilities import ls, crawl 
from cloudpathlib import CloudPath, S3Client
import nibabel as nib
import numpy as np
import pandas as pd

In [3]:
import nilearn
import matplotlib.pyplot as plt
import os

In [4]:
# load the S3 object first
# Set up our cache path:
cache_path = Path('/tmp/cache')
if not cache_path.exists():
    cache_path.mkdir()

# Create the root S3Path for the NSD:
nsd_base_path = S3Path(
    's3://natural-scenes-dataset/',
    client=S3Client(
        no_sign_request=True,
        local_cache_dir=cache_path))

In [5]:
# change subject_id, label_value, and num_sessions
def nifti_to_array(filepath):
    img = nib.load(filepath.fspath)
    img_dat = img.dataobj
    img_array = np.asanyarray(img_dat)
    return(img_array)

# change this for each subject
def load_masked_session_betas(
    session_num,
    subject_id='subj01', # change for every subject
    resolution='func1mm',
    roi='MTL',
    label_value=1, # go through 1 to 10
    nsd_base_path=nsd_base_path  
):
    # Build file paths
    beta_filename = f'betas_session{session_num:02d}.nii.gz'
    beta_path = nsd_base_path / 'nsddata_betas' / 'ppdata' / subject_id / resolution / 'betas_fithrf_GLMdenoise_RR' / beta_filename
    mask_path = nsd_base_path / 'nsddata' / 'ppdata' / subject_id / resolution / 'roi' / f'{roi}.nii.gz'
    
    print(f"Loading {beta_path}")
    
    # Load data
    func_img = nifti_to_array(beta_path)
    mask_img = nifti_to_array(mask_path)
    print(f"loading func and mask")
    
    # Apply mask
    mask = (mask_img == label_value).astype(bool)
    func = func_img[mask, :]
    
    # Convert to DataFrame
    func_df = pd.DataFrame(func.T)
    func_df['SESSION'] = session_num
    func_df['SUBJECT'] = int(''.join(filter(str.isdigit, subject_id)))
    func_df['TRIAL'] = range(1, len(func_df) + 1)
    
    # Define the output path
    output_path = f"/home/jovyan/projects/memory_NSD/behav/{roi}/{label_value}"

    # Create the directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)
    func_df.to_csv(f"{output_path}/{subject_id}_{roi}_{label_value}_{session_num}.tsv", sep='\t', index=False)
    return func_df

# Loop through all sessions
# change the num_session to the correct one
def load_all_sessions(num_sessions=40, **kwargs):
    for session_num in range(18, num_sessions + 1):
        load_masked_session_betas(session_num, **kwargs)



In [6]:
# Example usage
all_data = load_all_sessions(nsd_base_path=nsd_base_path)

Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session18.nii.gz
loading func and mask
Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session19.nii.gz
loading func and mask
Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session20.nii.gz
loading func and mask
Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session21.nii.gz
loading func and mask
Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session22.nii.gz
loading func and mask
Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session23.nii.gz
loading func and mask
Loading s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/func1mm/betas_fithrf_GLMdenoise_RR/betas_session24.nii.gz
loading func a

KeyboardInterrupt: 

In [None]:
# load H5 files



In [49]:
# for the subject, read the session file
#import pandas as pd
#path_behav = f"/home/jovyan/projects/memory_NSD/behav/{subject_id.replace('j', '')}_behav.tsv"
#behav_file = pd.read_csv(path_behav, sep = '\t')