In [None]:
from bids import BIDSLayout
import os

# Root of the fMRIPrep derivatives tree that will be indexed
data_path = '/Volumes/T9/ds001486/derivatives/fmriprep'

# Index that directory directly, with BIDS validation switched off
layout = BIDSLayout(data_path, validate=False, derivatives=False)

# Collect every preprocessed BOLD image found in the layout.
# NOTE: no task filter is applied at this point; the entries are used as
# filesystem paths by the code that consumes this list.
bold_files = layout.get(
    return_type='file',
    suffix='bold',
    desc='preproc',
    extension='nii.gz',
)


In [None]:
import numpy as np
import os
from nilearn.maskers import NiftiLabelsMasker
from nilearn.connectome import ConnectivityMeasure
from nilearn import datasets

# --- 1. Your Defined Groups ---
# Subject IDs are three-digit strings without the 'sub-' prefix.
mld_subs = [
    '059', '065', '067', '069', '071',
    '075', '076', '077', '078', '083',
    '088', '095', '096', '103', '106',
]

td_subs = [
    '090', '036', '013', '008', '057',
    '070', '023', '024', '053', '044',
    '034', '060', '007', '027', '010',
]

# --- 2. Setup Atlas and Masker ---
# Schaefer 2018 parcellation: 400 ROIs, 7-network variant
atlas = datasets.fetch_atlas_schaefer_2018(n_rois=400, yeo_networks=7)

# Extracts one signal per atlas label from a 4D image
masker = NiftiLabelsMasker(
    labels_img=atlas.maps,
    standardize=True,
    memory=None,
)

# Correlation connectivity, returned as a flat vector without the diagonal
conn_measure = ConnectivityMeasure(
    kind='correlation',
    vectorize=True,
    discard_diagonal=True,
)

# Atlas label names may arrive as bytes; normalise every entry to str
labels = [
    name if not isinstance(name, bytes) else name.decode('utf-8')
    for name in atlas.labels
]

import pandas as pd
import numpy as np
import os

# Per-sample accumulators. They are appended to together, so index i in
# each list refers to the same BOLD run.
X = []          # connectivity feature vector per sample
y_group = []    # group label per sample (1 = MLD)
y_task = []     # 1 for Mult, 0 for Sub
groups = []     # subject ID per sample

print("Starting feature extraction (Math only)...")

# Output folder on your Desktop to avoid read-only errors
output_dir = os.path.expanduser('~/Desktop/math_results')
# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, so no separate (race-prone) existence check is needed.
os.makedirs(output_dir, exist_ok=True)

# The standardisation settings are identical for every file, so configure
# both estimators once here rather than on each iteration. An unsupported
# option therefore fails immediately instead of once per file.
masker.set_params(standardize='zscore_sample')
conn_measure.set_params(standardize='zscore_sample')

# Sets for constant-time membership tests on the two subject lists
mld_ids = set(mld_subs)
td_ids = set(td_subs)

# Filename entities that are carried over into the confounds filename
confound_entities = ('sub-', 'ses-', 'task-', 'run-')

for bold_path in bold_files:
    filename = os.path.basename(bold_path)

    # 1. Precise Task Filtering
    # Only Mult and Sub runs are kept; every other task is skipped.
    if 'task-Mult' in filename:
        current_task = 1
    elif 'task-Sub' in filename:
        current_task = 0
    else:
        continue

    parts = filename.split('_')

    # 2. Subject ID and Grouping (MLD vs TD)
    sub_id = parts[0].split('-')[1]
    if sub_id in mld_ids:
        group_label = 1
    elif sub_id in td_ids:
        group_label = 0
    else:
        # Subjects in neither list must not be labelled 0, otherwise they
        # would be silently counted as TD.
        continue

    # 3. Confound Path Construction
    # Match on the entity prefix so a key that merely contains one of these
    # substrings is not picked up by accident.
    essential_parts = [p for p in parts if p.startswith(confound_entities)]
    confound_name = "_".join(essential_parts) + "_desc-confounds_timeseries.tsv"
    confound_path = os.path.join(os.path.dirname(bold_path), confound_name)

    if not os.path.exists(confound_path):
        print(f"{sub_id}: confounds file not found ({confound_name}). Skipping.")
        continue

    try:
        # Load confounds and keep numeric columns only
        df = pd.read_csv(confound_path, sep='\t')
        df_numeric = df.select_dtypes(include=[np.number])
        # Drop all-NaN columns first, then zero-fill what remains. In the
        # opposite order the fill removes every NaN and the drop is a no-op.
        df_clean = df_numeric.dropna(axis=1, how='all').fillna(0)

        time_series = masker.fit_transform(bold_path, confounds=df_clean)

        # Quality Check for Signal
        if not np.all(np.isfinite(time_series)):
            print(f"{sub_id} has bad signal (NaNs/Infs). Skipping.")
            continue

        correlation_vector = conn_measure.fit_transform([time_series])[0]
    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file
        print(f"Error on {sub_id}: {e}")
        continue

    # --- SUCCESS ---
    X.append(correlation_vector)
    y_group.append(group_label)
    y_task.append(current_task)  # Store 1 for Mult, 0 for Sub
    groups.append(sub_id)
    task_name = "Mult" if current_task == 1 else "Sub"
    print(f"{sub_id} | {task_name} added successfully.")

# --- FINAL SAVE ---
# Turn the four accumulators into arrays in a single step
X, y, yt, g = (np.array(seq) for seq in (X, y_group, y_task, groups))

# Write one .npy file per array into the output folder
saved_arrays = (
    ('X_features.npy', X),
    ('y_labels.npy', y),
    ('y_tasks.npy', yt),          # task labels
    ('subject_groups.npy', g),
)
for out_name, out_array in saved_arrays:
    np.save(os.path.join(output_dir, out_name), out_array)

print(f"\nDONE! Saved {len(y)} samples to {output_dir}")