### 1 Importing the libraries

In [1]:
import sys
!{sys.executable} -m pip install nilearn tqdm openpyxl

import runpy
import os
import numpy as np
import pandas as pd
import pickle
from datetime import datetime

from nilearn import datasets
from nilearn.input_data import NiftiMapsMasker
from confounds import extract_confounds

from tqdm import tqdm
from nilearn.image import load_img
from nilearn.maskers import NiftiMapsMasker
import math

### 2 Loading the paths to fMRI files

In [2]:
# Load the paths of the preprocessed functional NIfTI files.
# The pickle must hold a mapping {raw subject ID: path to functional image};
# the time-series extraction cell iterates it with `.items()`.
# NOTE(review): pickle.load can execute arbitrary code — only load a pickle
# that was produced by this project.
with open("rfmri_rest_paths.pkl", "rb") as fp:
    func_imgs = pickle.load(fp)

# Fail fast with a readable message instead of an AttributeError much later.
if not hasattr(func_imgs, "items"):
    raise TypeError(
        "rfmri_rest_paths.pkl must contain a mapping of subject IDs to "
        f"functional image paths, got {type(func_imgs).__name__}"
    )

### 3 Loading the labels

In [3]:
import math

path_to_excel = "demo_data_bin_test.xlsx"

# Parse the workbook a single time; reading it again for `df_meta` would only
# repeat the same (slow) Excel parsing.
df = pd.read_excel(path_to_excel)
ids = df["Subject ID how it's defined in lab/project"].to_numpy()

# Independent copy, so later edits to `df_meta` never leak into `df`.
df_meta = df.copy()

### 4 Loading the atlas

In [4]:
# Atlas dimensionality handed to fetch_difumo below.
dimension_of_atlas = 64

# Execute the local fetcher.py script and pick its `fetch_difumo` function out
# of the namespace dict that runpy returns.
fetcher = runpy.run_path('fetcher.py')
fetch_difumo = fetcher['fetch_difumo']

# Fetch the atlas at the requested dimension and keep its `.maps` attribute.
difumo_atlas = fetch_difumo(dimension=dimension_of_atlas)
maps_img = difumo_atlas.maps

### 4b Fetching the grey-matter mask shipped with nilearn's ICBM152 templates

In [5]:
# Grey-matter mask from nilearn's ICBM152 dataset fetcher; the mask is
# downloaded to home/nilearn_data/icbm152_2009.
gm_mask_threshold = 0.2
gm_mask = datasets.fetch_icbm152_brain_gm_mask(threshold=gm_mask_threshold)

### 5 Extract timeseries

In [None]:
import os
import pickle
from datetime import datetime
from nilearn._utils.niimg_conversions import check_same_fov
from nilearn.maskers import NiftiMapsMasker  # assuming this import is needed

# --- Step 0: Subject-ID normalization ---
def normalize_id(id_str):
    """Return the part of ``id_str`` that precedes its first underscore.

    An ID that contains no underscore is returned unchanged.
    """
    prefix, _sep, _rest = id_str.partition('_')
    return prefix

# --- Step 1: Subject ID from a file path ---
def extract_subj_id_from_path(path):
    """Return the last component of ``os.path.dirname(path)``.

    For a path to a file this is the name of the folder holding the file
    (the subject ID, possibly with suffixes).
    """
    parent_dir, _filename = os.path.split(path)
    _grandparent, folder_name = os.path.split(parent_dir)
    return folder_name

# --- Step 2: Build mapping from normalized subject IDs to functional paths ---
# `func_imgs` is consumed as a dict of {raw subject ID: functional image path}.
func_id_to_path = {}
for raw_id, path in func_imgs.items():
    norm_id = normalize_id(raw_id)
    if norm_id in func_id_to_path:
        # Several raw IDs collapsed onto one normalized ID. The last one still
        # wins, but the replacement is reported instead of happening silently.
        print(
            f"Warning: several functional images map to subject {norm_id}; "
            f"replacing {func_id_to_path[norm_id]} with {path}"
        )
    func_id_to_path[norm_id] = path

# --- Step 3: Load and normalize metadata IDs ---
meta_ids = df_meta["Subject ID how it's defined in lab/project"].astype(str).tolist()
meta_ids_norm = [normalize_id(mid) for mid in meta_ids]

# --- Step 4: Find common IDs and build aligned lists ---
matched_ids = [mid for mid in meta_ids_norm if mid in func_id_to_path]
# Drop repeated IDs (first occurrence kept, order preserved) so a subject is
# not extracted twice and its output file rewritten.
common_ids = list(dict.fromkeys(matched_ids))
n_duplicates = len(matched_ids) - len(common_ids)
if n_duplicates:
    print(f"Warning: dropped {n_duplicates} duplicate metadata ID(s) after normalization.")
print(f"Subjects with both functional data and metadata (normalized): {len(common_ids)}")

aligned_func_paths = [func_id_to_path[mid] for mid in common_ids]
aligned_meta_ids = common_ids

assert len(aligned_func_paths) == len(aligned_meta_ids), "Mismatch after alignment."

# === Masker parameters ===
# Keyword arguments forwarded unchanged to NiftiMapsMasker.
mask_params = {
    'mask_img': gm_mask,
    'detrend': True,
    'standardize': True,
    'high_pass': 0.01,
    'low_pass': 0.1,
    't_r': 2.53,
    'smoothing_fwhm': 6.,
    'verbose': 1
}
masker = NiftiMapsMasker(maps_img=maps_img, **mask_params)

# === Output directory ===
os.makedirs("chunked_data", exist_ok=True)

# === Chunking ===
# Each subject's time series is cut into `n_chunks` consecutive pieces of
# T // n_chunks rows; the last piece also takes the leftover rows.
n_chunks = 10
num_participants = len(aligned_func_paths)
ttotal = datetime.now()
n_saved = 0  # subjects actually written to disk (skipped ones are excluded)

for i, (func_img, subj_id) in enumerate(zip(aligned_func_paths, aligned_meta_ids)):
    print(f"\nProcessing subject {i+1}/{num_participants} ({subj_id})")

    confounds = extract_confounds(func_img, mask_img=gm_mask, n_confounds=10)
    signals = masker.fit_transform(func_img, confounds=confounds)  # shape: (T, n_ROIs)

    T = signals.shape[0]
    chunk_len = T // n_chunks
    if chunk_len < 10:
        print(f"⚠️ Warning: Subject {subj_id} has very short time series ({T} TRs). Skipping.")
        continue

    # Chunk boundaries: regular starts, and every stop is the next start except
    # the final one, which runs to the end of the series.
    starts = [j * chunk_len for j in range(n_chunks)]
    stops = starts[1:] + [T]
    all_chunks = [signals[start:stop, :] for start, stop in zip(starts, stops)]

    with open(f"chunked_data/{subj_id}.pkl", "wb") as f:
        pickle.dump(all_chunks, f)
    n_saved += 1

# Report what was really saved; skipped subjects must not count as processed.
n_skipped = num_participants - n_saved
print(
    f"\n🎉 {n_saved}/{num_participants} subjects processed ({n_skipped} skipped) in",
    str(datetime.now() - ttotal).split(".")[0],
)
