In [None]:
# CELL D
# Priority:
# 1) If TS_DIR contains *_ts.npy -> compute from those
# 2) Else if CONN_DIR already contains .npz -> skip compute
# 3) Else attempt to fetch ADHD subset and compute end-to-end

from joblib import Parallel, delayed

# helper: compute Z from ts and save
def compute_and_save_from_ts(ts_path):
    """Build a Fisher z-transformed correlation matrix from a saved timeseries.

    The array stored at ``ts_path`` is loaded, its columns are correlated
    with each other, the coefficients are clipped just inside (-1, 1) and
    passed through ``arctanh``. The result is handed to ``save_conn_npz``
    together with the subject id, which is the part of the file name before
    the first underscore.

    Parameters
    ----------
    ts_path : str
        Path to a ``*_ts.npy`` file.
        NOTE(review): assumed to be laid out as (timepoints, ROIs) since the
        columns are what get correlated -- confirm against the writer.

    Returns
    -------
    The first element of the pair returned by ``save_conn_npz`` (used by the
    callers as the path of the written ``.npz``).
    """
    base = os.path.basename(ts_path).replace('_ts.npy', '')
    subj = base.split('_')[0]
    ts = np.load(ts_path).astype(np.float32)

    # A column with zero variance (e.g. an ROI with no signal) makes
    # corrcoef divide 0 by 0: silence the warning and handle the NaNs below.
    with np.errstate(invalid='ignore', divide='ignore'):
        C = np.corrcoef(ts.T)
    # Undefined correlations are treated as "no correlation" so that NaN does
    # not propagate through clip/arctanh into the saved matrix.
    C = np.nan_to_num(C, nan=0.0)

    np.fill_diagonal(C, 1.0)
    # Keep |r| strictly below 1 so arctanh stays finite (diagonal included).
    C = np.clip(C, -0.999999, 0.999999)
    Z = np.arctanh(C)

    npz_path, _meta = save_conn_npz(Z, subj)
    return npz_path

# Look for local inputs (timeseries) and outputs (connectivity) once; the
# branch taken below follows the priority order given in this cell's header.
ts_files = sorted(glob.glob(os.path.join(TS_DIR, '*_ts.npy')))
npz_existing = sorted(glob.glob(os.path.join(CONN_DIR, '*.npz')))

if ts_files:
    # 1) Timeseries already on disk: (re)compute connectivity from them.
    print("Found timeseries files:", len(ts_files), "-> computing connectivity and saving .npz")
    results = Parallel(n_jobs=2)(delayed(compute_and_save_from_ts)(p) for p in ts_files)
    print("Saved:", sum(r is not None for r in results), ".npz files")
elif npz_existing:
    # 2) No timeseries, but connectivity matrices exist: nothing to do.
    print("Found existing connectivity .npz files:", len(npz_existing), "-> skipping computation")
else:
    # 3) Nothing local: download data, extract timeseries, then compute.
    print("No timeseries or .npz found locally. Attempting to fetch ADHD subset and compute timeseries+connectivity.")
    # fetch ADHD and compute timeseries quickly using nilearn (may take some minutes)
    from nilearn import datasets
    try:
        # Current location of the maskers; nilearn.input_data is deprecated.
        from nilearn.maskers import NiftiLabelsMasker
    except ImportError:
        # Older nilearn releases only ship the legacy module.
        from nilearn.input_data import NiftiLabelsMasker
    import pandas as pd
    import nibabel as nib

    adhd = datasets.fetch_adhd(n_subjects=30, data_dir=WORKDIR, resume=True, verbose=1)
    # extract timeseries with Schaefer atlas for each func (quick implementation)
    atlas = datasets.fetch_atlas_schaefer_2018(n_rois=200, yeo_networks=7, data_dir=WORKDIR)
    atlas_img = atlas['maps']

    def extract_ts_quick(func_path, confound_path=None):
        """Return the atlas-label timeseries for one functional image.

        ``confound_path``, when given, is read as a tab-separated table and
        passed to the masker as confounds. The signal is standardized,
        detrended and band-pass filtered (0.01-0.1) by the masker.
        """
        zooms = nib.load(func_path).header.get_zooms()
        # Only the 4th zoom is a repetition time; for an image with fewer
        # dimensions the last zoom is a voxel size and must not be used as TR.
        t_r = float(zooms[3]) if len(zooms) > 3 else None
        conf = pd.read_csv(confound_path, sep='\t') if confound_path else None
        masker = NiftiLabelsMasker(labels_img=atlas_img, standardize=True, detrend=True, t_r=t_r, low_pass=0.1, high_pass=0.01)
        return masker.fit_transform(func_path, confounds=conf)

    func_files = adhd.func
    confounds = adhd.confounds if hasattr(adhd, 'confounds') else [None] * len(func_files)
    pairs = list(zip(func_files, confounds))
    print("Processing", len(pairs), "ADHD func files ...")

    # Make sure the output folder exists before the workers write into it.
    os.makedirs(TS_DIR, exist_ok=True)

    def proc_pair(idx_pair):
        """Extract and save the timeseries of the ``idx_pair``-th (func, confound) pair.

        Subjects are labelled by their 1-based position in ``pairs``
        (``sub-001``, ``sub-002``, ...). Returns the path of the saved file.
        """
        func_path, conf = pairs[idx_pair]
        subj = f"sub-{idx_pair+1:03d}"
        ts = extract_ts_quick(func_path, conf)
        ts_out = os.path.join(TS_DIR, f"{subj}_ses-1_ts.npy")
        np.save(ts_out, ts.astype(np.float32))
        return ts_out

    outs = Parallel(n_jobs=2)(delayed(proc_pair)(i) for i in range(len(pairs)))
    print("Saved timeseries for ADHD subset:", len(outs))
    # now compute connectivity from saved ts
    ts_files = sorted(glob.glob(os.path.join(TS_DIR, '*_ts.npy')))
    results = Parallel(n_jobs=2)(delayed(compute_and_save_from_ts)(p) for p in ts_files)
    print("Saved connectivity .npz files:", sum(r is not None for r in results))


No timeseries or .npz found locally. Attempting to fetch ADHD subset and compute timeseries+connectivity.


  from nilearn.input_data import NiftiLabelsMasker


Processing 30 ADHD func files ...
Saved timeseries for ADHD subset: 30
Saved connectivity .npz files: 30


In [None]:
# CELL E
import csv
# Collect every connectivity archive and write one manifest row per file.
npz_files = sorted(glob.glob(os.path.join(CONN_DIR, '*.npz')))
print("Total .npz files:", len(npz_files))

manifest_header = ['file', 'subj', 'session', 'atlas', 'shape', 'dtype', 'md5']

with open(MANIFEST, 'w', newline='') as manifest_fh:
    manifest = csv.writer(manifest_fh)
    manifest.writerow(manifest_header)

    for npz_path in npz_files:
        # Metadata is looked up in a .json file named like the .npz.
        sidecar_path = npz_path.replace('.npz', '.json')

        if not os.path.exists(sidecar_path):
            # No metadata available: record the file with placeholder values.
            manifest.writerow([npz_path, 'unk', 'ses-1', 'unk', '', '', ''])
            continue

        with open(sidecar_path, 'r') as sidecar_fh:
            info = json.load(sidecar_fh)

        # Shape is stored as a list and flattened to e.g. "200x200".
        shape_text = 'x'.join(str(dim) for dim in info.get('shape', []))
        manifest.writerow([
            info.get('file'),
            info.get('subj'),
            info.get('session', 'ses-1'),
            info.get('atlas'),
            shape_text,
            info.get('dtype'),
            info.get('md5'),
        ])

print("Manifest written to:", MANIFEST)


Total .npz files: 30
Manifest written to: /content/drive/MyDrive/fmri_fingerprint/manifest_connectivity.csv
