In [None]:
#!/usr/bin/env python3

import os
import shutil
import json
import csv
from glob import glob
import gzip
import shutil as sh

# Variables
# Root of the Lead-DBS derivatives tree; every entry listed inside it is
# processed as one subject folder by the loop further down.
ORIG_DIR = "/Volumes/DBSExp/ChuaFUS/LeadDBSBIDS/derivatives/leaddbs"
# Root under which the "<city>_<condition>_DBS" dataset folders are created.
DEST_DIR = "/Volumes/HowExp/datasets/HIFUS_minisets"
# Participants metadata JSON. In the visible code it is only referenced by
# the commented-out metadata lookup.
JSON_LOC = "/Volumes/DBSExp/MasterDataset/HornDatasets/participants.json"

# Sub-directories of each subject folder that are copied wholesale into
# <subject>/ses-pre/<name> in the destination dataset.
FOLDERS_TO_EXTRACT = ["miscellaneous"]
# Maps a glob pattern (relative to the subject folder) to a destination path
# stem (relative to the subject's output folder). ".nii.gz" is appended to the
# stem when the matched file is written.
# NOTE(review): the second pattern has no wildcard or extension after "MRI",
# so it presumably never matches files named "*.nii" / "*.nii.gz" -- confirm
# against the actual file names and append "*" if that is the intent.
FILES_TO_EXTRACT = {
    "preprocessing/anat/*T1w*": "ses-pre/anat/T1",
    "coregistration/anat/sub-*-postop_space-anchorNative_desc-preproc_acq-sag_MRI": "ses-post/anat/T1",
}
# Ensure destination directory exists
os.makedirs(DEST_DIR, exist_ok=True)

# with open(JSON_LOC, 'r') as f:
#     participants_list = json.load(f)
#     participants = {participant['id']: participant for participant in participants_list if 'id' in participant}

# # Fast metadata retrieval
# def find_metadata(sub_id):
#     return participants.get(sub_id)


# Column names of the per-dataset master list (metadata/master_list.csv).
METADATA_HEADER = ["ID", "Age", "ElModel", "Condition", "YearsSinceDx", "Target", "DOIs"]


def append_metadata_row(metadata_file, row):
    """Append ``row`` to the master CSV unless its ID is already listed.

    The header is written first when the file does not exist yet. The ID is
    ``row[0]`` and is compared against the first column of every existing
    row, so re-running the script does not duplicate subjects.

    Returns True if the row was appended, False if the ID was already present.
    """
    file_exists = os.path.isfile(metadata_file)
    if file_exists:
        with open(metadata_file, newline='') as csvfile:
            known_ids = {record[0] for record in csv.reader(csvfile) if record}
        if row[0] in known_ids:
            return False

    with open(metadata_file, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        if not file_exists:
            writer.writerow(METADATA_HEADER)
        writer.writerow(row)
    return True


def copy_folder(src_dir, dst_dir):
    """Copy the directory tree ``src_dir`` to ``dst_dir``.

    Nothing is copied when ``dst_dir`` already exists or ``src_dir`` is not a
    directory. The destination is only created when there is something to
    copy; otherwise an empty folder would make later runs skip it as
    "already exists".

    Returns True if the tree was copied, False otherwise.
    """
    if os.path.exists(dst_dir):
        print(f"Folder {dst_dir} already exists. Skipping...")
        return False
    if not os.path.isdir(src_dir):
        print(f"Folder {src_dir} not found. Skipping...")
        return False
    # copytree creates dst_dir and any missing parent directories itself.
    shutil.copytree(src_dir, dst_dir)
    return True


def compress_matches(pattern, dst_base):
    """Write every file matching ``pattern`` to ``dst_base[_<i>].nii.gz``.

    The first match (in sorted order) gets no suffix, later ones ``_1``,
    ``_2``, ... Sources that are already gzip-compressed (name ends in
    ``.gz``) are copied unchanged instead of being compressed a second time.
    The parent directory of ``dst_base`` is created even when nothing matches.

    Returns the list of destination paths that were written.
    """
    # glob returns matches in arbitrary order; sort so suffixes are stable
    # across runs, and drop directories, which cannot be opened as files.
    matches = sorted(path for path in glob(pattern) if os.path.isfile(path))
    os.makedirs(os.path.dirname(dst_base), exist_ok=True)
    if not matches:
        print(f"No files matched {pattern}")

    written = []
    for i, src in enumerate(matches):
        suffix = "" if i == 0 else f"_{i}"
        dst = f"{dst_base}{suffix}.nii.gz"
        if src.endswith(".gz"):
            shutil.copyfile(src, dst)
        else:
            # Compress the .nii file to .nii.gz
            with open(src, 'rb') as f_in, gzip.open(dst, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        written.append(dst)
    return written


def main():
    """Build the miniset layout for every subject directory in ORIG_DIR.

    For each subject: record it in the dataset's master CSV, copy the folders
    named in FOLDERS_TO_EXTRACT into ``ses-pre`` and write the files matched
    by FILES_TO_EXTRACT as ``.nii.gz``.
    """
    # Process each subfolder
    for subfolder in sorted(os.listdir(ORIG_DIR)):
        subfolder_path = os.path.join(ORIG_DIR, subfolder)
        # Only real subject directories; skips stray files and hidden entries
        # (e.g. .DS_Store) that would otherwise get a CSV row and folders.
        if subfolder.startswith('.') or not os.path.isdir(subfolder_path):
            continue

        # NOTE: hard-coded for this dataset; the participants.json lookup is
        # currently commented out earlier in the file.
        city = 'BWH'
        condition = 'EssentialTremor'
        subject = subfolder

        subdir = f"{city}_{condition}_DBS"
        dataset_dir = os.path.join(DEST_DIR, subdir)
        subject_dir = os.path.join(dataset_dir, "data", subject)

        os.makedirs(os.path.join(dataset_dir, "data"), exist_ok=True)
        os.makedirs(os.path.join(dataset_dir, "metadata"), exist_ok=True)

        # Write metadata to master CSV
        metadata_file = os.path.join(dataset_dir, "metadata", "master_list.csv")
        append_metadata_row(metadata_file, [
            subject,
            'unknown',
            'hifus',
            condition,
            'NA',
            'VIM',
            '10.1126/sciadv.adp0532',
        ])

        # Copy required folders/files
        for folder in FOLDERS_TO_EXTRACT:
            copy_folder(
                os.path.join(subfolder_path, folder),
                os.path.join(subject_dir, 'ses-pre', folder),
            )

        for pattern, dst_stem in FILES_TO_EXTRACT.items():
            compress_matches(
                os.path.join(subfolder_path, pattern),
                os.path.join(subject_dir, dst_stem),
            )
        print(f"Processed subject: {subfolder}")


if __name__ == "__main__":
    main()