In [None]:
import pandas as pd

import livingpark_utils.dataset as dataset


cohort_file = "mak-etal-2017-7934773459255573745.csv"
cohort = pd.read_csv(cohort_file)

# Resolve, for every cohort row, the cached NIfTI file matching its
# (PATNO, EVENT_ID, Description) triple.
# NOTE(review): presumably the lookup yields a null value when nothing is
# cached, since the report below counts nulls -- confirm against livingpark_utils.
cohort["filename"] = cohort.apply(
    lambda row: dataset.ppmi.find_nifti_file_in_cache(
        row["PATNO"], row["EVENT_ID"], row["Description"]
    ),
    axis=1,
)

# Report the number of ROWS without a file. `DataFrame.size` is rows x columns
# and would inflate the count, so sum the boolean null mask instead.
print(f"Missing T1 nifti files: {cohort['filename'].isnull().sum()}")

In [None]:
from pathlib import Path
import re


def filter_cached_patnos(df, pipeline, use_cache=True):
    """Drop the rows of ``df`` whose PATNO already has a marker file for ``pipeline``.

    Marker files are read from the ``.exitcodes`` directory, resolved relative
    to the current working directory, and are expected to be named
    ``<pipeline>.<patno>.<state>``. The ``state`` part is not inspected: any
    marker for the pipeline makes the subject count as cached.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``PATNO`` column holding integer subject ids.
    pipeline : str
        Pipeline name to look for in the marker file names.
    use_cache : bool, default True
        When False, the marker directory is ignored.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when the cache is disabled or ``.exitcodes`` does not
        exist; otherwise a filtered copy with a fresh ``0..n-1`` index and the
        same columns as ``df``.
    """
    exitcodes = Path(".exitcodes")

    if not (use_cache and exitcodes.exists()):
        return df

    # `\d+` (not `\d*`): a marker without digits must not match, otherwise
    # int("") would raise ValueError on a malformed file name.
    regex = re.compile(r"(?P<pipeline>.*)\.(?P<patno>\d+)\.(?P<state>.*)")
    cached_patnos = [
        int(m["patno"])
        for marker in exitcodes.iterdir()
        if (m := regex.search(marker.name)) and m["pipeline"] == pipeline
    ]

    # drop=True keeps the column set identical to the un-filtered return path
    # instead of leaking the old index as an extra "index" column.
    return df[~df["PATNO"].isin(cached_patnos)].reset_index(drop=True)

In [None]:
import json
from pathlib import Path

# FSL SIENA computes a longitudinal metric between two scans of one subject,
# so the job table holds a single row per PATNO: `filename` comes from the
# subject's first row in `cohort` and `filename_2` from its last row.
# NOTE(review): assumes `cohort` rows are ordered by visit -- confirm.
first_rows = cohort.drop_duplicates(subset="PATNO")[["PATNO", "filename"]]
last_rows = cohort.drop_duplicates(subset="PATNO", keep="last")[["PATNO", "filename"]]
fsl_data = first_rows.merge(last_rows, on=["PATNO"], suffixes=(None, "_2"))

# Skip the subjects that already have a marker file for this pipeline.
fsl_data = filter_cached_patnos(fsl_data, "fsl_siena")

# Round-trip through pandas' JSON encoder, then re-serialise with indentation.
json_data = fsl_data[["PATNO", "filename", "filename_2"]].to_json()
meta = json.loads(json_data)

Path("fsl_siena.json").write_text(json.dumps(meta, indent=4))

In [None]:
%%writefile fsl_siena.py
import os
import json
from pathlib import Path
import sys

import boutiques
from boutiques.descriptor2func import function


# Build a Python callable from the Boutiques descriptor referenced by this id.
zenodo_id = "zenodo.7435009"
fsl_siena = function(zenodo_id)

# Each SLURM array task handles one entry of fsl_siena.json; the JSON columns
# are keyed by the task id as a string.
task_id = os.environ["SLURM_ARRAY_TASK_ID"]
with open("fsl_siena.json") as fin:
    slurm_jobs = json.load(fin)

patno = slurm_jobs["PATNO"][task_id]

# Only the parent directory is created here; the subject directory itself is
# handed to the tool as `output_dir`.
output_dir = Path("outputs", "fsl_siena", str(patno)).resolve()
output_dir.parent.mkdir(mode=0o755, parents=True, exist_ok=True)

output = fsl_siena(
    "--imagepath",
    "fsl-6.0.4.sif",
    input1=slurm_jobs["filename"][task_id],
    input2=slurm_jobs["filename_2"][task_id],
    output_dir=output_dir.as_posix(),
    viena=True,
)

exitcode = output.exit_code
print(output.stdout)
print(output.stderr, file=sys.stderr)

# Write successful computations
if exitcode == 0:
    exitcode_file = Path(".exitcodes", f"fsl_siena.{patno}.success").resolve()
    exitcode_file.parent.mkdir(mode=0o755, exist_ok=True)
    exitcode_file.touch(mode=0o644, exist_ok=True)

# Propagate the tool's exit status so that a failed run is not reported to the
# scheduler as a successful task.
sys.exit(exitcode)

In [None]:
%%writefile fsl_siena.slurm
#!/bin/bash
# Batch script for one task of the fsl_siena job array: loads Singularity,
# activates the project virtualenv and runs fsl_siena.py.
# bash is required: `source` below is not a POSIX sh builtin.

#SBATCH -J fsl_siena
#SBATCH --nodes=1
#SBATCH --mem=8G
#SBATCH --cpus-per-task=2
# Outputs ----------------------------------
# One stdout/stderr file per array task, written under log/.
#SBATCH -o log/%x-%A-%a.out
#SBATCH -e log/%x-%A-%a.err
# ------------------------------------------

module load singularity
source .venv/bin/activate
# Last command: its exit status becomes the exit status of the task.
python fsl_siena.py

In [None]:
# fsl_siena.slurm sends each task's stdout/stderr to files under log/, so make
# sure that directory exists before the jobs are submitted.
log_dir = Path("log")
log_dir.mkdir(exist_ok=True, mode=0o755)

In [None]:
# Submit fsl_siena.slurm as a job array with one task per entry of
# fsl_siena.json: jq counts the PATNO entries and the shell arithmetic turns
# that count N into the index range 0..N-1, which fsl_siena.py reads back
# through SLURM_ARRAY_TASK_ID. The `%60` suffix is sbatch's array throttle
# (at most 60 tasks running at the same time).
# NOTE(review): when fsl_siena.json holds no PATNO the range becomes `0--1`,
# which sbatch will presumably reject -- confirm and guard if needed.
! sbatch --array=0-$(( $(jq ".PATNO | length" fsl_siena.json) - 1 ))%60 --account=rrg-glatard fsl_siena.slurm