## Installation

If you don't already have `pydeface` installed then you can find the instructions to do so [here](https://github.com/poldracklab/pydeface). Running `pydeface --help`, after successful installation, is the most documentation available as of March 24th, 2021. 

In [39]:
import os 
from pathlib import Path

In [40]:
data_path = Path("/data/BrainBlocks/joyce/bids_modified") # directory path that contains 

In [41]:
# the nodeface-anat-scans directory contains a COPY of all the anat scans in the dataset. 
# No directories, just a bunch of anat scans all in one place, for example - 
anat_scans_with_face = data_path.joinpath("derivatives","nodeface-anat-scans")
listing = os.listdir(anat_scans_with_face)
listing[:5] # example 

['sub-ON01016_ses-01_acq-2d_run-01_FLAIR.nii.gz',
 'sub-ON01016_ses-01_acq-2d_run-02_FLAIR.nii.gz',
 'sub-ON01016_ses-01_acq-adni2d_run-01_FLAIR.nii.gz',
 'sub-ON01016_ses-01_acq-adni2d_run-02_FLAIR.nii.gz',
 'sub-ON01016_ses-01_acq-cube_run-01_T2w.nii.gz']

In [42]:
# create an output directory to store the pydeface outputs in. Within the output directory, one directory per subject
# would make life easier while merging defaced scans with original directory tree that contains anat scans with faces 
# at present 

output_dir = data_path.joinpath("derivatives","pydeface_outputs")
!mkdir $output_dir

mkdir: cannot create directory ‘/data/BrainBlocks/joyce/bids_modified/derivatives/pydeface_outputs’: File exists


## Modified `pydeface` workflow for HV-Protocol dataset

### PART 1: 
1. `fslmaths facemask.nii.gz -binv facemask_inv` (inverting default facemask)
2. `pydeface sub-ON01802_ses-01_acq-mprage_run-01_T1w.nii.gz --facemask ../../pydeface-defaults/data/facemask-inv.nii.gz --nocleanup --verbose --debug > T1w-deface-facemask-inv.txt`
3. `fslmaths sub-ON01802_ses-01_acq-mprage_run-01_T1w_pydeface_mask.nii.gz -binv inverted-T1w-pydeface-mask`
4. `fslmaths inverted-T1w-pydeface-mask.nii.gz -mul sub-ON01802_ses-01_acq-mprage_run-01_T1w.nii.gz sub-ON01802_ses-01_acq-mprage_run-01_T1w-new-mask-defaced.nii.gz`

### PART 2: 
a. FLAIR 
- `pydeface sub-ON01802_ses-01_acq-2d_run-01_FLAIR.nii.gz --template sub-ON01802_ses-01_acq-mprage_run-01_T1w.nii.gz --facemask inverted-T1w-pydeface-mask.nii.gz --nocleanup --verbose --force --debug > flair_pydeface.txt`
- `fslmaths sub-ON01802_ses-01_acq-2d_run-01_FLAIR_pydeface_mask.nii.gz -binv inverted-flair-pydeface-mask`
- `fslmaths inverted-flair-pydeface-mask -mul sub-ON01802_ses-01_acq-2d_run-01_FLAIR.nii.gz sub-ON01802_ses-01_acq-2d_run-01_FLAIR_new-mask-defaced.nii.gz` 

b. T2w 
- `pydeface sub-ON01802_ses-01_acq-abcdcube_run-01_T2w.nii.gz --template sub-ON01802_ses-01_acq-mprage_run-01_T1w.nii.gz --facemask inverted-T1w-pydeface-mask.nii.gz --nocleanup --verbose --force --debug > T2w-deface-facemask-inv.txt`
- `fslmaths sub-ON01802_ses-01_acq-abcdcube_run-01_T2w_pydeface_mask.nii.gz -binv inverted-T2w-pydeface-mask`
- `fslmaths inverted-T2w-pydeface-mask -mul sub-ON01802_ses-01_acq-abcdcube_run-01_T2w.nii.gz sub-ON01802_ses-01_acq-abcdcube_run-01_T2w_new-mask-defaced.nii.gz`

c. T2star
- `pydeface sub-ON01802_ses-01_run-01_T2star.nii.gz --template sub-ON01802_ses-01_acq-mprage_run-01_T1w.nii.gz --facemask inverted-T1w-pydeface-mask.nii.gz --nocleanup --verbose --force --debug > T2star-deface-facemask-inv.txt`
- `fslmaths sub-ON01802_ses-01_run-01_T2star_pydeface_mask.nii.gz -binv inverted-T2star-pydeface-mask`
- `fslmaths inverted-T2star-pydeface-mask -mul sub-ON01802_ses-01_run-01_T2star.nii.gz sub-ON01802_ses-01_run-01_T2star_new-mask-defaced.nii.gz`

In [43]:
# Inverting default facemask using the fslmaths command
# not running the command from the notebook. 
inv_cmd = "fslmaths <path/to/pydeface/pkg/dir>facemask.nii.gz \
-binv <path/to/dest>facemask_inv"

In [44]:
# magic command to capture the cell output in a file. Uncomment the next line when ready to write to a file
# %%capture cap --no-stderr 


# bids tree that contains anat scans with face (NOT the same as the nodeface-anat-scans under derivatives)
bids_root = data_path.joinpath("openneuro_upload_ready")

# list of subject directories in the dataset 
subjects = [subj for subj in os.listdir(bids_root) if subj.startswith("sub-") and not subj.endswith("emptyroom")]
scans_with_face = [x for x in os.listdir(anat_scans_with_face) if x.endswith(".nii.gz")]

for subj in subjects:
    subj_anat_scans = [os.path.join(anat_scans_with_face,file) for file in os.listdir(anat_scans_with_face) \
                       if file.startswith(subj) and file.endswith(".nii.gz")]
    # uncomment the next two lines if you'd like to see the list generated above
    # print(subj_anat_scans)
    # print("************************************")
    for scan in subj_anat_scans:
        # create a subj directory within output directory if it doesn't 
        # already exist
        subj_outdir = output_dir.joinpath(subject)
        if not os.path.exists(subj_outdir):
            mkdir_cmd = " ".join(["mkdir","-p",subj_outdir.as_posix()])
            cmd_stdout = !$mkdir_cmd
            if cmd_stdout:
                print(cmd_stdout)
    
    # construct pydeface commands for each subject 
    # using lscratch for computations can be faster too
    # hence, the TMPDIR installations
    env_variables = " ".join(["source ~/.bashrc",
                              "export TMPDIR=/lscratch/$SLURM_JOB_ID;",
                              "mkdir $TMPDIR/bids;","mkdir $TMPDIR/out;",
                              "cp", " ".join(subj_anat_scans),"$TMPDIR/bids;",
                              "conda activate /data/BrainBlocks/arshithab/env/pydeface",";" # this'll work only if the .bashrc/.bash_profile initializes conda
                             ])   
    tmp_input_dir = "$TMPDIR/bids"
    tmp_output_dir = "$TMPDIR/out"
    
    # copying the default inverted facemask to working directory 
    cp_default_inv_facemask = " ".join(["cp", os.path.join(bids_root, 'code', 'pydeface', 
                                                 'modified-workflow','facemask-inv.nii.gz'),
                                   tmp_input_dir,';'])
    
    # the T1w image for each subject being set as the template image for non-T1w modalities
    template_image = [image for image in anat_files if image.endswith("run-01_T1w.nii.gz")][0]
    
    # defacing the template image using the inverted default facemask
    template_deface_cmd = ' '.join(["pydeface", os.path.join(tmp_input_dir, template_image),
                                    "--facemask",
                                    os.path.join(tmp_input_dir,'facemask-inv.nii.gz'),
                                    "--nocleanup","--verbose","--force","--outfile",
                                    os.path.join(tmp_output_dir, template_image.split('.')[0]+'_inv_defaced.nii.gz'),
                                    ';'])
    # setting the path to the subject specific template facemask 
    template_facemask_path = os.path.join(tmp_output_dir,"T1w-facemask.nii.gz" )
    
    # creates a inverted defaced image of the T1w image
    # inverting the inverted defaced image of the T1w scan
    # would create the final defaced T1w image
    fslmath_cmds = ' '.join(["fslmaths", os.path.join(tmp_input_dir, "*T1w_pydeface_mask*"),
                            "-binv", template_facemask_path, ";",
                            "fslmaths", os.path.join(tmp_input_dir, template_image), 
                            "-mul", template_facemask_path, 
                             os.path.join(tmp_output_dir,template_image.split('.')[0]+"_defaced.nii.gz" ), 
                             ";"])
    
    # the following commands are meant to deface the non-T1w scans
    non_T1w_cmds = ""
    for image in anat_files:
        if image != template_image and not image.endswith("_pydeface_mask.nii.gz"):
            non_T1w_cmds = " ".join([non_T1w_cmds, "pydeface", os.path.join(tmp_input_dir,image),
                                    "--template", os.path.join(tmp_input_dir, template_image),
                                    "--facemask", template_facemask_path,
                                    "--nocleanup", "--verbose","--force",
                                    "--outfile", os.path.join(tmp_output_dir, image.split('.')[0]+"_defaced.nii.gz"),
                                    ";"])
    
    mv_pydeface_tmp_cmd = ' '.join(["mv", os.path.join(tmp_input_dir, "*_pydeface*"),
                                    tmp_output_dir,";"])

    mv_tmp_out_dir = " ".join(["mv", os.path.join(tmp_output_dir, "*"),subj_outdir.as_posix(),";" ])
    chng_perms = " ".join(["chmod -R ug+rwx", subj_outdir.as_posix(),";"])
    rm_tmp_dirs = " ".join(["rm -rf", tmp_input_dir, tmp_output_dir, ";"])
    print(' '.join([env_variables, cp_default_inv_facemask,template_deface_cmd, fslmath_cmds, 
                    non_T1w_cmds, mv_pydeface_tmp_cmd,mv_tmp_out_dir,
                   chng_perms, rm_tmp_dirs]))
    break # comment this out while generating commands for all subjects 

# the next two lines write the cell output to a swarm file
# with open(os.path.join(bids_root, "code",'pydeface-modified-wf.swarm'), 'w') as f:
#     f.write(cap.stdout)


source ~/.bashrc export TMPDIR=/lscratch/$SLURM_JOB_ID; mkdir $TMPDIR/bids; mkdir $TMPDIR/out; cp /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_acq-2d_run-01_FLAIR.nii.gz /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_acq-2d_run-02_FLAIR.nii.gz /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_acq-adni2d_run-01_FLAIR.nii.gz /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_acq-adni2d_run-02_FLAIR.nii.gz /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_acq-cube_run-01_T2w.nii.gz /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_acq-fspgr_run-01_T1w.nii.gz /data/BrainBlocks/joyce/bids_modified/derivatives/nodeface-anat-scans/sub-ON01016_ses-01_run-01_T2star.nii.gz $TMPDIR/bids; conda activate /data/BrainBlocks/arshithab/env/pydeface ; cp /data/BrainB

Once you create the swarm command file from above, the next step would be to run it on the biowulf command line. 

In [30]:
# swarm related helper functions  

def generate_swarm_cmd(file_path, memory, 
                       threads, jobname, 
                       module, lscratch_space, 
                       wall_time,logdir_path,
                       bundle):
    cmd = " ".join(["swarm -f",file_path,
                   "--gres=lscratch:"+lscratch_space,
                   "-g", memory,
                   "-t", threads,
                   "-b", bundle, 
                   "--logdir",logdir_path,
                   "--time", wall_time,
                   "--job-name", jobname,
                   "--module", module])
    return cmd

def check_job_status(user):
    log = !squeue -u $user
    if log:
        for each in log:
            print(each)

In [40]:
#********* TO BE CHANGED TO SET SWARM JOB RESOURCES *********#

swarm_filepath = os.path.join(bids_root, "code",'pydeface-modified-wf.swarm')
logdir = os.path.join(bids_root, "code","swarm_log")
threads = str(8)
memory = str(32)
wall_time = "48:00:00"
jobname = "pydeface"
module = "fsl"
lscratch = str(50)
bundle = str(0)

#**********************************************************#

In [41]:
biowulf_username = "arshithab"

In [46]:
cmd = generate_swarm_cmd(swarm_filepath, memory, threads, jobname, module, lscratch, wall_time,logdir,bundle)
!$cmd

7418077


In [80]:
check_job_status(biowulf_username)

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           7416593   persist spersist arshitha  R    2:54:18      1 cn2877
           7415453   persist spersist arshitha  R    3:12:59      1 cn2879
