# Preprocessing Pipeline for Structural Data

- Sections:
    - Set-up: Run every time to create input dictionary
    - Part 1:
    - Part 2:
    - Part 3:

# Set-Up: 
## Select Subjects to Process and Set Variables - This section needs to be run before each of the 3 Parts

### Packages and Functions Needed for This Notebook

In [41]:
import os
import shutil
from glob import glob
from nilearn import plotting
import subprocess
import sys
import matplotlib_inline
from tqdm import tqdm
import tempfile
from pathlib import Path
from time import sleep


# export MPLBACKEND=TkAgg 

In [91]:
def submit_slurm_job(job_name, command, partition="bch-compute", nodes=1, ntasks=1, cpus=16, mem="50G", time="24:00:00"):
    
    
    """
    Submits a job to the Slurm job scheduler.

    This function creates a script that has the given command, and then submits this script to the Slurm job scheduler.
    
    Parameters:
    job_name (str): A name for the job. This will help you identify the job later.
    command (str): The command that you want to run.
    partition (str): The partition on the cluster where you want to run the job. Defaults to "bch-compute".
    nodes (int): The number of nodes (computers) that you want to use to run the job. Defaults to 1.
    cpus_per_task (int): The number of CPUs (processing units) that you want to use on each node. Defaults to 16.
    mem (str): The amount of memory that you want to use on each node. Defaults to "50GB".
    time (str): The maximum amount of time that the job is allowed to run. Defaults to "10:00:00" (10 hours).

    Returns:
    job_id (str): The ID of the job that was submitted. You can use this ID to check on the status of the job later.
    """

    
    script = f"""#!/bin/bash
    
#SBATCH --job-name={job_name}
#SBATCH --partition={partition}
#SBATCH --nodes={nodes}
#SBATCH --ntasks={ntasks} 
#SBATCH --cpus-per-task={cpus}
#SBATCH --mem={mem}
#SBATCH --time={time}
#SBATCH -o output_%j.txt
#SBATCH --mail-type=NONE

    # Run the command
    export MPLBACKEND=TkAgg
    
    source /lab-share/Neuro-Cohen-e2/Public/environment/load_neuroimaging_env.sh

    set -e

    {command}
    """
    
    # Write the script to a temporary file
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write(script)
        script_file = f.name

    # Make the script executable
    subprocess.run(["chmod", "+x", script_file])

    try:
        # Submit the job using sbatch through the shell
        output = subprocess.check_output(['sbatch', script_file]).decode('utf-8')

        # Extract the job ID from the output
        job_id = output.strip().split()[-1]
    except subprocess.CalledProcessError as e:
        print(f"Error submitting job: {e}")
        job_id = None
    
    sleep(5)

    return job_id


In [92]:
def submit_slurm_job_test(job_name, command, partition="bch-compute", nodes=1, ntasks=1, cpus=16, mem="50G", time="24:00:00"):
    
    
    """
    Submits a job to the Slurm job scheduler.

    This function creates a script that has the given command, and then submits this script to the Slurm job scheduler.
    
    Parameters:
    job_name (str): A name for the job. This will help you identify the job later.
    command (str): The command that you want to run.
    partition (str): The partition on the cluster where you want to run the job. Defaults to "bch-compute".
    nodes (int): The number of nodes (computers) that you want to use to run the job. Defaults to 1.
    cpus_per_task (int): The number of CPUs (processing units) that you want to use on each node. Defaults to 16.
    mem (str): The amount of memory that you want to use on each node. Defaults to "50GB".
    time (str): The maximum amount of time that the job is allowed to run. Defaults to "10:00:00" (10 hours).

    Returns:
    job_id (str): The ID of the job that was submitted. You can use this ID to check on the status of the job later.
    """

    
    script = f"""#!/bin/bash
    
#SBATCH --job-name={job_name}
#SBATCH --partition={partition}
#SBATCH --nodes={nodes}
#SBATCH --ntasks={ntasks}  
#SBATCH --cpus-per-task={cpus}
#SBATCH --mem={mem}
#SBATCH --time={time}
#SBATCH -o output_%j.txt
#SBATCH --mail-type=NONE

    # Run the command
    export MPLBACKEND=TkAgg
    source /lab-share/Neuro-Cohen-e2/Public/environment/load_neuroimaging_env.sh

    set -e

    {command}
    """
    
    # Write the script to a temporary file
    with open(f'slurm_script_{job_name}.sh', 'w') as f:
        f.write(script)
    






In [114]:
def create_input_dict(input_folder, subjects_to_skip=None, input_type='Folder'):
    
    """
    Creates a dictionary of subjects and their corresponding sessions from a given input folder.

    This function looks at the files in the input folder and creates a dictionary where the keys are the subject IDs and the values are lists of sessions for each subject.

    Parameters:
    input_folder (str): The path to the folder that contains the input files.
    subjects_to_skip (list): A list of subject IDs that you want to skip. Defaults to None.
    input_type (str): The type of input folder. Can be either 'BIDS' or 'Folder'. Defaults to 'Folder'.

    Returns:
    subject_sessions (dict): A dictionary where the keys are the subject IDs and the values are lists of sessions for each subject.

    Note:
    If input_type is 'BIDS', the function assumes that the input folder is organized according to the BIDS format.
    If input_type is 'Folder', the function assumes that all selected scans are in one folder and the file names start with the subject ID followed by an underscore.
    """
        
    
    if not Path(input_folder).is_dir():
        raise ValueError("Input folder is not a valid directory")
    
    subject_sessions = {}
    
    if input_type=='BIDS':
        subjects=[f for f in os.listdir(input_folder) if os.path.isdir(os.path.join(input_folder, f))]
        
        if subjects_to_skip is not None:
            subjects = [subject for subject in subjects if subject not in subjects_to_skip]
            
        print("There are", len(subjects), "unique subjects to be registered")
        
        for subject in sorted(subjects):
            # Get the path to the subject folder
            subject_path = os.path.join(input_folder, subject)

            # Get a list of session folders within the subject folder
            sessions = [f for f in os.listdir(subject_path) if os.path.isdir(os.path.join(subject_path, f))]

            # Add the subject and sessions to the dictionary
            subject_sessions[subject] = sessions


        print(subject_sessions)
        
    
    elif input_type=='Folder':
        #All selected scans are in one folder
        #Assumptions: first part of file name is subject ID followed by an _
        subjects=sorted(set([os.path.basename(i).split('_')[0] for i in glob(f'{input_folder}/*.nii*')]))
        
        if subjects_to_skip is not None:
            subjects = [subject for subject in subjects if subject not in subjects_to_skip]
            
        print("There are", len(subjects), "unique subjects to be registered")
        
        
        for subject in sorted(subjects):
            for file in glob(f'{input_folder}/*{subject}*.nii*'):
                subject = os.path.basename(file).split('_')[0]
                session = os.path.basename(file).split('_')[1]

                if subject not in subject_sessions:
                    subject_sessions[subject] = []
                if session not in subject_sessions[subject]:  
                    subject_sessions[subject].append(session)


        print(subject_sessions)

    
    else:
        raise ValueError(f"Invalid input_type: '{input_type}'. Should be either 'BIDS' or 'Folder'.")
    
    
    return subject_sessions

In [43]:
def set_registration_target(file_names):
    
    """
    Sets the registration target based on a list of file names.

    This function looks at the file names and sets the registration target to the first file that matches a certain criterion.

    Parameters:
    file_names (list): A list of file names.

    Returns:
    reg_target (str): The registration target.
    
    Note:
    The function checks if any of the file names contain the registration target strings (Reg_target_1 or Reg_target_2).
    If no registration target is found, a ValueError is raised.
    """
        
    global Reg_target_1
    global Reg_target_2
    reg_target = None
    for file_name in file_names:
        if Reg_target_1 in file_name:
            reg_target = Reg_target_1
            break
    if reg_target is None:
        for file_name in file_names:
            if Reg_target_2 in file_name:
                reg_target = Reg_target_2
                break
    if reg_target is None:
        raise ValueError(f"No registration target found in {file_names}")
    return reg_target

### Put your information below then create the input dictionary (subject_sessions)

In [82]:
# input_dir="test_input_combine"  #Folder with input files
# #input_dir = '/lab-share/Neuro-Cohen-e2/Public/lesions/RDCRN_TSC/'

# input_type='BIDS' #'BIDS' or 'Folder'
# subjects_to_skip=['sub-MGH083']  

# output_dir="output_combine_test"


# IMAGE_TYPES = ['T1w', 'T2w', 'FLAIR'] #case sensitive, change to match what you used e.g. t1, t1w, TI 
# Reg_target_1='T1w' #your ideal registration target, case sensitive
# Reg_target_2='T2w' #your second choice for registration target, case sensitive

In [105]:
input_dir="RDCRN_test"  #Folder with input files
input_type='BIDS' #'BIDS' or 'Folder'
#input_dir = '/lab-share/Neuro-Cohen-e2/Public/lesions/RDCRN_TSC/'
output_dir="/lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619"

subjects_to_skip=None

skullstrip='synthstrip' 

mni_software='EasyReg'

IMAGE_TYPES = ['t1', 't2'] #case sensitive, change to match what you used e.g. t1, t1w, TI 
Reg_target_1='t1' #your ideal registration target, case sensitive
Reg_target_2='t2' #your second choice for registration target, case sensitive

#Bias correction only works on T1 or T2, bias correction is set to work on the registration target

In [115]:
input_dir="/lab-share/Neuro-Cohen-e2/Public/notebooks/jpeters/RAW/"  #Folder with input files
input_type='Folder' #'BIDS' or 'Folder'
output_dir="output_jpeters_20240620_test"

subjects_to_skip=['c002', 'c003'] 

skullstrip='both' #synthstrip, optibet, both, None 
                        #for both - run it for Part2 (on registration target) and pick your favorite, then update this variable before running Part3
mni_software='EasyReg' #'ANTs' or 'ANTsQuick'

IMAGE_TYPES = ['t1', 'T2', 'FLAIR'] #case sensitive, change to match what you used e.g. t1, t1w, TI 
Reg_target_1='t1' #your ideal registration target, case sensitive
Reg_target_2='T2'

In [116]:
# Create a dictionary of subjects and their corresponding sessions from the input directory
subject_sessions=create_input_dict(input_dir, subjects_to_skip=subjects_to_skip, input_type=input_type)

There are 1 unique subjects to be registered
{'c001': ['s01']}


# Part 1: Make Output Folders and Combine Images

## Part 1 Functions

In [117]:
def print_tree(d, n=5, indent=0):
    """
    Recursively prints the folder structure.
    
    Parameters:
    d (dict): The folder structure dictionary.
    n (int): The maximum number of subjects to print. Default is 5.
    indent (int): The indentation level (number of spaces). Default is 0.
    """
    
    subset = {k: d[k] for k in list(d)[:n]}
    
    for key, value in subset.items():
        print('    ' * indent + str(key))
        if isinstance(value, list):
            for item in value:
                print('    ' * (indent + 1) + str(item))
        elif isinstance(value, dict):
            print_tree(value, indent + 1)

In [97]:
def combine_images(working_dir, list_of_images, out_name, clean_up=True):
#images should be inside working_dir somewhere 

#def combine_images(working_dir, input_dir, participant, session, image_type, list_of_images, clean_up=True):
    """
    Combines images of different directions using niftymic.
    
    Parameters:
    working_dir (str): The working directory.
    input_dir (str): The input directory.
    participant (str): The participant ID.
    session (str): The session ID.
    image_type (str): The image type.
    list_of_images (list): The list of images to combine.
    clean_up (bool): Whether to clean up temporary files. Default is True.
    
    Returns:
    command (str): The command to combine images.
    """
    

    BIDSPATH = '/lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/0_pipeline_scripts'
   
    
    for i, image in enumerate(list_of_images, start=1):
        mask_file = f'{working_dir}/temp_{i}_{out_name}_mask.nii.gz'
        

        result = subprocess.run(['fslmaths', image, '-abs', '-bin', mask_file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            print(result.stderr.decode())
            raise Exception(f'Failed to create mask for {image}')
    
    mask_files = [f'{working_dir}/temp_{i}_{out_name}_mask.nii.gz' for i in range(1, len(list_of_images) + 1)]
    output_file = f'{working_dir}/{out_name}.nii.gz'
    #images=[i.replace(f'{working_dir}', '/app/data') for i in list_of_images] 
    
    cmd = [
        'singularity', 'exec',
        '-B', f'{working_dir}:/app/data',
        '-B', f'{BIDSPATH}:{BIDSPATH}',
        f'{BIDSPATH}/niftymic.sif',
        'niftymic_reconstruct_volume',
        '--filenames', *list_of_images,
        '--filenames-masks', *mask_files,
        '--output', output_file
    ]
    
    if clean_up == True:
        cmd += '\n'
        cmd += [
            'rm', '-r', 
            f'{working_dir}/config*', 
            f'{working_dir}/temp*', 
            f'{working_dir}/*mask*', 
            f'{working_dir}/motion_correction'
        ]
        
    command = ' '.join(cmd)

    return command


### Part 1a : Make output folder system

In [118]:
print(f'Your output folder, {output_dir}, will look something like this: ')
print_tree(subject_sessions)

Your output folder, output_jpeters_20240620_test, will look something like this: 
c001
    s01


In [99]:
# Create output folders for each subject and session based on the subject_sessions dictionary
for subject, sessions in subject_sessions.items():
    
    subject_folder = os.path.join(output_dir, subject)
    
    # Create the subject folder if it doesn't exist
    if not os.path.exists(subject_folder):
        os.makedirs(subject_folder)
    
    
    # Loop through each session for the subject
    for session in sorted(set(sessions)):
        print(f'Preparing output folders for {session} for {subject}')
        session_folder = os.path.join(subject_folder, session)
         
        # Check if the session folder exists and is not empty
        if os.path.exists(session_folder):
            if os.listdir(session_folder):
                print(f"{subject}: {session_folder} exists and is not empty")
        else:
            # Create the session folder if it doesn't exist
            if not os.path.exists(session_folder):
                os.makedirs(session_folder)

Preparing output folders for s01 for c001


### Part 1b: copy selected files to output folder system, combine images as needed (will submit to SLURM)

In [None]:
# Copy images to output folders; Combine images as needed
combine_dict={}
for subject, sessions in subject_sessions.items():
    subject_folder = os.path.join(output_dir, subject)
    

    # Loop through each session for the subject
    for session in sorted(set(sessions)):
        session_folder = os.path.join(subject_folder, session)
        
        if not os.path.exists(f'{session_folder}/SELECTED'):
            os.makedirs(f'{session_folder}/SELECTED')
         
        # Loop through each image type 
        for image_type in IMAGE_TYPES:

            # Check if a file with the image type already exists in the session folder
            if glob(f'{session_folder}/SELECTED/*{image_type}*.nii*'):
                    print(f"Error: A file with image type '{image_type}' already exists in {session_folder}/SELECTED")
            else: 
                # Get the list of images for the image type
                if input_type == 'Folder':
                    images = glob(f'{input_dir}/{subject}*{session}*{image_type}*.nii*') 
                elif input_type == 'BIDS':
                    images = glob(f'{input_dir}/{subject}/{session}/*{image_type}*.nii*')
             
                # Check if there are more than 3 images to combine
                if len(images) > 3:
                    print(f"Error: More than 3 images found for participant {subject}, session {session}, and image type {image_type}.")
                    continue
                
                # Combine the images if there are more than 1
                if len(images) > 1 and image_type not in ['FLAIR', 'flair', 'Flair']:
                    os.makedirs(f'{session_folder}/SELECTED/precombine', exist_ok=True)
                    [shutil.copy(i,f'{session_folder}/SELECTED/precombine') for i in images]
                    print(f"combining: {images}")
                    images_to_combine=glob(f'{session_folder}/SELECTED/precombine/*{image_type}*nii*') 
                    command = combine_images(f'{session_folder}/SELECTED', images_to_combine, f'{subject}_{session}_{image_type}', clean_up=True)
                    job_name = f"combine_images_{subject}_{session}_{image_type}"
                    job_id=submit_slurm_job(job_name, command)
                    combine_dict[(subject,session)] = job_id
                    
                if len(images) > 1 and image_type in ['FLAIR', 'flair', 'Flair']:
                    print('You have more than one FLAIR image, Combining these may not work well, please select one')
                
                # Copy the single image to the session folder
                elif len(images) == 1:
                    print(f"copying: {images} to {session_folder}/SELECTED/{subject}_{session}_{image_type}.nii.gz")
                    shutil.copy(images[0], f'{session_folder}/SELECTED/{subject}_{session}_{image_type}.nii.gz')

# Print the number of combine jobs submitted to SLURM
if len(combine_dict) > 0:
    print('You have', len(combine_dict), 'Combine Jobs submitted to SLURM; subject and job IDs are stored in combine_dict')
    print('You can type "squeue -u $USER" into your terminal to track SLURM job progress')
    print('You can check the output file matching the jobid in combine_dict to see code outputs and any errors')
else:
    print('You have', len(combine_dict), 'Combine Jobs')

In [102]:
#See your combine jobs on SLURM
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           1590972 bch-compu combine_ ch236393  R       0:18      1 compute-10-15
           1587644 bch-inter     bash ch236393  R    4:05:01      1 compute-5-0-3


# Part 2: Prepare Registration Images

## Part 2 Functions

In [52]:
def reslice_image(input_image):
    """
    Reslices an image to 1mm isovolumetric if the largest pixel dimension is greater than 1.5mm.
    
    Parameters:
    input_folder (str): The input folder.
    participant (str): The participant ID.
    session (str): The session ID.
    reg_target (str): The registration target.
    """
    
    if not os.path.exists(input_image):
        raise FileNotFoundError(f"File {input_image} does not exist")
    # Get the maximum pixel width
    cmd = f"fslinfo {input_image} | grep pixdim[1-3] | awk '{{ print $2 }}' | sort -rn | head -1"
    max_pixelwidth = float(subprocess.check_output(cmd, shell=True).strip())

    if max_pixelwidth > 1.5:
        print(f"Largest pixel dimension is {max_pixelwidth} > 1.5mm, reslicing to 1mm isovolumetric")
        
        stem = input_image.split('.')[0]
        size = 1
        output_file = f"{stem}_{size}mm.nii.gz"

        cmd = f"flirt -interp spline -in {input_image} -ref {input_image} -applyisoxfm {size} -out {output_file}"
        subprocess.run(cmd, shell=True)

        os.rename(input_image, f"{stem}_aniso.nii.gz")
        os.rename(output_file, input_image)
    else:
        print(f"Largest pixel dimension is {max_pixelwidth}, leaving image alone")
        

def bias_corr(input_image, image_type, skullstrip=None, clean_up=True):
    """
    Performs bias correction on an image.
    
    Parameters:
    input_folder (str): The input folder.
    participant (str): The participant ID.
    session (str): The session ID.
    reg_target (str): The registration target.
    mask (bool): Whether to create a brain mask. Default is True.
    clean_up (bool): Whether to clean up temporary files. Default is True.
    
    Returns:
    cmd (str): The command to run.
    """
        
    stem = input_image.split('.')[0] 
    folder = os.path.dirname(input_image)
    cmd="echo Starting\n"
    if os.path.exists(f"{stem}_orig.nii.gz"):
        print(f'{stem}_orig.nii.gz already exists, suggesting this image has been bias corrected already!')
        return
    
    # add scripts to Path so code can find them
    cmd = f"export PATH=$PATH:/lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/0_pipeline_scripts/\n"
    
    # Run fsl_anat_alt.sh
    cmd += f"fsl_anat_alt.sh -i {stem} -t {img_type} --noreg --nosubcortseg --noseg\n"

    # Rename files
    cmd += f"mv {stem}.nii.gz {stem}_orig.nii.gz\n" 
    cmd += f"mv {stem}.anat/T1_biascorr.nii.gz {stem}.nii.gz\n" 

    
    if skullstrip in ['optibet', 'both']:
        suffix = '_optibet'
        cmd += f"mv {stem}.anat/{image_type}_biascorr_brain.nii.gz {stem}_SkullStripped{suffix}.nii.gz\n"
        cmd += f"mv {stem}.anat/{image_type}_biascorr_brain_mask.nii.gz {stem}_brain-mask{suffix}.nii.gz\n"
    
    if skullstrip in ['synthstrip', 'both']:
        #suffix = '_synthstrip' if skullstrip == 'both' else ''
        suffix = '_synthstrip'
        out_file = f"{stem}_SkullStripped{suffix}.nii.gz"
        out_mask = f"{stem}_brain-mask{suffix}.nii.gz"
        cmd += f"mri_synthstrip -i {input_image} -o {out_file} -m {out_mask}\n"

        
    # Run fslmaths
    cmd += f"fslmaths {stem}.nii.gz {stem}.nii.gz -odt short\n"
    
    if clean_up == True:
        cmd += f"rm -r {stem}.anat\n"
        
    return cmd
    

### Run Part 2; will submit jobs to SLURM

In [83]:
skullstrip='synthstrip'

In [90]:
bias_corr_dict={}
for subject, sessions in subject_sessions.items():
    reg_target=None
    subject_folder = os.path.join(output_dir, subject) 
    for session in sessions:
        print(f'*** Processing {subject}: {session} ***')
        session_folder=os.path.join(subject_folder, session)
        
        reg_target=set_registration_target(glob(f'{session_folder}/SELECTED/*.nii*'))
        print(reg_target)
        
        #reg_image=glob(f'{session_folder}/SELECTED/*{reg_target}*')[0]
        reg_image=f'{session_folder}/SELECTED/{subject}_{session}_{reg_target}.nii.gz'
        reg_image = [file for file in glob(f'{session_folder}/SELECTED/*{reg_target}*') if 'orig' not in file and 'optibet' not in file and 'anat' not in file][0]
        
        print(f'Reslicing {reg_image}')
        #reslice_image(reg_image)
        
        print(f'Bias Correcting {reg_image}')
        if '1' in reg_target:
            img_type='T1'
        elif '2' in reg_target:
            img_type='T2'
        
        command=bias_corr(reg_image, img_type, skullstrip=skullstrip, clean_up=False) 
        job_name = f"bias_correct_{subject}_{session}_{reg_target}"
        # script_file=command_script(job_name, command)
        # print(script_file)
        # job_id=sbatch_script(job_name, script_file)
        job_id=submit_slurm_job_test(job_name, command)
        bias_corr_dict[(subject,session)] = job_id

if len(bias_corr_dict) > 0:
    print('You have', len(bias_corr_dict), 'Bias Correction Jobs submitted to SLURM; subject and job IDs are stored in bias_corr_dict')
    print('You can type "squeue -u $USER" into your terminal to track SLURM job progress')
    print('You can check the output file matching the jobid in bias_corr_dict to see code outputs and any errors')
else:
    print('You have', len(bias_corr_dict), 'Bias Correction Jobs')

*** Processing 7901-01-001: scan01 ***
t1
Reslicing /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan01/SELECTED/7901-01-001_scan01_t1_SkullStripped_synthstrip.nii.gz
Bias Correcting /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan01/SELECTED/7901-01-001_scan01_t1_SkullStripped_synthstrip.nii.gz
*** Processing 7901-01-001: scan03 ***
t1
Reslicing /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan03/SELECTED/7901-01-001_scan03_t1.nii.gz
Bias Correcting /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan03/SELECTED/7901-01-001_scan03_t1.nii.gz
*** Processing 7901-01-001: scan02 ***
t1
Reslicing /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan02/SELECTED/7901-01-001_scan02_t1.nii.gz
Bias Correcting /lab-share/Neuro-Cohen-e2/

In [70]:
bias_corr_dict

{('7901-01-001', 'scan01'): '1590448'}

In [88]:
!squeue -u $USER 

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           1590695 bch-compu slurm_sc ch236393  R       3:22      1 compute-5-0-1
           1587644 bch-inter     bash ch236393  R    3:30:11      1 compute-5-0-3
           1590111 bch-inter     bash ch236393  R    1:22:00      1 compute-10-9


# Part 3: Co-Register and Skull Strip Images

- If you selected 'both' for skullstrip previously, please look at the Part 2 outputs and select your preferred method before running this section; you can use the below cell to update this if needed

In [None]:
skullstrip='synthstrip' #['optibet', 'synthstrip', None]

## Part 3 Functions

In [112]:
# Co-register files to the Registration Target
def co_register(working_dir, reg_image, moving_image, brain_mask=None, clean_up=True):
    
    """
    Co-registers a moving image to a registration target.

    Parameters:
    working_dir (str): The working directory.
    reg_image (str): The registration target image.
    moving_image (str): The moving image.
    skullstrip (bool): Whether to skullstrip the image. Default is True.
    clean_up (bool): Whether to clean up temporary files. Default is True.

    Returns:
    cmd (str): The command to run.
    """
    
    if not os.path.exists(f'{working_dir}/warps'):
        os.makedirs(f'{working_dir}/warps')
    
    moving_stem=os.path.basename(moving_image).split('.')[0]
    
    if os.path.exists(f"{working_dir}/{moving_stem}_space-{reg_target}.nii.gz"):
        print(f"WARNING: Input image file {moving_stem}_space-{reg_target}.nii.gz already exists. Skipping co-registration")
        return 
    
    
    cmd = f"antsRegistrationSyNQuick.sh -d 3 -m {moving_image} -f {reg_image} -t sr -o {working_dir}/warps/{moving_stem}_space-{reg_target}\n"
    
    cmd +=f"mv {working_dir}/warps/{moving_stem}_space-{reg_target}Warped.nii.gz {working_dir}/{moving_stem}_space-{reg_target}.nii.gz\n"
    
    if clean_up == True:
        cmd +=f" rm -r {working_dir}/warps\n"
    
    if brain_mask:
        cmd += f"fslmaths {brain_mask} -mul {working_dir}/{moving_stem}_space-{reg_target}.nii.gz {working_dir}/{moving_stem}_space-{reg_target}_SkullStripped.nii.gz"
                        
    return cmd

    

### Run Part 3

In [113]:
if skullstrip not in ['optibet', 'synthstrip', None]:
    raise ValueError("Please change your skullstrip variable to either 'optibet', 'synthstrip', or None")

co_reg_dict={}
for subject, sessions in subject_sessions.items():
    reg_target=None
    subject_folder = os.path.join(output_dir, subject) 
    for session in sessions:
        print(f'*** Processing {subject}: {session} ***')
        session_folder=os.path.join(subject_folder, session)
        
        reg_target=set_registration_target(glob(f'{session_folder}/SELECTED/*.nii*'))
        print(reg_target)
        
        print(f'Registering Images to the {reg_target}') #~5 min per file
        
        if not os.path.exists(f'{session_folder}/COREGISTERED'):
            os.makedirs(f'{session_folder}/COREGISTERED')
            
        reg_file=f'{session_folder}/SELECTED/{subject}_{session}_{reg_target}.nii.gz'
        shutil.copy(reg_file, f'{session_folder}/COREGISTERED')
        

        brain_mask=None
        if skullstrip != None:
            suffix = '_' + skullstrip
            brain_mask=f'{session_folder}/SELECTED/*brain-mask*{suffix}*'
        
        for file in glob(f'{session_folder}/SELECTED/*.nii*'): #this will get other files like _orig etc. 
                if reg_target not in file:
                    print(f'Registering {file}')
                    command=co_register(f'{session_folder}/COREGISTERED', reg_file, file, brain_mask, clean_up=False)    
                    job_name = f"co-register_{subject}_{session}_{reg_target}"
                    job_id=submit_slurm_job(job_name, command)
                    co_reg_dict[(subject,session)] = job_id

if len(co_reg_dict) > 0:
    print('You have', len(co_reg_dict), 'Co-Registration Jobs submitted to SLURM; subject and job IDs are stored in the co_reg_dict')
    print('You can type "squeue -u $USER" into your terminal  or "!squeue -u $USER" in a cell to track SLURM job progress')
    print('You can check the output file matching the jobid in co_reg_dict to see code outputs and any errors')
                                        

*** Processing 7901-01-001: scan01 ***
t1
Registering Images to the t1
Registering /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan01/SELECTED/7901-01-001_scan01_t2.nii.gz
*** Processing 7901-01-001: scan03 ***
t1
Registering Images to the t1
Registering /lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/output_RDCRN_test_20240619/7901-01-001/scan03/SELECTED/7901-01-001_scan03_t2.nii.gz


KeyboardInterrupt: 

In [109]:
co_reg_dict

{('7901-01-001', 'scan01'): '1591012',
 ('7901-01-001', 'scan03'): '1591013',
 ('7901-01-001', 'scan02'): '1591014',
 ('7901-01-002', 'scan01'): '1591015',
 ('7901-01-002', 'scan02'): '1591016'}

In [110]:
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           1587644 bch-inter     bash ch236393  R    4:15:06      1 compute-5-0-3


# Part 4: Register to MNI
- If you are planning to trace lesions, do that first

In [None]:
#nilearn.datasets.load_mni152_template() #loads T1w

### Part 4 option a: EasyReg
- 1-5 minute runtime, pre-creating the segmentations using the "robust" setting likely gives better results for large lesions
- Can register across modality (i.e. T2w to T1w)
- Citation: Iglesias, J. E. (2023). A ready-to-use machine learning tool for symmetric multi-modality registration of brain MRI. Scientific Reports, 13(1), Article 1. https://doi.org/10.1038/s41598-023-33781-0


In [103]:
def easy_reg(working_dir, source_brain, target_brain, lesion_mask=None, other_brains=[], synthseg_robust=True):

    source_name=os.path.basename(source_brain).split('.')[0]

    cmd = f'echo Running EasyReg for {source_brain}\n'
    cmd += ' source activate easyreg\n'
    cmd += 'LD_LIBRARY_PATH=/opt/ohpc/pub/mpi/openmpi3-gnu8/3.1.4/lib:/opt/ohpc/pub/compiler/gcc/8.3.0/lib64\n'
    cmd += 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))\n'
    cmd += 'export LD_LIBRARY_PATH=$CUDNN_PATH/lib:/lab-share/Neuro-Cohen-e2/Public/environment/conda/easyreg/lib/python3.9/site-packages/tensorrt_libs/:$LD_LIBRARY_PATH:\n'

    if synthseg_robust==True:
        cmd +=f'mri_synthseg --i {source_brain} --o {working_dir}/{source_name}_synthseg.nii.gz --parc --robust\n'
        
    cmd += ' '.join([
        'mri_easyreg',
        '--ref', target_brain,
        '--flo', source_brain,
        '--ref_seg', f'0_pipeline_scripts/mni_icbm152_t1_tal_nlin_asym_09c_brain_synthseg.nii.gz',
        '--flo_seg', f'{working_dir}/{source_name}_synthseg.nii.gz',
        '--flo_reg', f'{working_dir}/{source_name}_MNI.nii.gz',
        '--fwd_field', f'{working_dir}/{source_name}_to_MNI_warp.nii.gz'
    ])
    
    if lesion_mask:
        cmd +=f'mri_easywarp --i {lesion_mask} --o {working_dir}/{source_name}_lesion_MNI.nii.gz --field {working_dir}/{source_name}_to_MNI_warp.nii.gz --nearest'
             
    if other_brains:
        for brain in other_brains:
            brain_name=os.path.basename(brain).split('.')[0]
            cmd +=f'mri_easywarp --i {brain} --o {working_dir}/{brain_name}_MNI.nii.gz --field {working_dir}/{source_name}_to_MNI_warp.nii.gz'

    return cmd
             

### Part 4 option b: ANTs SyN Regular or Quick
- Regular: 1-2 hour runtime, may do better than easyReg for brains with larger lesions/lots of deformation
- Quick: 5-15 min run time, but ANTs regular has significantly better results for lesioned brains

In [82]:
def ants(working_dir, source_brain, target_brain, lesion_mask=None, other_brains=[], transform='s', histogram_matching=False, quick=False):
    
    """
    Co-registers a moving image to a registration target.

    Parameters:
    working_dir (str): The working directory.
    reg_image (str): The registration target image.
    moving_image (str): The moving image.
    skullstrip (bool): Whether to skullstrip the image. Default is True.
    clean_up (bool): Whether to clean up temporary files. Default is True.

    Returns:
    cmd (str): The command to run.
    """
    
    if not os.path.exists(f'{working_dir}/warps'):
        os.makedirs(f'{working_dir}/warps')
    
    source_stem=os.path.basename(source_brain).split('.')[0]
    
    if os.path.exists(f"{working_dir}/{source_stem}_MNI.nii.gz"):
        print(f"WARNING: Input image file {source_stem}_MNI.nii.gz already exists. Skipping...")
        return 
    
    lesion_mask=''
    if lesion_mask:
        add_lesion_mask=f'-x {lesion_mask}'
    
    add_hist_match=''
    if histogram_matching == True:
        add_hist_match='-j 1'
    
    #The moving and fixed image are switched so that the lesion mask can be used in the registration
    # Usually the fixed image, aka target, would be the MNI brain 
    ants_cmd='antsRegistrationSyN.sh'
    if quick:
        ants_cmd='antsRegistrationSyNQuick.sh'
    
    cmd ="source /lab-share/Neuro-Cohen-e2/Public/environment/load_neuroimaging_env.sh\n""
    cmd += f"{ants_cmd} -d 3 -m {target_brain} -f {source_brain} -t {transform} {add_lesion_mask} {add_hist_match} -o {working_dir}/warps/{source_stem}_MNI_\n"
    
    cmd +=f"mv {working_dir}/warps/{source_stem}_MNI_InverseWarped.nii.gz {working_dir}/{source_stem}_MNI.nii.gz\n"
    
    if lesion_mask:
        lesion_stem=lesion_mask.split('.')[0]
        lesion_cmd = [
            'antsApplyTransforms', 
            '-d', '3', 
            '-i', f'{lesion_mask}', 
            '-r', f'{target_brain}', 
            '-t', f'[{working_dir}/warps/{source_stem}_MNI_0GenericAffine.mat, 1]', 
            '-t', f'{source_stem}_MNI_1InverseWarp.nii.gz', 
            '-n', 'NearestNeighbor', 
            '-o', f'{lesion_stem}_MNI.nii.gz'
        ]
        
        cmd += ' '.join(lesion_cmd) + '\n'

    if other_brains:
        for brain in other_brains:
            brain_stem=os.path.basename(brain).split('.')[0]
            brain_cmd += [
                'antsApplyTransforms', 
                '-d', '3', 
                '-i', f'{lesion_mask}', 
                '-r', f'{target_brain}', 
                '-t', f'[{working_dir}/warps/{source_stem}_MNI_0GenericAffine.mat, 1]', 
                '-t', f'{source_stem}_MNI_1InverseWarp.nii.gz', 
                '-n', 'Linear', 
                '-o', f'{brain_stem}_MNI.nii.gz'
            ]
        
        cmd += ' '.join(brain_cmd) + '\n'
    
                         
    return cmd

    

### Run Part 4

In [104]:
mni_reg_dict={}
for subject, sessions in subject_sessions.items():
    reg_target=None
    subject_folder = os.path.join(output_dir, subject) 
    for session in sessions:
        print(f'*** Processing {subject}: {session} ***')
        session_folder=os.path.join(subject_folder, session)
        
        reg_target=set_registration_target(glob(f'{session_folder}/COREGISTERED/*.nii*'))
        print(reg_target)
        
        if '1' in reg_target:
            target_image=f'/lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/0_pipeline_scripts/mni_icbm152_t1_tal_nlin_asym_09c_brain.nii.gz'
        elif '2' in reg_target:
            target_image=f'/lab-share/Neuro-Cohen-e2/Public/notebooks/gmiller/Pipeline/0_pipeline_scripts/mni_icbm152_t2_tal_nlin_asym_09c_brain.nii.gz'
        

        if not os.path.exists(f'{session_folder}/MNI_SPACE'):
            os.makedirs(f'{session_folder}/MNI_SPACE')
            
        source_file=f'{session_folder}/COREGISTERED/{subject}_{session}_{reg_target}.nii.gz'
        
        #look for lesion mask
        lesion_mask_files=glob(f'{session_folder}/COREGISTERED/*lesion*nii*')
        if lesion_mask_files:
            lesion_mask = lesion_mask_files[0]
        if len(lesion_mask_files) > 1:
            print(f"Warning: Multiple lesion mask files found. Using the first one: {lesion_mask}")
        else:
            lesion_mask = None
            print("No lesion mask file found.")

        #look for other brains to warp
        other_brains=glob(f'{session_folder}/COREGISTERED/*space-{reg_target}*')
        if other_brains:
            print("Found", len(other_brains), "to register to MNI space")
        else:
            print("Found no other brains to register to MNI space")
        
        print(f'Registering the {reg_target} to MNI using {mni_software}') 
        if mni_software == 'EasyReg':
            command=easy_reg(f'{session_folder}/MNI_SPACE', source_file, target_image, lesion_mask, other_brains, synthseg_robust=False)
        elif mni_software == 'ANTs':
            command=ants(working_dir, source_file, target_image, lesion_mask, other_brains, transform='s', histogram_matching=False, quick=False)
        elif mni_software == 'ANTsQuick':
            command=ants(working_dir, source_file, target_image, lesion_mask, other_brains, transform='s', histogram_matching=False, quick=True)
        else:
            print("Please set mni_software to 'EasyReg', 'ANTs', or 'ANTsQuick'")
            
        job_name = f"MNI-register_{subject}_{session}_{reg_target}"
        job_id=submit_slurm_job(job_name, command)
        mni_reg_dict[(subject,session)] = job_id
        

if len(mni_reg_dict) > 0:
    print('You have', len(mni_reg_dict), 'MNI Registration Jobs submitted to SLURM; subject and job IDs are stored in the co_reg_dict')
    print('You can type "squeue -u $USER" into your terminal  or "!squeue -u $USER" in a cell to track SLURM job progress')
    print('You can check the output file matching the jobid in co_reg_dict to see code outputs and any errors')
                                        

*** Processing c001: s01 ***


ValueError: No registration target found in []

In [91]:
mni_reg_dict

{('c001', 's01'): '1317966'}

In [90]:
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           1317128 bch-compu tmpz_4j_ ch236393  R      59:01      1 compute-5-0-3
           1317966 bch-compu tmp3njgj ch236393  R       0:41      1 compute-5-0-0
           1313822 bch-inter     bash ch236393  R    5:57:22      1 compute-10-9
           1316284 bch-inter     bash ch236393  R    2:12:42      1 compute-5-0-3
