# Preprocessing Example

This notebook shows how to preprocess the data for the inference task. To provide a plug-and-play demonstration, we have choosen to demonstrate on IXI data. The preprocessing should be similar for ADNI

In [1]:
import torch
import nibabel as nib
import wget
import tarfile
import os
from git import Repo
import sys
import re
import pandas as pd
import json

import numpy as np

from SFCN import SFCN

## Download IXI

In [2]:
t1_link = "http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar"
csf_link = "http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI.xls"
tar_file = "IXI-T1.tar"
data_folder = "IXI-T1"

# Download the data
if not os.path.exists(tar_file): 
    wget.download(t1_link, out=tar_file)

    # Extract the data
    tar = tarfile.open(tar_file)
    tar.extractall(path=data_folder)
    tar.close()

if not os.path.exists("IXI.xls"):
    wget.download(csf_link, out="IXI.xls")

## Download and install HD-BET

In [3]:
if not os.path.exists("HD_BET"):
    Repo.clone_from("git@github.com:MIC-DKFZ/HD-BET.git", "HD_BET")

    # Install HD-BET
    os.chdir("HD_BET")
    os.system("pip install -e .")
    os.chdir("..")

# Add HD_BET to the path
sys.path.append("HD_BET")

In [4]:
import HD_BET.run

## Skullstrip
Skullstrip each image using HD-BET and save the results in bids format.

In [5]:
data_folder_skullstripped = "IXI-T1-skullstripped/bids"
if not os.path.exists(data_folder_skullstripped):
    os.makedirs(data_folder_skullstripped, exist_ok=True)

files_missing_skullstripping = []
files_missing_skullstripping_out = []

# Set the limit of files to be skullstripped
n_limit = 5

# Collect the files that need to be skullstripped
for scan_file in os.listdir(data_folder):
    if len(files_missing_skullstripping) >= n_limit:
        break
    if scan_file.endswith(".nii.gz"):
        # Extract the subject id
        subject_id = re.findall(r"IXI[0-9]+", scan_file)[0]

        subject_folder = os.path.join(data_folder_skullstripped, "sub-" + subject_id)

        if not os.path.exists(subject_folder):
            os.makedirs(subject_folder)

        session_folder = os.path.join(subject_folder, "ses-01")

        if not os.path.exists(session_folder):
            os.makedirs(session_folder)
            os.makedirs(os.path.join(session_folder, "anat"))
            files_missing_skullstripping.append(os.path.join(data_folder, scan_file))

            skullstripped_file = session_folder + "/anat/sub-" + subject_id + "_ses-01_T1w_brain.nii.gz"
            files_missing_skullstripping_out.append(skullstripped_file)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Skullstrip the files

HD_BET.run.run_hd_bet(files_missing_skullstripping, files_missing_skullstripping_out, mode="fast",
                        device=device, postprocess=False, do_tta=True, keep_mask=False, overwrite=True)




File: IXI-T1/IXI477-IOP-1141-T1.nii.gz
preprocessing...
image shape after preprocessing:  (117, 160, 160)
prediction (CNN id)...
0
exporting segmentation...
File: IXI-T1/IXI622-Guys-1102-T1.nii.gz
preprocessing...
image shape after preprocessing:  (120, 160, 160)
prediction (CNN id)...
0
exporting segmentation...
File: IXI-T1/IXI546-HH-2450-T1.nii.gz
preprocessing...
image shape after preprocessing:  (120, 160, 160)
prediction (CNN id)...
0
exporting segmentation...
File: IXI-T1/IXI459-Guys-0992-T1.nii.gz
preprocessing...
image shape after preprocessing:  (120, 160, 160)
prediction (CNN id)...
0
exporting segmentation...
File: IXI-T1/IXI214-HH-1636-T1.nii.gz
preprocessing...
image shape after preprocessing:  (120, 160, 160)
prediction (CNN id)...
0
exporting segmentation...


## Preprocess
* Crop the top 5 percentiles of the image intensity
* Scale the image to the interval [0, 1]
* Convert the images to int8 to improve memory usage and loading speed

In [6]:
for subject in os.listdir(data_folder_skullstripped):
    for session in os.listdir(os.path.join(data_folder_skullstripped, subject)):
        # Check that there exists a preprocessed T1w file
        preprocess_file = os.path.join(data_folder_skullstripped, subject, session, "anat", subject + '_' +  session + "_T1w_preprocessed_brain.nii.gz")
        if not os.path.exists(preprocess_file):
            # Load the T1w file
            t1w_file = os.path.join(data_folder_skullstripped, subject, session, "anat", subject + '_' + session + "_T1w_brain.nii.gz")

            t1w_nib = nib.load(t1w_file)

            t1w = t1w_nib.get_fdata()
            # Clip the top 5 percent of the intensities within the brain
            percentile_95 = np.percentile(t1w[t1w > 0], 95)
            
            t1w = np.clip(t1w, 0, percentile_95)

            # Normalize the intensities to the interval [0, 1]
            t1w = t1w / percentile_95

            # Convert to int8
            t1w = (t1w * 255).astype(np.uint8)

            # Save as nifti
            nib.save(nib.Nifti1Image(t1w, t1w_nib.affine), preprocess_file)

# Create A JSON Dataset
Save the subjects in a json file for easy loading later.

In [7]:
df = pd.read_excel("IXI.xls")

# Get the name and type of the columns
for col in df.columns:
    print(col, df[col].dtype)


IXI_ID int64
SEX_ID (1=m, 2=f) int64
HEIGHT int64
WEIGHT int64
ETHNIC_ID int64
MARITAL_ID int64
OCCUPATION_ID int64
QUALIFICATION_ID int64
DOB object
DATE_AVAILABLE int64
STUDY_DATE datetime64[ns]
AGE float64


In [8]:
preprocessed_subjects = {}
for subject in os.listdir(data_folder_skullstripped):
    for session in os.listdir(os.path.join(data_folder_skullstripped, subject)):
        # Check that there exists a preprocessed T1w file
        preprocess_file = os.path.join(data_folder_skullstripped, subject, session, "anat", subject + '_' +  session + "_T1w_preprocessed_brain.nii.gz")
        if os.path.exists(preprocess_file):
            subject_id = int(re.findall(r"IXI([0-9]+)", subject)[0])
            preprocessed_subjects[subject_id] = preprocess_file

# Create a dataframe with the preprocessed subjects
df_preprocessed = df[df["IXI_ID"].isin(preprocessed_subjects.keys())].copy()

# Add the image paths to the dataframe
df_preprocessed["image"] = df_preprocessed["IXI_ID"].map(preprocessed_subjects)

In [9]:
# Save as json
df_preprocessed['STUDY_DATE'] = df_preprocessed['STUDY_DATE'].astype(str)
df_preprocessed_dict = df_preprocessed.to_dict(orient="records")

with open("IXI_preprocessed.json", "w") as f:
    json.dump(df_preprocessed_dict, f, indent=4)
