<a href="https://colab.research.google.com/github/IAmSuyogJadhav/Brainy/blob/master/Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preprocessing for Brainy

<center><img src="https://www.suyogjadhav.com/images/thumbs/brainy.png" height=100 width=100></center>

- &#9745; N4 Bias Correction
- &#9744; Skull Stripping
- &#9744; Normalization

# Colab Stuff
Things that need to be done once per Google Colaboratory session.

Mount the drive

In [0]:
# Mount Google Drive at /gdrive; later cells read the dataset archive and the
# ANTs archive from "/gdrive/My Drive/".
from google.colab import drive
drive.mount('/gdrive')
# !cd /gdrive/"My Drive" && ls

Extract the dataset files

In [0]:
import zipfile

# Unpack the dataset archive from Drive into the current working directory.
# The context manager closes the archive handle once extraction finishes
# (or fails), instead of leaving it open for the rest of the session.
with zipfile.ZipFile("/gdrive/My Drive/BRATS2015.zip") as zfile:
    zfile.extractall()

Install the required libraries

In [0]:
# Install the Python dependencies used below: SimpleITK and Nipype (with its
# "all" extras). The deepbrain install is intentionally left disabled.
# !pip install deepbrain
!pip install simpleitk
!pip install nipype[all]

Install pre-built ANTs binaries

In [0]:
# Unpack the pre-built ANTs archive from Drive into the working directory,
# then copy everything from the extracted `bin/` folder into /usr/local/bin.
!7z x '/gdrive/My Drive/ANTs-28-03-2019.7z'
!cp bin/* /usr/local/bin

Workaround for a bug in ANTs


In [0]:
# Create a symlink named `drive` in the working directory that points at the
# Drive folder, so the folder can be reached through a path without spaces.
# NOTE(review): presumably ANTs mishandles paths containing spaces - confirm.
!ln -s '/gdrive/My Drive/' drive

# Imports
Import necessary packages

In [0]:
import SimpleITK as sitk
import os
from pathlib import Path
from nipype.interfaces.ants import N4BiasFieldCorrection
import glob
import numpy as np
import shutil
import math
import concurrent.futures as executor
# import matplotlib.pyplot as plt
# import deepbrain

# Utils
A few functions.

## Helper functions for gathering all the examples in the dataset

In [0]:
def getImagePathsDict(train=True):
    """
    getImagePathsDict(train=True)
    -----------------------------
    Returns a dictionary mapping every scan folder to a dictionary that maps
    each of its modalities (t1, t1c, t2, flair) to the name of the matching
    entry inside that folder.

    Parameters
    ----------
      `train`:
          Boolean, defaults to True.
          If train is set to True, uses the training directory. Otherwise
          uses the testing directory. When train is True, an additional key
          "ot" pointing to the ground truth image is also provided.

    Returns
    -------
      Dictionary keyed by "<folder>/<scan folder>" (relative to the training
      or testing directory). Each value is a dictionary with the keys
      "t1", "t1c", "t2", "flair" (plus "ot" when train is True) whose values
      are entry names as returned by `os.listdir`, not full paths.

    Raises
    ------
      AssertionError: if `data_path` or `save_path` is not defined globally.
      IndexError: if a scan folder has no entry for one of the modalities.

    Remarks
    -------
      Made for BRATS2015 dataset.
      The following global variables need to be defined.
        `data_path`: Path to the BRATS2015 data folder.
        `save_path`: Path to the directory to store corrected images.
    """

    # Check for necessary global variables
    assert 'data_path' in globals(), (
        "Please define a global variable data_path "
        "with Path to the BRATS2015 data folder"
    )
    assert 'save_path' in globals(), (
        "Please define a global variable save_path "
        "with Path to the directory to store corrected images"
    )

    # Keep the split directory in its own local name: assigning to
    # `data_path` here would make that name local to the function and raise
    # UnboundLocalError as soon as it is read.
    if train:
        root = os.path.join(data_path, 'training')
        folders = ['HGG', 'LGG']
    else:
        root = os.path.join(data_path, 'testing')
        folders = ['HGG_LGG']

    # For every modality: the substring that identifies it, and a substring
    # that must be absent (None when no exclusion is needed).
    markers = {
        't1': ('T1', 'T1c'),  # plain 'T1' is also a substring of 'T1c'
        't1c': ('T1c', None),
        't2': ('T2', None),
        'flair': ('Flair', None),
    }
    if train:
        markers['ot'] = ('OT', None)

    out = {}
    for folder in folders:
        # Sorted so the result does not depend on directory listing order.
        for subfolder in sorted(os.listdir(os.path.join(root, folder))):
            scan_dir = os.path.join(root, folder, subfolder)
            if not os.path.isdir(scan_dir):
                continue  # Ignore stray files next to the scan folders

            entries = sorted(os.listdir(scan_dir))
            images = {}
            for key, (wanted, unwanted) in markers.items():
                matches = [
                    s for s in entries
                    if wanted in s and (unwanted is None or unwanted not in s)
                ]
                images[key] = matches[0]  # IndexError if modality is missing

            out[f'{folder}/{subfolder}'] = images

    return out


def getImagePathsList(train=True):
    """
    getImagePathsList(train=True)
    -----------------------------
    Returns a list containing the paths to all the .mha images in the
    dataset, relative to `data_path` (each path starts with `training/` or
    `testing/`).

    Parameters
    ----------
      `train`:
          Boolean, defaults to True.
          If train is set to True, uses the training directory, otherwise
          uses the testing directory.

    Returns
    -------
      Sorted list of relative path strings matching
      `<training|testing>/*/*/*.mha`. Empty if nothing matches.

    Raises
    ------
      AssertionError: if `data_path` or `save_path` is not defined globally.
      OSError: if `data_path` cannot be entered (raised by `os.chdir`).

    Remarks
    -------
      Made for BRATS2015 dataset.
      The following global variables need to be defined.
        `data_path`: Path to the BRATS2015 data folder.
        `save_path`: Path to the directory to store corrected images.
    """

    # Check for necessary global variables
    assert 'data_path' in globals(), (
        "Please define a global variable data_path "
        "with Path to the BRATS2015 data folder"
    )
    assert 'save_path' in globals(), (
        "Please define a global variable save_path "
        "with Path to the directory to store corrected images"
    )

    pattern = 'training/*/*/*.mha' if train else 'testing/*/*/*.mha'

    cur_dir = os.path.realpath('.')  # Remember where we started
    os.chdir(data_path)  # Glob relative to the dataset directory
    try:
        # glob returns matches in arbitrary order; sort for reproducibility
        out = sorted(glob.glob(pattern))
    finally:
        # Always restore the working directory, even if globbing fails
        os.chdir(cur_dir)
    return out

## N4 Bias Correction
Uses ANTs' N4 bias field correction through Nipype's `N4BiasFieldCorrection` interface. Also uses Python's `concurrent.futures` module to process multiple images at once by distributing them across the available CPU cores. Currently the fastest method available.

In [0]:
def N4BiasCorrectANTs(image_path, out=False):
    """
    N4BiasCorrectANTs(image_path, out=False)
    -----------------------------
    Corrects an image located at `image_path` inside the directory defined by
    `data_path` and writes the result to the same relative path inside
    `save_path`.

    Parameters
    ----------
      `image_path`:
          String, required.
          Path to the image to be corrected, starting with
          `training/` or `testing/`. The parent directory is obtained
          from the global variable `data_path`.
      `out`:
          Boolean, optional. Defaults to False.
          If set to True, returns the path to the corrected image.

    Returns
    -------
      None when `out` is False. When `out` is True, the path to the
      corrected image; for an image that was skipped because its output
      already exists, this is the path of that existing file.

    Raises
    ------
      AssertionError: if `data_path` or `save_path` is not defined globally.

    Remarks
    -------
      Made for BRATS2015 dataset.
      The following global variables need to be defined.
        `data_path`: Path to the BRATS2015 data folder.
        `save_path`: Path to the directory to store corrected images.
    """

    # Check for necessary global variables
    assert 'data_path' in globals(), (
        "Please define a global variable data_path "
        "with Path to the BRATS2015 data folder"
    )
    assert 'save_path' in globals(), (
        "Please define a global variable save_path "
        "with Path to the directory to store corrected images"
    )

    source = os.path.join(data_path, image_path)
    target = os.path.join(save_path, image_path)

    # Skip a file if already done. Only the existence of the output is
    # checked, so a file left behind by an interrupted run is skipped too.
    if os.path.isfile(target):
        # Report the image itself; there is no per-process progress counter.
        print(f'\r skipped {image_path}', end='')
        return target if out else None

    # Create necessary folders
    os.makedirs(os.path.dirname(target), exist_ok=True)

    # Correct the image using ANTs' N4BiasFieldCorrection (via Nipype)
    correct = N4BiasFieldCorrection()
    correct.inputs.input_image = source
    correct.inputs.output_image = target
    done = correct.run()

    if out:
        return done.outputs.output_image
    return None


def correctImagesANTs(train=True):
    """
    correctImagesANTs(train=True)
    ----------------------------
    Corrects all the images from `data_path` and saves them in the
    `save_path`, preserving the directory structure.

    Parameters
    ----------
      `train`:
          Boolean, defaults to True.
          If train is set to True, uses the training directory, otherwise
          uses the testing directory.

    Returns
    -------
      None. Images whose correction raised an exception are reported on
      standard output after all images have been attempted.

    Raises
    ------
      AssertionError: if `data_path` or `save_path` is not defined globally.

    Remarks
    -------
      Made for BRATS2015 dataset.
      The following global variables need to be defined.
        `data_path`: Path to the BRATS2015 data folder.
        `save_path`: Path to the directory to store corrected images.
    """

    # Check for necessary global variables
    assert 'data_path' in globals(), (
        "Please define a global variable data_path "
        "with Path to the BRATS2015 data folder"
    )
    assert 'save_path' in globals(), (
        "Please define a global variable save_path "
        "with Path to the directory to store corrected images"
    )

    # Get paths for all the images of all the types in the dataset
    images = getImagePathsList(train=train)

    # Process the images in parallel, one worker process per available CPU.
    # Each future's result is collected explicitly: an exception raised in a
    # worker only surfaces when its result is requested, so an unconsumed
    # `map` would hide every failure.
    failures = []
    with executor.ProcessPoolExecutor() as ex:
        pending = {
            ex.submit(N4BiasCorrectANTs, image): image for image in images
        }
        for future in executor.as_completed(pending):
            try:
                future.result()
            except Exception as err:  # Keep going; report at the end
                failures.append((pending[future], err))

    for image, err in failures:
        print(f'\nfailed {image}: {err!r}')

# Preprocessing

## Bias Field Correction
&lt;DONE>

In [0]:
#@title Set Paths

#@markdown Fill in the following fields:
#@markdown Data Path: Path to the BRATS2015 directory
# Read as a module-level global by getImagePathsDict, getImagePathsList,
# N4BiasCorrectANTs and correctImagesANTs (each asserts that it is defined).
data_path="/content/BRATS2015/"  #@param {type: "string"}

#@markdown Save Path: Path to the directory to store corrected images. Note: Avoid spaces in the path
# Corrected images are written below this directory, mirroring the relative
# layout found under `data_path`.
save_path='/content/drive/BRATS2015_corr_final/'  #@param {type: "string"}


In [0]:
#@title This needs to be performed only once. Done already?
#@markdown Tick the relevant boxes if performed already.

done_for_train = False #@param {type:"boolean"}
done_for_test = False #@param {type:"boolean"}

# `i` is a module-level progress counter; this cell only ever resets it to
# -1 and never increments it.
i = -1  # Reset progress bar

# Run bias correction over the training set unless it was already done.
if not done_for_train:
  correctImagesANTs(train=True)  # For train
  i = -1  # Reset progress bar

# Run bias correction over the testing set unless it was already done.
if not done_for_test:
  correctImagesANTs(train=False)  # For test
  i = -1  # Reset progress bar

## Skull Stripping
&lt;TODO>

## Normalization
&lt;TODO>

# Scratchpad

# Legacy Code

Old, slow, no-longer-to-be-used code.

## N4 Bias Correction

### SITK, Without Parallelization
(Slowest, don't use.)

In [0]:
# def N4BiasCorrect(img):
#     maskImage = sitk.OtsuThreshold(img, 0, 1, 200)  # From SITK Docs
#     img = sitk.Cast(img, sitk.sitkFloat32)
#     corrector = sitk.N4BiasFieldCorrectionImageFilter()
#     output = corrector.Execute(img, maskImage)
#     return output


# def correctImages(train=True):
#   if train:
#     data_path="BRATS2015/training"
#     folders=['HGG', 'LGG']
#     save_path="BRATS2015_corrected/training"
#   else:
#     data_path="BRATS2015/testing"
#     folders=['HGG_LGG']
#     save_path="BRATS2015_corrected/testing"

#   # Get paths for all the images of all the types in the dataset
#   images = getImagePaths(train=train)

# #   corrected = 
#   # Just some variables for progress bar
#   m = len(images)
#   step = 25 / m
#   i = -1
#   for path, mris in images.items():
#     i += 1

#     t1 = N4BiasCorrect(sitk.ReadImage(f'{data_path}/{path}/{mris["t1"]}'))
#     t1c = N4BiasCorrect(sitk.ReadImage(f'{data_path}/{path}/{mris["t1c"]}'))
#     t2 = N4BiasCorrect(sitk.ReadImage(f'{data_path}/{path}/{mris["t2"]}'))
#     flair = N4BiasCorrect(sitk.ReadImage(f'{data_path}/{path}/{mris["flair"]}'))

#     if train:
#       ot = N4BiasCorrect(sitk.ReadImage(f'{data_path}/{path}/{mris["ot"]}'))
#       sequences = ['t1', 't1c', 't2', 'flair', 'ot']
#     else:
#       sequences = ['t1', 't1c', 't2', 'flair']

#     shutil.os.makedirs(f'{save_path}/{path}')

#     for sequence in sequences:
#       sitk.WriteImage(eval(sequence), f"{save_path}/{path}/{mris[sequence]}")

#     print('\r' + f'Progress: '
#             f"[{'=' * int((i+1) * step) + ' ' * (24 - int((i+1) * step))}]"
#             f"({math.ceil((i+1) * 100 / m)} %)",
#             end='')


### SITK, With Parallelization
(Faster than the non-parallel SITK version above, but still quite slow. Don't use.)

In [0]:
# # For progress bar
# i = -1
# m = None
# step = None

# def N4BiasCorrectParallel(image_path):
#     global i, m, step
#     i+= 1 
    
# #     print('CHK 1')  #DEBUG
    
#     # Skip an image if it is already corrected
#     # or is being worked upon    
#     if os.path.isfile(os.path.join(save_path, image_path)) \
#         or os.path.isfile(os.path.join(save_path, image_path + '.working')):
#       print(f'\r skipped {i}', end='')
#       return
#     else:  # Reserve the filename.
#       try:  # If folder doesn't exist
#         os.makedirs(  # Create necessary folders
#             os.path.dirname(os.path.join(save_path, image_path))
#         )
#       except FileExistsError:  # If the folder exists
#         pass
      
#       # Let others know this file is being worked upon
#       Path(os.path.join(save_path, image_path + '.working')).touch()
      
# #     print('CHK 2')  #DEBUG
#     img = sitk.ReadImage(os.path.join(data_path, image_path), sitk.sitkFloat64)
#     output = sitk.N4BiasFieldCorrection(img, img > 0)
    
#     os.remove(os.path.join(save_path, image_path + '.working'))  # Remove the empty file
    
#     sitk.WriteImage(  # Save output image
#         output, 
#         os.path.join(save_path, image_path)
#     )
    
#     # print('CHK 5')  #DEBUG
    
#     # Print Progress
#     print('\r' + f'Progress: '
#         f"[{'=' * int((i+1) * step) + ' ' * (24 - int((i+1) * step))}]"
#         f"({math.ceil((i+1) * 100 / m)} %)",
#         end='')   


# def correctImagesParallel(train=True):
  
#   global m, step  # For progress bar
#   # Get paths for all the images of all the types in the dataset
#   images = getImagePathsList(train=train)
  
#   # Just some variables for progress bar
#   m = len(images)
#   step = 25 / m

#   with executor.ProcessPoolExecutor() as ex:
#     ex.map(N4BiasCorrectParallel, images)