# Summary

This notebook reads the metadata of the MR files in order to set a standard (pixel size, physical size) for all images.

> All input images will be resampled onto the reference domain. This domain is defined by two constraints: the number of pixels per dimension and the physical size we want the reference domain to occupy. The former is associated with the computational constraints of deep learning where using a small number of pixels is desired. The latter is associated with the SimpleITK concept of an image: it occupies a region in physical space which should be large enough to encompass the object of interest.

Src: http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/70_Data_Augmentation.html

# Imports

In [5]:
# imports

import os, sys, time
from joblib import Parallel, delayed

import numpy as np
import pandas as pd

from pandas import DataFrame as DF

import SimpleITK as sitk

from helpers_general import sitk2np, mask2sitk, print_sitk_info, round_tuple, lrange, lmap, get_roi_range, numbers2groups
from helpers_preprocess import mask2bbox, print_bbox, get_bbox_size, print_bbox_size, get_data_dict, folder2objs, \
                                threshold_based_crop, get_reference_frame, resample2reference

from helpers_metrics import compute_dice_coefficient, compute_coverage_coefficient
from helpers_viz import viz_axis

In [6]:
# Auto-reload imported modules before each cell runs, so that edits to the
# local helper functions are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Data

- Load data from folder (train_path)
- Crop to foreground
- Get standard reference domain
- Resample to the same reference domain

In [7]:
# Project root; the data location below is built relative to it

PROJ_PATH = "."

# Directory whose sub-folders hold the MR training data
train_path = f"{PROJ_PATH}/train_data/train_data"
train_data_dict = get_data_dict(train_path)

# Summarise which numeric folders exist and how many entries were indexed
print(f"Train data folders: {numbers2groups(sorted(map(int, os.listdir(train_path))))}")
print(f"Training data (size {len(train_data_dict)}): key = train folder, value = full path to (segm obj, nii file)\n")

Train data folders: [range(50002, 50017), range(50019, 50020), 50049, range(50455, 50464)]
Training data (size 29): key = train folder, value = full path to (segm obj, nii file)



In [51]:
# Numeric folder ids in ascending order, and for each one the .nii path
# (element 1 of the corresponding train_data_dict entry)
folders = sorted(map(int, os.listdir(train_path)))
nii_paths = [train_data_dict[str(folder_id)][1] for folder_id in folders]

In [50]:
# Gather one metadata record per training folder, then tabulate them
d = []

for folder in folders:
    path = train_data_dict[str(folder)][1]

    # Only the image information is read here (ReadImageInformation), not Execute()
    reader = sitk.ImageFileReader()
    reader.SetFileName(path)
    reader.ReadImageInformation()

    # Spacing rounded to 2 decimals; direction entries rounded to 1 decimal then cast to int
    spacing_rounded = tuple(round(x, 2) for x in reader.GetSpacing())
    direction_as_int = tuple(int(round(x, 1)) for x in reader.GetDirection())

    d.append(dict(
        fn=folder,
        sz=reader.GetSize(),
        px=sitk.GetPixelIDValueAsString(reader.GetPixelID()),
        sp=spacing_rounded,
        dir=direction_as_int,
    ))

d = DF(d)
d

Unnamed: 0,fn,sz,px,sp,dir
0,50002,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
1,50003,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
2,50004,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
3,50005,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
4,50006,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
5,50007,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
6,50008,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
7,50009,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
8,50010,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
9,50011,"(176, 256, 256)",16-bit signed integer,"(1.05, 1.05, 1.05)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"


In [54]:
def get_img_data(path):
    """Return the (size, spacing) pair of the image file at `path`.

    The file is opened with a SimpleITK `ImageFileReader` and only
    `ReadImageInformation()` is called before querying the reader.
    The direction matrix is not returned.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    tuple
        `(size, spacing)` exactly as reported by the reader.
    """
    info_reader = sitk.ImageFileReader()
    info_reader.SetFileName(path)
    info_reader.ReadImageInformation()

    size = info_reader.GetSize()
    spacing = info_reader.GetSpacing()
    return size, spacing

In [55]:
all_img_data = [get_img_data(path) for path in nii_paths]

In [56]:
def get_reference_frame(all_img_data):
    """Derive a common reference domain large enough to cover every image.

    NOTE: the import cell also imports a `get_reference_frame` from
    `helpers_preprocess`; once this cell runs, this definition is the one
    bound to that name in the notebook.

    The physical extent of an image along an axis is `(size - 1) * spacing`.
    The reference extent is the per-axis maximum of that quantity over all
    images. The reference domain uses isotropic unit spacing, a zero origin
    and an identity direction matrix.

    Parameters
    ----------
    all_img_data : iterable of (size, spacing)
        One pair per image, each a 3-element sequence (e.g. the tuples
        returned by `get_img_data`).

    Returns
    -------
    tuple
        `(reference_size, pixel_id, reference_origin, reference_spacing,
        reference_direction, reference_center)`, the layout unpacked by
        `get_reference_image`.

    Raises
    ------
    ValueError
        If `all_img_data` contains no entries.
    """
    img_data = list(all_img_data)
    if not img_data:
        raise ValueError("all_img_data is empty: cannot derive a reference frame")

    dimension = 3  # 3D MRs
    pixel_id = sitk.sitkInt16  # 16-bit signed integer

    # Physical image size corresponds to the largest physical size in the training set.
    # A true element-wise maximum is used: comparing `sz*spc` against the running
    # maximum while storing `(sz-1)*spc` could make the stored value shrink.
    reference_physical_size = np.zeros(dimension)
    for img_sz, img_spc in img_data:
        extent = (np.asarray(img_sz, dtype=float) - 1) * np.asarray(img_spc, dtype=float)
        reference_physical_size = np.maximum(reference_physical_size, extent)

    # Kept so the notebook still shows the computed physical size
    print(reference_physical_size)

    # Reference image has a zero origin and an identity direction cosine matrix
    reference_origin = np.zeros(dimension)
    reference_direction = np.identity(dimension).flatten()

    # Isotropic (1,1,1) pixels; +1 because extent spans first to last pixel centre
    reference_spacing = np.ones(dimension)
    reference_size = [int(phys_sz / spc + 1)
                      for phys_sz, spc in zip(reference_physical_size, reference_spacing)]

    # A temporary image is built only to map the centre index to physical space
    reference_image = sitk.Image(reference_size, pixel_id)
    reference_image.SetOrigin(reference_origin)
    reference_image.SetSpacing(reference_spacing)
    reference_image.SetDirection(reference_direction)

    center_index = np.array(reference_image.GetSize()) / 2.0
    reference_center = np.array(reference_image.TransformContinuousIndexToPhysicalPoint(center_index))
    return reference_size, pixel_id, reference_origin, reference_spacing, reference_direction, reference_center

def get_reference_image(reference_frame):
    """Build a SimpleITK image from a reference-frame tuple.

    Parameters
    ----------
    reference_frame : tuple
        `(size, pixel_id, origin, spacing, direction, center)`, the layout
        returned by `get_reference_frame`.

    Returns
    -------
    tuple
        `(image, center)`: a new `sitk.Image` of the given size and pixel
        type with origin, spacing and direction set from the frame, and the
        frame's center passed through unchanged.
    """
    size, pixel_type, origin, spacing, direction, center = reference_frame

    blank = sitk.Image(size, pixel_type)
    blank.SetOrigin(origin)
    blank.SetSpacing(spacing)
    blank.SetDirection(direction)

    return blank, center

In [57]:
reference_frame = get_reference_frame(all_img_data)

[190.80000758 267.94921875 267.94921875]


In [58]:
reference_frame

([191, 268, 268],
 2,
 array([0., 0., 0.]),
 array([1., 1., 1.]),
 array([1., 0., 0., 0., 1., 0., 0., 0., 1.]),
 array([ 95.5, 134. , 134. ]))

In [59]:
reference_image, reference_center = get_reference_image(reference_frame)

In [60]:
reference_center

array([ 95.5, 134. , 134. ])

In [62]:
print_sitk_info(reference_image)

Size:  (191, 268, 268)
Origin:  (0.0, 0.0, 0.0)
Spacing:  (1.0, 1.0, 1.0)
Direction:  (1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)
Pixel type: 2 = 16-bit signed integer
