# Summary

This notebook reads the metadata of the MR files in order to set a standard (pixel size, physical size) for all images.

> All input images will be resampled onto the reference domain. This domain is defined by two constraints: the number of pixels per dimension and the physical size we want the reference domain to occupy. The former is associated with the computational constraints of deep learning where using a small number of pixels is desired. The later is associated with the SimpleITK concept of an image, it occupies a region in physical space which should be large enough to encompass the object of interest.

Src: http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/70_Data_Augmentation.html

# Imports

In [66]:
# imports

import os, sys, time, pickle
from pathlib import Path

import numpy as np
from pandas import DataFrame as DF

import SimpleITK as sitk

from helpers_general import sitk2np, print_sitk_info, round_tuple, lrange, lmap, get_roi_range, numbers2groups
from helpers_preprocess import get_reference_frame, resample2reference

In [2]:
# auto-reload when local helper fns change
%load_ext autoreload
%autoreload 2

%matplotlib inline

# Data

- Load data from folder (train_path)
- Get standard reference domain
- Resample to sample reference domain

In [27]:
# get ext (nii, dcm, hdr, etc) for each data source
exts = []

for folder in data_folders:
    with open(f"{folder}-exts.txt", "rb") as fp:   # Unpickling
        exts.append(pickle.load(fp))
        
print(*list(zip(data_folders, exts)), sep="\n")

('ABIDE', {'.nii', '.img'})
('ABVIB', {'.dcm'})
('ACRIN-FMISO-Brain', {'.dcm'})
('ADNI', {'.nii'})
('PPMI', {'.dcm'})


In [7]:
# wsl: /home/rgologorsky/DeepPit
hd_path = "../../../mnt/d/PitMRdata"

# all folders in HD
all_folders = os.listdir(hd_path)

# folders with data
data_folders = ["ABIDE", "ABVIB", "ACRIN-FMISO-Brain", "ADNI", "PPMI"]

# data src2idx
src2idx = dict((v,k) for k,v in enumerate(data_folders))

# get ext (nii, dcm, hdr, etc) for each data source
exts = []

for folder in data_folders:
    with open(f"{folder}-exts.txt", "rb") as fp:   # Unpickling
        exts.append(pickle.load(fp))
        
print(*list(zip(data_folders, exts)), sep="\n")

# get paths to MR
mr_paths = []

for folder in data_folders:
    with open(f"{folder}.txt", "rb") as fp:   # Unpickling
        mr_paths.append(pickle.load(fp))

# Input selection

# number of paths in each data folder
lens = [len(d) for d in mr_paths]
print(lens)

# choose n=3 MRs from each data folder
n = 3
train_paths = [np.random.choice(a, size=n, replace=False) for a in mr_paths]
print(*train_paths, sep="\n")

[1160, 778, 1403, 2640, 1524]
['../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50438/MP-RAGE/2000-01-01_00_00_00.0/S165020'
 '../../../mnt/d/PitMRdata/ABIDE/ABIDE/51274/Matched_Bandwidth_Hires/2000-01-01_00_00_00.0/S162911'
 '../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50304/MP-RAGE/2000-01-01_00_00_00.0/S165165']
['../../../mnt/d/PitMRdata/ABVIB/ABVIB/238/MPRAGE/2013-05-01_13_01_03.0/S343379'
 '../../../mnt/d/PitMRdata/ABVIB/ABVIB/69/t1_mpr_sag/2009-03-31_08_33_40.0/S346038'
 '../../../mnt/d/PitMRdata/ABVIB/ABVIB/60006/MPRAGE/2010-05-14_11_05_59.0/S342245']
['../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACRIN-FMISO-Brain-003/06-14-1960-MRIBRNWWO-49008/45.000000-DTImosaicpcrescanLOWB-17263'
 '../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACRIN-FMISO-Brain-003/08-29-1961-MRIBRNWWOPER-08491/1.000000-Localizer-06591'
 '../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACRIN-FMISO-Brain-003/06-14-1960-MRIBRNWWO-49008/66.000000-MEMPRAGEaxial-60513']
['../../../mnt/d/PitMRdata/ADNI/ADNI1_Complete_1Yr_1.5T/ADNI/011_S

In [13]:
flat_train_paths = [p for a in train_paths for p in a]
print(f"Train sz = {len(flat_train_paths)}:", *flat_train_paths, sep="\n")

Train sz = 15:
../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50438/MP-RAGE/2000-01-01_00_00_00.0/S165020
../../../mnt/d/PitMRdata/ABIDE/ABIDE/51274/Matched_Bandwidth_Hires/2000-01-01_00_00_00.0/S162911
../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50304/MP-RAGE/2000-01-01_00_00_00.0/S165165
../../../mnt/d/PitMRdata/ABVIB/ABVIB/238/MPRAGE/2013-05-01_13_01_03.0/S343379
../../../mnt/d/PitMRdata/ABVIB/ABVIB/69/t1_mpr_sag/2009-03-31_08_33_40.0/S346038
../../../mnt/d/PitMRdata/ABVIB/ABVIB/60006/MPRAGE/2010-05-14_11_05_59.0/S342245
../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACRIN-FMISO-Brain-003/06-14-1960-MRIBRNWWO-49008/45.000000-DTImosaicpcrescanLOWB-17263
../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACRIN-FMISO-Brain-003/08-29-1961-MRIBRNWWOPER-08491/1.000000-Localizer-06591
../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACRIN-FMISO-Brain-003/06-14-1960-MRIBRNWWO-49008/66.000000-MEMPRAGEaxial-60513
../../../mnt/d/PitMRdata/ADNI/ADNI1_Complete_1Yr_1.5T/ADNI/011_S_0005/MPR__GradWarp__B1_Correction__N3__Scaled/

In [40]:
type(flat_train_paths[0])

numpy.str_

In [42]:
type(get_folder_name(flat_train_paths[0]))

str

In [22]:
os.listdir("../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50438/MP-RAGE/2000-01-01_00_00_00.0/S165020")

['ABIDE_50438_MRI_MP-RAGE_br_raw_20120830200249492_S165020_I329030.nii']

In [50]:
def get_folder_name(s):
    s = s[len("../../../mnt/d/PitMRdata/"):]
    return s[0:s.index("/")]

# get extension
# assume all files in dir have same extension
def get_ext(dir_path):
    # assume all files in dir have same extension
    file = next(os.walk(dir_path))[2][0]
    
    # in case file is bytes not str
    try:
        file = file.decode()
    except:
        pass
    
    return Path(file).suffix.lower()

# data frame w/ meta data info
d = [None] * len(flat_train_paths)

for i,path in enumerate(flat_train_paths):
    
    # get folder name = data src
    folder = get_folder_name(path)
    
    # get file ext (nii, dcm, etc)
    idx    = src2idx[folder]
    ext    = get_ext(path)
    
    # get file
    # ASSUMES only 1 nii in folder
    
    if ext == ".nii" or ext == ".dcm":
        file = os.listdir(str(path))[0]
        file = f"{path}/{file}"
    else:
        print(f"Weird ext - {ext}.")
    
    # read meta data
    reader = sitk.ImageFileReader()
    reader.SetFileName(file)
    reader.ReadImageInformation()
    
    d[i] = {
        "folder": folder,
        "fn":  file,
        "sz": reader.GetSize(),
        "px": sitk.GetPixelIDValueAsString(reader.GetPixelID()),
        "sp": tuple(round(x,2) for x in reader.GetSpacing()),
        "dir": tuple(int(round(x,1)) for x in reader.GetDirection())
    }
    
d = DF(d)
d

Unnamed: 0,folder,fn,sz,px,sp,dir
0,ABIDE,../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50438/M...,"(160, 480, 512)",16-bit signed integer,"(1.2, 0.5, 0.5)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
1,ABIDE,../../../mnt/d/PitMRdata/ABIDE/ABIDE/51274/Mat...,"(128, 128, 34)",16-bit signed integer,"(1.5, 1.5, 4.0)","(1, 0, 0, 0, 0, 0, 0, 0, 0)"
2,ABIDE,../../../mnt/d/PitMRdata/ABIDE/ABIDE_1/50304/M...,"(256, 256, 124)",16-bit signed integer,"(1.02, 1.02, 1.2)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
3,ABVIB,../../../mnt/d/PitMRdata/ABVIB/ABVIB/238/MPRAG...,"(256, 256, 1)",16-bit unsigned integer,"(1.0, 1.0, 1.0)","(0, 0, -1, 1, 0, 0, 0, -1, 0)"
4,ABVIB,../../../mnt/d/PitMRdata/ABVIB/ABVIB/69/t1_mpr...,"(256, 256, 1)",16-bit unsigned integer,"(1.0, 1.0, 1.0)","(0, 0, -1, 1, 0, 0, 0, -1, 0)"
5,ABVIB,../../../mnt/d/PitMRdata/ABVIB/ABVIB/60006/MPR...,"(256, 256, 1)",16-bit unsigned integer,"(1.0, 1.0, 1.0)","(0, 0, -1, 1, 0, 0, 0, -1, 0)"
6,ACRIN-FMISO-Brain,../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACR...,"(128, 128, 1)",16-bit unsigned integer,"(1.85, 1.85, 1.85)","(1, 0, 0, 0, 1, 0, 0, 0, 1)"
7,ACRIN-FMISO-Brain,../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACR...,"(256, 256, 1)",16-bit signed integer,"(0.94, 0.94, 10.0)","(1, 0, 0, 0, 1, 0, 0, 0, 1)"
8,ACRIN-FMISO-Brain,../../../mnt/d/PitMRdata/ACRIN-FMISO-Brain/ACR...,"(256, 256, 1)",16-bit unsigned integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, 1, 0, 0, 0, 1)"
9,ADNI,../../../mnt/d/PitMRdata/ADNI/ADNI1_Complete_1...,"(192, 192, 160)",32-bit float,"(1.25, 1.25, 1.2)","(0, 0, 1, 0, 1, 0, -1, 0, 0)"


In [57]:
def get_img_data(path):
    # get folder name = data src
    folder = get_folder_name(path)
    
    # get file ext (nii, dcm, etc)
    idx    = src2idx[folder]
    ext    = get_ext(path)
    
    # get file
    # ASSUMES only 1 nii in folder
    
    if ext == ".nii" or ext == ".dcm":
        file = os.listdir(str(path))[0]
        file = f"{path}/{file}"
    else:
        print(f"Weird ext - {ext}.")
        
    reader = sitk.ImageFileReader()
    reader.SetFileName(file)
    reader.ReadImageInformation()
    return  reader.GetSize(), reader.GetSpacing() #reader.GetDirection()

In [58]:
all_img_data = [get_img_data(path) for path in flat_train_paths]

In [59]:
def get_reference_frame(all_img_data):
    img_data = all_img_data
    
    dimension = 3 # 3D MRs
    pixel_id = 2 # 16-bit signed integer

    # Physical image size corresponds to the largest physical size in the training set, or any other arbitrary size.
    reference_physical_size = np.zeros(dimension)

    for img_sz, img_spc in img_data:
        reference_physical_size[:] = [(sz-1)*spc if sz*spc>mx else mx \
                                      for sz, spc, mx in zip(img_sz, img_spc, reference_physical_size)]

    print(reference_physical_size)
    
    # Create the reference image with a zero origin, identity direction cosine matrix and dimension     
    reference_origin = np.zeros(dimension)
    reference_direction = np.identity(dimension).flatten()

    # Isotropic (1,1,1) pixels
    reference_spacing = np.ones(dimension)
    reference_size = [int(phys_sz/(spc) + 1) for phys_sz,spc in zip(reference_physical_size, reference_spacing)]

    # Set reference image attributes
    reference_image = sitk.Image(reference_size, pixel_id)
    reference_image.SetOrigin(reference_origin)
    reference_image.SetSpacing(reference_spacing)
    reference_image.SetDirection(reference_direction)

    reference_center = np.array(reference_image.TransformContinuousIndexToPhysicalPoint(np.array(reference_image.GetSize())/2.0))
    return reference_size, pixel_id, reference_origin, reference_spacing, reference_direction, reference_center

def get_reference_image(reference_frame):
    reference_size, pixel_id, reference_origin, reference_spacing, reference_direction, reference_center = reference_frame
    reference_image = sitk.Image(reference_size, pixel_id)
    reference_image.SetOrigin(reference_origin)
    reference_image.SetSpacing(reference_spacing)
    reference_image.SetDirection(reference_direction)
    return reference_image, reference_center

In [60]:
reference_frame = get_reference_frame(all_img_data)

[258.98309827 258.98309827 255.5       ]


In [61]:
reference_frame

([259, 259, 256],
 2,
 array([0., 0., 0.]),
 array([1., 1., 1.]),
 array([1., 0., 0., 0., 1., 0., 0., 0., 1.]),
 array([129.5, 129.5, 128. ]))

In [62]:
reference_image, reference_center = get_reference_image(reference_frame)

In [63]:
reference_center

array([129.5, 129.5, 128. ])

In [64]:
print_sitk_info(reference_image)

Size:  (259, 259, 256)
Origin:  (0.0, 0.0, 0.0)
Spacing:  (1.0, 1.0, 1.0)
Direction:  (1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)
Pixel type: 2 = 16-bit signed integer
