### Total run time: ~ 7 mins

In [24]:
#default_exp normalize

### normalize image and skull_stripped_image for training

In [1]:
#export
from fastai2.medical.imaging_roi import *
from fastai2.medical.imaging import dicom_windows
from fastai2 import *
from fastai2.torch_core import *
from fastai2.core import *
from fastai2.basics import *

In [2]:
from local.data_prep import _plot_voxel

In [33]:
# Point the "script" section below at the YAML config it reads through the YAML_DATA env var.
# NOTE(review): absolute, machine-specific path - adjust before running on another machine.
os.environ['YAML_DATA']="/home/turgutluk/Vent_Seg_Project/dev/data.yaml"

In [3]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

In [4]:
# Patient Position Attribute - https://dicom.innolitics.com/ciods/raw-data/general-series/00185100
# We don't normalize the images by patient position; they are left as in the original.
# In a way, this acts as an inherent data augmentation.

### normalize images

In [5]:
# Normalize images with mean/std normalization, using statistics computed over every pixel.
# Normalize skull stripped images with the mean/std calculated inside the brain region only,
# and set the non-brain region to 0.

In [14]:
#export
def _normalize(t, mean, std):
    "normalization func"
    t = torch.clamp((t - mean) / std, -5, 5)
    _min, _max = torch.min(t), torch.max(t)
    return (t - _min) / (_max - _min)

In [15]:
#export
def _normalize_images_and_save(o):
    "Load the tensor stored at `o`, rescale it to the 0-1 range and save it next to `o`"
    img = torch.load(o)
    # statistics are taken over every element of the loaded tensor
    sd, mu = torch.std_mean(img)
    # the part of the file name before the first underscore prefixes the output name
    prefix = o.name.split('_')[0]
    dest = o.parent/f"{prefix}_image_normalized.pt"
    torch.save(_normalize(img, mu, sd), dest)

### normalize skull stripped images

In [16]:
#export
def _normalize_skull_stripped_images_and_save(o):
    "Normalizes individual skull stripped images to 0-1 scale and save"
    # read image and mask
    t = torch.load(o)
    p = o.parent
    suid = o.name.split('_')[0]
    msk = torch.load(p/f"{suid}_brain_mask.pt")
    # normalize
    std,mean = torch.std_mean(t[msk.bool()])
    t = _normalize(t, mean, std)*msk
    # save
    torch.save(t, p/f"{suid}_skull_stripped_image_normalized.pt")

### test: normalize

In [26]:
output_path = Path("/home/turgutluk/data/ventricles_data/")

In [27]:
files = get_files(output_path, extensions=['.pt'])

In [19]:
# Leave out the `*_normalized.pt` files written by the normalization functions: their names
# also contain 'image', so on a re-run they would be normalized a second time and overwritten.
raw_files = [o for o in files if not o.name.endswith('_normalized.pt')]
image_files = [o for o in raw_files if ('image' in o.name) and ('skull' not in o.name)]
skull_stripped_image_files = [o for o in raw_files if ('image' in o.name) and ('skull' in o.name)]

In [20]:
len(image_files), len(skull_stripped_image_files)

(2501, 2501)

In [21]:
# 3 mins
parallel(_normalize_images_and_save, image_files, n_workers=defaults.cpus//2)

(#2501) [None,None,None,None,None,None,None,None,None,None...]

In [22]:
# 4 mins
parallel(_normalize_skull_stripped_images_and_save, skull_stripped_image_files, n_workers=defaults.cpus//2)

(#2501) [None,None,None,None,None,None,None,None,None,None...]

### script

In [None]:
# export 
import yaml
# Config location comes from the YAML_DATA env var, falling back to ../data.yaml.
# NOTE(review): yaml.load with FullLoader is kept as-is; consider yaml.safe_load if the
# config could ever come from an untrusted source - confirm it only holds plain data.
with open(os.environ.get('YAML_DATA', '../data.yaml')) as f: io = yaml.load(f, Loader=yaml.FullLoader)

# Expose the three configured output locations as attributes: ATLAS, MR and CT
output_paths = types.SimpleNamespace(**{k: io['output'][k] for k in ('ATLAS', 'MR', 'CT')})

In [None]:
#export 
from time import perf_counter
@call_parse
def main(output_path:Param("Key of the data prep output directory in the YAML config (ATLAS, MR or CT)", str)):
    "Read tensors, normalize images and skull stripped images"
    start = perf_counter()

    # `output_path` is a key into `output_paths`, not a filesystem path
    known = vars(output_paths)
    if output_path not in known:
        raise KeyError(f"Unknown output_path {output_path!r}; expected one of {sorted(known)}")
    output_path = Path(known[output_path])

    files = get_files(output_path, extensions=['.pt'])
    # Leave out the `*_normalized.pt` files written by a previous run: their names also
    # contain 'image', so they would be normalized again and race with the real inputs
    # for the same output file.
    files = [o for o in files if not o.name.endswith('_normalized.pt')]
    image_files = [o for o in files if ('image' in o.name) and ('skull' not in o.name)]
    skull_stripped_image_files = [o for o in files if ('image' in o.name) and ('skull' in o.name)]
    parallel(_normalize_images_and_save, image_files, n_workers=defaults.cpus//2)
    parallel(_normalize_skull_stripped_images_and_save, skull_stripped_image_files, n_workers=defaults.cpus//2)

    end = perf_counter()
    print(f"Total time taken {end-start} seconds")

### Export

In [2]:
from local.notebook.export import notebook2script
notebook2script("1c) normalization.ipynb")

Converted 1c) normalization.ipynb.
