## Zip extraction
This notebook extracts the volumes from the zip archive provided by the MOOD Challenge, resizes each volume to (256, 160, 160), and saves it as a pickled namedtuple in the format consumed by the dataloaders in *1. Latent Spatial Reconstructions - MOOD Brain dataset*.

In [2]:
from zipfile import ZipFile 
import nibabel as nib
import io
import os
import numpy as np
from skimage.transform import resize

In [3]:
import pickle
from collections import namedtuple

In [4]:
# Record layout of one stored volume. save_file fills only `img` and `cid`;
# the remaining fields are written as None.
img_extended = namedtuple('img_extended',('img','seg','k','t','coord','cid'))


def save_file(case_id, vol, tgt_dir):
    """Pickle one volume to ``<tgt_dir>/<case_id>.nt``.

    The file contains a single-element list holding an ``img_extended``
    record with ``img=vol`` and ``cid=case_id``; every other field is None.

    Parameters
    ----------
    case_id : str
        Identifier of the volume; also used as the output file stem.
    vol : object
        The volume to store (any picklable object).
    tgt_dir : str
        Output directory. It is created when missing. An empty string
        writes into the current working directory.
    """
    # os.makedirs('') raises, so only create the directory when one is named.
    if tgt_dir:
        os.makedirs(tgt_dir, exist_ok=True)

    target_file = os.path.join(tgt_dir, case_id + ".nt")
    record = [img_extended(vol, None, None, None, None, case_id)]

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(target_file, 'wb') as out_file:
        pickle.dump(record, out_file)


In [None]:
# Directory where the processed volumes (one `<case_id>.nt` pickle per volume) are saved.
# Placeholder: set this before running the extraction cell. If left empty,
# os.path.join('', name) resolves to the current working directory.
tgt_dir = ''

# Path to the MOOD Challenge brain zip archive.
# Placeholder: must be set before running the extraction cell.
src_zip = ''

In [None]:
# Extract every gzip-compressed entry of the archive, bring it to the
# (256, 160, 160) uint8 layout and store it through save_file.

# Scratch file used to hand each zip member to nibabel, which loads from a path.
tmp_nifti_path = './tmp_file.nii.gz'

with ZipFile(src_zip) as myzip:

    for file_name in myzip.namelist():
        # Skip directory entries and anything that is not gzip-compressed.
        if not file_name.endswith('.gz'):
            continue

        # The `with` block flushes and closes the scratch file before nibabel
        # reads it back; reading an unflushed file can yield truncated data.
        with open(tmp_nifti_path, 'wb') as tmp_file:
            tmp_file.write(myzip.read(file_name))

        vol = nib.load(tmp_nifti_path)
        vol = np.asarray(vol.get_fdata())
        vol = resize(vol, (160, 160, 256))
        # Reverse the axis order so the 256-long axis comes first: (256, 160, 160).
        vol = vol.transpose((2,1,0))
        # Flip the second axis.
        vol = vol[:, ::-1, :]
        # NOTE(review): scaling by 255 assumes intensities lie in [0, 1] — confirm for the dataset.
        vol = (vol * 255).astype('uint8')

        # Case id is the file stem up to the first dot, independent of how
        # deeply the entry is nested inside the archive.
        case_id = os.path.basename(file_name).split('.')[0]

        save_file(case_id, vol, tgt_dir)

# Do not leave the scratch file behind.
if os.path.exists(tmp_nifti_path):
    os.remove(tmp_nifti_path)

20 volumes are moved manually from the training directory to a different directory as a holdout dataset. The following code normalizes the MRI images using full-volume statistics and saves them to `preprocessed_data`.

In [5]:
# Record layout of one preprocessed sample: the `img_extended` fields plus
# `empty_mask`. Despite its name, `empty_mask` as produced by
# offline_preprocessing is True for the slices that are kept (not all-zero and
# not trimmed at either end of the volume).
mri_sample = namedtuple('mri_sample', ('img', 'seg', 'k', 't', 'coord', 'cid', 'empty_mask'))

In [None]:
# Create the output directory for the normalized volumes. exist_ok keeps the
# cell idempotent, so re-running the notebook does not fail when it already exists.
os.makedirs("preprocessed_data", exist_ok=True)

In [None]:
def offline_preprocessing(img, slice_offset=20, foreground_threshold=0.05):
    """
        Compute the mask of usable slices and normalize the volume
        using full-volume foreground statistics.

        Parameters
        ----------
        img : np.ndarray
            3-D volume; slices are indexed along axis 0. Not modified.
        slice_offset : int
            Number of slices at each end of axis 0 that are always
            marked as ignored. 0 disables the trimming.
        foreground_threshold : float
            Voxels strictly above this value are treated as foreground;
            only they contribute to the statistics and get normalized.

        Returns
        -------
        img : np.ndarray
            float32 copy of the input whose foreground voxels are
            z-scored with the foreground mean and standard deviation.
            Other voxels keep their value.
        empty_mask : np.ndarray
            Boolean vector over axis 0. Despite the name, True marks a
            slice that is kept: it has at least one non-zero voxel and
            is not within `slice_offset` of either end.

        Raises
        ------
        ValueError
            If `slice_offset` is negative.
    """
    if slice_offset < 0:
        raise ValueError("slice_offset must be non-negative")

    empty_mask = ~np.all(img == 0, axis=(1,2))

    # Guard the trimming: with an offset of 0, `mask[-0:]` is `mask[0:]`
    # and would wipe the whole mask instead of nothing.
    if slice_offset > 0:
        empty_mask[:slice_offset] = False
        empty_mask[-slice_offset:] = False

    # One mask for both statistics and normalization, so both steps
    # cover exactly the same voxels.
    foreground = img > foreground_threshold
    has_foreground = bool(foreground.any())

    if has_foreground:
        # Statistics are taken on the input dtype, before the float32 cast.
        mu, std = img[foreground].mean(), img[foreground].std()

    img = img.astype(np.float32)

    if has_foreground:
        # A constant foreground has zero spread; only center it rather
        # than dividing by zero and filling the volume with NaN.
        if std == 0:
            std = 1.0
        img[foreground] = (img[foreground] - mu) / std

    return img, empty_mask

In [6]:
import glob

# Perform additional preprocessing (currently done in the dataloader)
slice_offset = 20

# Sorted so the volumes are processed in a reproducible order.
filenames = sorted(glob.glob("brain_data/*.nt"))

for file_path in filenames:
    filename = os.path.basename(file_path)

    # Each .nt file holds a single-element list; pop() takes the record out.
    # NOTE: pickle.load can execute arbitrary code — only read files produced by this notebook.
    with open(file_path, "rb") as src_file:
        item = pickle.load(src_file).pop()

    # Pass the configured offset explicitly so changing it above takes effect.
    img, empty_mask = offline_preprocessing(item.img, slice_offset=slice_offset)

    sample = mri_sample(
        img=img,
        seg=None,
        k=None,
        t=None,
        coord=None,
        # NOTE(review): the case id is stored as None although the loaded record
        # has a `cid` field — confirm this is intended.
        cid=None,
        empty_mask=empty_mask,
    )

    with open(f"preprocessed_data/{filename}", "wb") as dst_file:
        pickle.dump(sample, dst_file)

In [None]:
# Bundle the preprocessed_data directory into the gzip-compressed archive preprocessed_data.tar.gz.
!tar -czf preprocessed_data.tar.gz preprocessed_data 