In [None]:
# -*- coding: utf-8 -*-
import sys; print('Python %s on %s' % (sys.version, sys.platform))
import os
import time
import json
from glob import glob, iglob
from tqdm import tqdm
import matplotlib.pyplot as plt

import numpy as np; print('numpy', np.__version__)
import pandas as pd; print('pandas', pd.__version__)
import cv2; print('opencv2', cv2.__version__)
import SimpleITK as itk; print(itk.Version())
import scipy.ndimage; print('scipy', scipy.__version__)
import skimage; print('skimage', skimage.__version__)
from skimage import morphology, measure, segmentation, filters

import settings
import helper

# Lung Extraction (Test Set)

In [None]:
# Preprocess every raw test scan (*.mhd under settings.RAW_TEST_DIR):
#   1. resample the volume to settings.TARGET_VOXEL_MM,
#   2. normalise it in the lung window and in the mediastinal window,
#   3. segment every slice in both windows,
#   4. write the normalised slices and their masks as PNGs to
#      settings.PREPROCESS_TEST_DIR/<seriesuid>/,
#   5. collect one metadata dict per scan in list_meta_test.
list_meta_test = []
# sorted() makes the processing order (and the row order of the metadata) reproducible;
# glob gives no ordering guarantee.
for src_path in sorted(iglob(settings.RAW_TEST_DIR + '*.mhd')):
    start_time = time.time()
    # load lung and get property, convert to numpy.ndarray
    itk_img = itk.ReadImage(src_path)
    # Remove the directory part and the '.mhd' extension. str.strip('.mhd') must not be
    # used here: it strips any of the characters '.', 'm', 'h', 'd' from BOTH ends of the
    # name instead of removing the suffix, so it can silently truncate an id.
    lung_id = os.path.splitext(os.path.basename(src_path))[0]

    origin = np.array(itk_img.GetOrigin())     # (x,y,z)
    spacing = np.array(itk_img.GetSpacing())   # (x,y,z)
    rescale = spacing / settings.TARGET_VOXEL_MM

    lung_array = itk.GetArrayFromImage(itk_img)  # (z,y,x)
    num_z, height, width = lung_array.shape
    width_res = int(np.round(width * rescale[0]))
    height_res = int(np.round(height * rescale[1]))
    slice_res = int(np.round(num_z * rescale[2]))
    resample_size = np.array([width_res, height_res, slice_res])

    print('lung:', lung_id, type(lung_array), lung_array.shape, '(z,y,x)')
    print('resample:', type(resample_size), resample_size, '(x,y,z)')
    print('origin:', type(origin), origin, '(x,y,z)')
    print('spacing:', type(spacing), spacing, '(x,y,z)')
    print('rescale:', type(rescale), rescale, '(x,y,z)')

    # meta
    meta = {
        'seriesuid': lung_id,
        'width': width_res,
        'height': height_res,
        'slice': slice_res,
        'originX': origin[0],
        'originY': origin[1],
        'originZ': origin[2],
        'spacingX': spacing[0],
        'spacingY': spacing[1],
        'spacingZ': spacing[2]
    }

    # resample by scipy; the zoom factors are reordered to match the array's (z,y,x) layout.
    # scipy.ndimage.zoom is the public name; the scipy.ndimage.interpolation namespace is deprecated.
    real_resize_factor = np.array([rescale[2], rescale[1], rescale[0]]) # z,y,x
    lung_array_res = scipy.ndimage.zoom(lung_array, real_resize_factor, mode='nearest')
    print('resampled lung:', lung_id, type(lung_array_res), lung_array_res.shape, '(z,y,x)')
    # NOTE(review): the 1 s pause is presumably there so the prints are flushed before
    # the tqdm bar starts drawing -- confirm before removing.
    print('time:', time.time() - start_time); time.sleep(1)

    # normalize in lung window
    ww, wl = helper.get_window_size('lung')
    print('lung_array_res_lnorm:', ww, wl)
    lung_array_res_lnorm = helper.normalize(lung_array_res, ww=ww, wl=wl)

    # normalize in mediastinal window
    ww, wl = helper.get_window_size('mediastinal')
    print('lung_array_res_mnorm:', ww, wl)
    lung_array_res_mnorm = helper.normalize(lung_array_res, ww=ww, wl=wl)

    # output directory for this scan; exist_ok makes the cell safe to re-run and
    # makedirs also creates a missing parent directory
    out_dir = settings.PREPROCESS_TEST_DIR + lung_id
    os.makedirs(out_dir, exist_ok=True)

    # segment lung
    cap_sum_lung = 0
    cap_sum_medi = 0
    for i in tqdm(range(lung_array_res.shape[0])): #z,y,x
        # Plot only occasional slices. `& True` is a no-op kept from the original,
        # presumably as a manual switch (change to `& False` to silence all plots).
        visible = ((i+1)%(16*rescale[2])==0) & True
        if visible:
            print('slice no:', i)

        # segment the slice in both windows and accumulate the returned capacities
        maskl, capacity = helper.get_segmented_lung(lung_array_res_lnorm[i,:,:], mode='lung', plot=False)
        cap_sum_lung += capacity

        maskm, capacity = helper.get_segmented_lung(lung_array_res_mnorm[i,:,:], mode='medi', plot=visible)
        cap_sum_medi += capacity

        # common path prefix for the four files of this slice, e.g. <out_dir>/0007
        slice_prefix = f'{out_dir}/{str(i).zfill(4)}'

        # lung window slice
        cv2.imwrite(slice_prefix + '.png', lung_array_res_lnorm[i,:,:])

        # mediastinal window slice
        cv2.imwrite(slice_prefix + '_medi.png', lung_array_res_mnorm[i,:,:])

        # mask slice
        cv2.imwrite(slice_prefix + '_maskl.png', maskl.astype(int)*255)
        cv2.imwrite(slice_prefix + '_maskm.png', maskm.astype(int)*255)

    # summed capacity divided by the resampled voxel count
    # (np.prod replaces np.product, which was removed in NumPy 2.0)
    num_voxels = np.prod(resample_size)
    meta['segmented_lung'] = float(cap_sum_lung / num_voxels)
    meta['segmented_medi'] = float(cap_sum_medi / num_voxels)

    list_meta_test.append(meta)

    # progress line; the counter is the number of test scans finished so far
    print('-'*40, lung_id, meta['segmented_lung'], meta['segmented_medi'], len(list_meta_test), 'done', '\n')
    

In [None]:
# Column order for the metadata table: the keys of the first per-scan record,
# in the order they were inserted.
columns = list(list_meta_test[0].keys())
columns

In [None]:
# Assemble the per-scan metadata into a table indexed by seriesuid and save it as CSV.
df_meta_test = (
    pd.DataFrame(list_meta_test, columns=columns)
    .set_index('seriesuid')
)
df_meta_test.to_csv(settings.PREPROCESS_TEST_META_FILE, encoding='utf-8')


In [None]:
# Sanity check: one metadata record must exist for every raw .mhd file in the test directory.
assert len(glob(settings.RAW_TEST_DIR + '*.mhd')) == len(list_meta_test)