In [1]:
import os
import shutil
import numpy as np
from scipy.io import loadmat
import numpy as np
import h5py
import pandas
import scipy
from scipy.ndimage.interpolation import zoom
from skimage import measure
import SimpleITK as sitk
from scipy.ndimage.morphology import binary_dilation,generate_binary_structure
from skimage.morphology import convex_hull_image
import pandas
from multiprocessing import Pool
from functools import partial
import sys
import warnings 
warnings.filterwarnings("ignore")

from utils.step1 import step1_python
from utils.config_training import config


In [2]:
from utils.imports import *

Using TensorFlow backend.


In [11]:
def load_train(data_path):
    folders = [x for x in os.listdir(data_path) if 'subset' in x]
    os.chdir(data_path)
    patients = []    
    for i in folders:
        os.chdir(data_path + i)
        patient_ids = [x for x in os.listdir(data_path + i) if '.mhd' in x]
        for id in patient_ids:
            j = '{}/{}'.format(i, id)
            patients.append(j)
    return patients

def get_filename(file_list, case):
    for f in file_list:
        if case in f:
            return(f)
        
def resample(imgs, spacing, new_spacing,order=2):
    if len(imgs.shape)==3:
        new_shape = np.round(imgs.shape * spacing / new_spacing)
        true_spacing = spacing * imgs.shape / new_shape
        resize_factor = new_shape / imgs.shape
        imgs = zoom(imgs, resize_factor, mode = 'nearest',order=order)
        return imgs, true_spacing
    elif len(imgs.shape)==4:
        n = imgs.shape[-1]
        newimg = []
        for i in range(n):
            slice = imgs[:,:,:,i]
            newslice,true_spacing = resample(slice,spacing,new_spacing)
            newimg.append(newslice)
        newimg=np.transpose(np.array(newimg),[1,2,3,0])
        return newimg,true_spacing
    else:
        raise ValueError('wrong shape')
def worldToVoxelCoord(worldCoord, origin, spacing):
     
    stretchedVoxelCoord = np.absolute(worldCoord - origin)
    voxelCoord = stretchedVoxelCoord / spacing
    return voxelCoord

def load_itk_image(filename):
    with open(filename) as f:
        contents = f.readlines()
        line = [k for k in contents if k.startswith('TransformMatrix')][0]
        transformM = np.array(line.split(' = ')[1].split(' ')).astype('float')
        transformM = np.round(transformM)
        if np.any( transformM!=np.array([1,0,0, 0, 1, 0, 0, 0, 1])):
            isflip = True
        else:
            isflip = False

    itkimage = sitk.ReadImage(filename)
    numpyImage = sitk.GetArrayFromImage(itkimage)
     
    numpyOrigin = np.array(list(reversed(itkimage.GetOrigin())))
    numpySpacing = np.array(list(reversed(itkimage.GetSpacing())))
     
    return numpyImage, numpyOrigin, numpySpacing,isflip

def process_mask(mask):
    convex_mask = np.copy(mask)
    for i_layer in range(convex_mask.shape[0]):
        mask1  = np.ascontiguousarray(mask[i_layer])
        if np.sum(mask1)>0:
            mask2 = convex_hull_image(mask1)
            if np.sum(mask2)>1.5*np.sum(mask1):
                mask2 = mask1
        else:
            mask2 = mask1
        convex_mask[i_layer] = mask2
    struct = generate_binary_structure(3,1)  
    dilatedMask = binary_dilation(convex_mask,structure=struct,iterations=10) 
    return dilatedMask


def lumTrans(img):
    lungwin = np.array([-1200.,600.])
    newimg = (img-lungwin[0])/(lungwin[1]-lungwin[0])
    newimg[newimg<0]=0
    newimg[newimg>1]=1
    newimg = (newimg*255).astype('uint8')
    return newimg

def savenpy_luna(id,annos,filelist,luna_segment,luna_data,savepath):
    islabel = True
    isClean = True
    resolution = np.array([1,1,1])
#     resolution = np.array([2,2,2])
    name = filelist[id]
    print(name)
    Mask,origin,spacing,isflip = load_itk_image(os.path.join(luna_segment,name+'.mhd'))
    if isflip:
        Mask = Mask[:,::-1,::-1]
    newshape = np.round(np.array(Mask.shape)*spacing/resolution).astype('int')
    m1 = Mask==3
    m2 = Mask==4
    Mask = m1+m2
    
    xx,yy,zz= np.where(Mask)
    box = np.array([[np.min(xx),np.max(xx)],[np.min(yy),np.max(yy)],[np.min(zz),np.max(zz)]])
    box = box*np.expand_dims(spacing,1)/np.expand_dims(resolution,1)
    box = np.floor(box).astype('int')
    margin = 5
    extendbox = np.vstack([np.max([[0,0,0],box[:,0]-margin],0),np.min([newshape,box[:,1]+2*margin],axis=0).T]).T

    this_annos = np.copy(annos[annos['seriesuid']==name])    
    if isClean:
        convex_mask = m1
        dm1 = process_mask(m1)
        dm2 = process_mask(m2)
        dilatedMask = dm1+dm2
        Mask = m1+m2
        extramask = dilatedMask ^ Mask
        bone_thresh = 210
        pad_value = 170

        sliceim,origin,spacing,isflip = load_itk_image(os.path.join(luna_data,name+'.mhd'))
        sliceim = lumTrans(sliceim)
        sliceim = sliceim*dilatedMask+pad_value*(1-dilatedMask).astype('uint8')
        bones = (sliceim*extramask)>bone_thresh
        sliceim[bones] = pad_value
        
        sliceim1,_ = resample(sliceim,spacing,resolution,order=1)
        sliceim2 = sliceim1[extendbox[0,0]:extendbox[0,1],
                    extendbox[1,0]:extendbox[1,1],
                    extendbox[2,0]:extendbox[2,1]]
        sliceim = sliceim2[np.newaxis,...]
        np.save(os.path.join(savepath,name+'_clean.npy'),sliceim)
        #writeGif(savepath+name+"_clean.gif", sliceim, duration=0.05,nq=0.1) 

    if islabel:

        this_annos = np.copy(annos[annos['seriesuid']==name])
        label = []
        if len(this_annos)>0:
            
            for c in this_annos:
                pos = worldToVoxelCoord(c[1:4][::-1],origin=origin,spacing=spacing)
                if isflip:
                    pos[1:] = Mask.shape[1:3]-pos[1:]
                label.append(np.concatenate([pos,[c[4]/spacing[1]]]))
            
        label = np.array(label)
        if len(label)==0:
            label2 = np.array([[0,0,0,0]])
        else:
            label2 = np.copy(label).T
            label2[:3] = label2[:3]*np.expand_dims(spacing,1)/np.expand_dims(resolution,1)
            label2[3] = label2[3]*spacing[1]/resolution[1]
            label2[:3] = label2[:3]-np.expand_dims(extendbox[:,0],1)
            label2 = label2[:4].T
        np.save(os.path.join(savepath,name+'_label.npy'),label2) 
        #writeGif(savepath+name+"_label.gif", label2, duration=0.05,nq=0.1) 
    print(name)

    
def preprocess_luna():
    luna_segment = config['luna_segment']
    savepath = config['preprocess_result_path']
    luna_data = config['luna_data']
    luna_label = config['luna_label']
    finished_flag = '.flag_preprocessluna'
    print('starting preprocessing luna')
    annos = df_node.copy()
    filelist = annos['seriesuid'].values
    print(filelist)
    if not os.path.exists(savepath):
        os.mkdir(savepath)

    for i in range(len(filelist)):
        savenpy_luna(i,annos=annos,filelist=filelist,luna_segment=luna_segment,luna_data=luna_data,savepath=savepath) 
    print('end preprocessing luna')

In [12]:
data_path = config['luna_raw']+'train\\'
csv_path = PATH['annotations_train']

pic_path = PATH['pic_train']
train_lung_path = PATH['model_train_lung']
train_nodule_path = PATH['model_train_nodule']

In [13]:
patients = load_train(data_path)
df_node = pd.read_csv(csv_path+"annotations.csv")
df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(patients, file_name))
df_node = df_node.dropna()

In [14]:
preprocess_luna()

starting preprocessing luna
['LKDS-00040' 'LKDS-00030' 'LKDS-00001' 'LKDS-00021' 'LKDS-00066'
 'LKDS-00003' 'LKDS-00003' 'LKDS-00061' 'LKDS-00058' 'LKDS-00050'
 'LKDS-00013' 'LKDS-00047' 'LKDS-00047' 'LKDS-00065' 'LKDS-00065'
 'LKDS-00007' 'LKDS-00004' 'LKDS-00051' 'LKDS-00035' 'LKDS-00035'
 'LKDS-00053' 'LKDS-00020' 'LKDS-00020' 'LKDS-00020' 'LKDS-00020'
 'LKDS-00020' 'LKDS-00020' 'LKDS-00020' 'LKDS-00020' 'LKDS-00020'
 'LKDS-00020' 'LKDS-00020' 'LKDS-00015' 'LKDS-00026' 'LKDS-00026'
 'LKDS-00041' 'LKDS-00025' 'LKDS-00044' 'LKDS-00064' 'LKDS-00064'
 'LKDS-00043' 'LKDS-00052' 'LKDS-00052' 'LKDS-00052' 'LKDS-00052'
 'LKDS-00052' 'LKDS-00052' 'LKDS-00019' 'LKDS-00019' 'LKDS-00062'
 'LKDS-00062' 'LKDS-00029' 'LKDS-00034' 'LKDS-00042' 'LKDS-00054'
 'LKDS-00054' 'LKDS-00054' 'LKDS-00054' 'LKDS-00054' 'LKDS-00039'
 'LKDS-00039' 'LKDS-00005' 'LKDS-00038' 'LKDS-00023' 'LKDS-00023'
 'LKDS-00036' 'LKDS-00011' 'LKDS-00028' 'LKDS-00016' 'LKDS-00016'
 'LKDS-00016']
LKDS-00040
LKDS-00040
LKDS-00030


KeyboardInterrupt: 