In [1]:
%matplotlib inline
import SimpleITK as sitk
import numpy as np
import csv
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm_notebook as tq


In [2]:
def get_filename(case):
    global file_list
    for f in file_list:
        if case in f:
            return(f)

In [3]:
def make_mask(center,diam,z,width,height,spacing,origin):
    mask = np.zeros([height,width]) # 0's everywhere except nodule swapping x,y to match img
    #convert to nodule space from world coordinates

    # Defining the voxel range in which the nodule falls
    v_center = (center-origin)/spacing
    v_diam = int((diam+5)/spacing[0])
    v_xmin = np.max([0,int(v_center[0]-v_diam)])
    v_xmax = np.min([width-1,int(v_center[0]+v_diam)])
    v_ymin = np.max([0,int(v_center[1]-v_diam)]) 
    v_ymax = np.min([height-1,int(v_center[1]+v_diam)])

    v_xrange = range(v_xmin,v_xmax+1)
    v_yrange = range(v_ymin,v_ymax+1)

    # Convert back to world coordinates for distance calculation
    x_data = [x*spacing[0]+origin[0] for x in range(width)]
    y_data = [x*spacing[1]+origin[1] for x in range(height)]
    for v_x in v_xrange:
        for v_y in v_yrange:
            p_x = spacing[0]*v_x + origin[0]
            p_y = spacing[1]*v_y + origin[1]
            if np.linalg.norm(center-np.array([p_x,p_y,z]))<=diam:
                mask[int((p_y-origin[1])/spacing[1]),int((p_x-origin[0])/spacing[0])] = 1.0
    return(mask)

In [4]:
MIN_BOUND=-1000
MAX_BOUND=400
def normalize(image):
    image = (image - MIN_BOUND)/(MAX_BOUND - MIN_BOUND)
    image[image>1]=1
    image[image<0]=0
    return image

In [5]:
def matrix2int16(matrix):
    ''' 
matrix must be a numpy array NXN
Returns uint16 version
    '''
    m_min= np.min(matrix)
    m_max= np.max(matrix)
    matrix = matrix-m_min
    return(np.array(np.rint( (matrix-m_min)/float(m_max-m_min) * 65535.0),dtype=np.uint16))


In [13]:
train_pts = [1,2,3,4,5,6]
val_pts = [7,8]
test_pts = [9]

In [7]:
annotation_path='C:/Users/USER/Downloads/pfa/CSVFILES/annotations.csv' #kol had ybadel 

tr_output_img_path="C:/Users/USER/Downloads/NORMt/luna_img_mask/train/images/class/"
tr_output_mask_path="C:/Users/USER/Downloads/NORM/luna_img_mask/train/labels/class/"

if not os.path.isdir(tr_output_img_path):
    os.makedirs(tr_output_img_path)
if not os.path.isdir(tr_output_mask_path):
    os.makedirs(tr_output_mask_path)

v_output_img_path="C:/Users/USER/Downloads/NORM/luna_img_mask/val/images/class/"
v_output_mask_path="C:/Users/USER/Downloads/NORM/luna_img_mask/val/labels/class/"

if not os.path.isdir(v_output_img_path):
    os.makedirs(v_output_img_path)
if not os.path.isdir(v_output_mask_path):
    os.makedirs(v_output_mask_path)
    
ts_output_img_path="C:/Users/USER/Downloads/NORM/luna_img_mask/test/images/class/"
ts_output_mask_path="C:/Users/USER/Downloads/NORM/luna_img_mask/test/labels/class/"

if not os.path.isdir(ts_output_img_path):
    os.makedirs(ts_output_img_path)
if not os.path.isdir(ts_output_mask_path):
    os.makedirs(ts_output_mask_path)

In [14]:
# __________________0 1 6_______________________________________
# _______________   2 3 4 5______________________________________
for tr in list(train_pts):
    print('Set:',tr)
    luna_subset_path='C:/Users/USER/Downloads/pfa/subset'+str(tr)+'/' # KOL HAD YBADEL SUBSET MTEOUU WIN 
    file_list=glob(luna_subset_path+"*.mhd")
    df_node = pd.read_csv(annotation_path)
    df_node["file"] = df_node["seriesuid"].apply(get_filename)
    df_node = df_node.dropna() 
    for img_file in tq(file_list):       
        #print "Getting mask for image file %s" % img_file.replace(luna_subset_path,"")
        mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
#         print(len(mini_df))
        if len(mini_df)>0:       # some files may not have a nodule--skipping those 
            for index, row in mini_df.iterrows():
                node_x = row["coordX"]
                node_y = row["coordY"]
                node_z = row["coordZ"]
                diam = row["diameter_mm"]
                itk_img = sitk.ReadImage(img_file) 
                img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
                center = np.array([node_x,node_y,node_z])   # nodule center
                origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
                spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
                v_center =np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering)
                num_z, height, width = img_array.shape

                center = np.array([node_x, node_y, node_z])   # nodule center
                v_center = np.rint((center-origin)/spacing) 

                for i_z in range(int(v_center[2])-1,int(v_center[2])+2):
                    mask = make_mask(center,diam,i_z*spacing[2]+origin[2],width,height,spacing,origin)
                    masks = mask
        #             imgs = matrix2int16(img_array[i_z])
                    imgs = img_array[i_z]
                    middle = imgs[100:400,100:400]
                    mean = np.mean(middle)  
                    max = np.max(imgs)
                    min = np.min(imgs)
                    # To improve threshold finding, I'm moving the 
                    # underflow and overflow on the pixel spectrum
                    imgs[imgs==max]=mean
                    imgs[imgs==min]=mean
        #             print('orig:',img_array[i_z].max())
        #             print('int16:',imgs.max())
                    imgs=normalize(imgs)
                    np.save(tr_output_img_path+"images_"+str(index)+".npy",imgs)
                    np.save(tr_output_mask_path+"masks_"+str(index)+".npy",masks)


Set: 1


  0%|          | 0/89 [00:00<?, ?it/s]

Set: 6


  0%|          | 0/89 [00:00<?, ?it/s]

In [9]:
________________7 8 _________________________________________
for val in list(val_pts):
    print('Set:',val)
    luna_subset_path="C:/Users/USER/Downloads/utorrent_dow/subset"+str(val)+"/" # KOL HAD YBADEL SUBSET MTEOUU WIN 
    file_list=glob(luna_subset_path+"*.mhd")
    df_node = pd.read_csv(annotation_path)
    df_node["file"] = df_node["seriesuid"].apply(get_filename)
    df_node = df_node.dropna()
    for img_file in tq(file_list):       
        #print "Getting mask for image file %s" % img_file.replace(luna_subset_path,"")
        mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
#         print(len(mini_df))
        if len(mini_df)>0:       # some files may not have a nodule--skipping those 
            for index, row in mini_df.iterrows():
                node_x = row["coordX"]
                node_y = row["coordY"]
                node_z = row["coordZ"]
                diam = row["diameter_mm"]
                itk_img = sitk.ReadImage(img_file) 
                img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
                center = np.array([node_x,node_y,node_z])   # nodule center
                origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
                spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
                v_center =np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering)
                num_z, height, width = img_array.shape

                center = np.array([node_x, node_y, node_z])   # nodule center
                v_center = np.rint((center-origin)/spacing) 

                for i_z in range(int(v_center[2])-1,int(v_center[2])+2):
                    mask = make_mask(center,diam,i_z*spacing[2]+origin[2],width,height,spacing,origin)
                    masks = mask
        #             imgs = matrix2int16(img_array[i_z])
                    imgs = img_array[i_z]
                    middle = imgs[100:400,100:400]
                    mean = np.mean(middle)  
                    max = np.max(imgs)
                    min = np.min(imgs)
                    # To improve threshold finding, I'm moving the 
                    # underflow and overflow on the pixel spectrum
                    imgs[imgs==max]=mean
                    imgs[imgs==min]=mean
        #             print('orig:',img_array[i_z].max())
        #             print('int16:',imgs.max())
                    imgs=normalize(imgs)
                    np.save(v_output_img_path+"images_"+str(index)+".npy",imgs)
                    np.save(v_output_mask_path+"masks_"+str(index)+".npy",masks)
'''


'\n# MONTA ________________7 8 _________________________________________\nfor val in list(val_pts):\n    print(\'Set:\',val)\n    luna_subset_path="C:/Users/USER/Downloads/utorrent_dow/subset"+str(val)+"/" # KOL HAD YBADEL SUBSET MTEOUU WIN \n    file_list=glob(luna_subset_path+"*.mhd")\n    df_node = pd.read_csv(annotation_path)\n    df_node["file"] = df_node["seriesuid"].apply(get_filename)\n    df_node = df_node.dropna()\n    for img_file in tq(file_list):       \n        #print "Getting mask for image file %s" % img_file.replace(luna_subset_path,"")\n        mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file\n#         print(len(mini_df))\n        if len(mini_df)>0:       # some files may not have a nodule--skipping those \n            for index, row in mini_df.iterrows():\n                node_x = row["coordX"]\n                node_y = row["coordY"]\n                node_z = row["coordZ"]\n                diam = row["diameter_mm"]\n                i

In [10]:

#  _________________________ 9 ________________________________
for ts in list(test_pts):
    print('Set:',ts)
    luna_subset_path="C:/Users/USER/Downloads/pfa/subset"+str(ts)+"/" # KOL HAD YBADEL SUBSET MTEOUU WIN 
    file_list=glob(luna_subset_path+"*.mhd")
    df_node = pd.read_csv(annotation_path)
    df_node["file"] = df_node["seriesuid"].apply(get_filename)
    df_node = df_node.dropna()
    for img_file in tq(file_list):       
        #print "Getting mask for image file %s" % img_file.replace(luna_subset_path,"")
        mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
#         print(len(mini_df))
        if len(mini_df)>0:       # some files may not have a nodule--skipping those 
            for index, row in mini_df.iterrows():
                node_x = row["coordX"]
                node_y = row["coordY"]
                node_z = row["coordZ"]
                diam = row["diameter_mm"]
                itk_img = sitk.ReadImage(img_file) 
                img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
                center = np.array([node_x,node_y,node_z])   # nodule center
                origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
                spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
                v_center =np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering)
                num_z, height, width = img_array.shape

                center = np.array([node_x, node_y, node_z])   # nodule center
                v_center = np.rint((center-origin)/spacing) 

                for i_z in range(int(v_center[2])-1,int(v_center[2])+2):
                    mask = make_mask(center,diam,i_z*spacing[2]+origin[2],width,height,spacing,origin)
                    masks = mask
        #             imgs = matrix2int16(img_array[i_z])
                    imgs = img_array[i_z]
                    middle = imgs[100:400,100:400]
                    mean = np.mean(middle)  
                    max = np.max(imgs)
                    min = np.min(imgs)
                    # To improve threshold finding, I'm moving the 
                    # underflow and overflow on the pixel spectrum
                    imgs[imgs==max]=mean
                    imgs[imgs==min]=mean
        #             print('orig:',img_array[i_z].max())
        #             print('int16:',imgs.max())
                    imgs=normalize(imgs)
                    np.save(ts_output_img_path+"images_"+str(index)+".npy",imgs)
                    np.save(ts_output_mask_path+"masks_"+str(index)+".npy",masks)



Set: 9


  0%|          | 0/88 [00:00<?, ?it/s]

In [19]:
x=np.load('C:/Users/user/Downloads/Norm_luna_img_mask/test/images/images/images_486.npy')
y=np.load('C:/Users/user/Downloads/luna_img_mask/test/images/images_486.npy')
print(x)
print('_______________')
print(y)

[[0.02       0.01       0.00642857 ... 0.01214286 0.46285714 0.00785714]
 [0.02214286 0.00928571 0.01357143 ... 0.01428571 0.46285714 0.00785714]
 [0.02142857 0.01571429 0.01714286 ... 0.00428571 0.46285714 0.01714286]
 ...
 [0.00285714 0.00285714 0.02       ... 0.04       0.05071429 0.03857143]
 [0.01       0.01785714 0.02714286 ... 0.03714286 0.03285714 0.03642857]
 [0.025      0.025      0.02357143 ... 0.04214286 0.04071429 0.04928571]]
_______________
[[-972 -986 -991 ... -983 -352 -989]
 [-969 -987 -981 ... -980 -352 -989]
 [-970 -978 -976 ... -994 -352 -976]
 ...
 [-996 -996 -972 ... -944 -929 -946]
 [-986 -975 -962 ... -948 -954 -949]
 [-965 -965 -967 ... -941 -943 -931]]
