In [None]:
import os
import fnmatch
import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import numpy.linalg as npl
from scipy.ndimage.interpolation import rotate
#import time

In [None]:
# Working directory. os.getcwd() yields the same string as the IPython `%pwd`
# magic but is plain Python, so this cell also runs outside IPython.
Wdir = os.getcwd()
# List the mhd files in folder "data" in the working directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the row order of everything derived from `datalist` reproducible.
datalist = sorted(
    entry for entry in os.listdir(os.path.join(Wdir, 'data'))
    if fnmatch.fnmatch(entry, '*.mhd')
)

In [None]:
# Explore variation of spacings and dimensions of different CTs
# One row per file in `datalist`; three columns each.
Spacings = np.empty([len(datalist), 3])
Dims = np.empty([len(datalist), 3])

# Build the folder path once with os.path.join instead of hard-coded
# backslashes, so the cell works on every OS and agrees with the listing cell.
data_dir = os.path.join(Wdir, 'data')
for nmbr, mhd_name in enumerate(datalist):
    itkimg = sitk.ReadImage(os.path.join(data_dir, mhd_name))  # Reads the image using SimpleITK
    CT_scan = sitk.GetArrayFromImage(itkimg)  # numpy array of the voxels (axis order z,y,x)
    Spacings[nmbr] = itkimg.GetSpacing()  # Voxel size as reported by the image
    Dims[nmbr] = CT_scan.shape  # Array shape of the converted scan

# The next cell loads these two files from disk; uncomment to (re)generate them.
# np.save('Spacings.npy', Spacings)
# np.save('Dims.npy', Dims)

In [None]:
# Load the previously stored spacings and array shapes from disk
Spacings = np.load('Spacings.npy')
Dims = np.load('Dims.npy')

# (values, title) for every histogram, in display order. The Dims columns are
# plotted in reverse column order (2, 1, 0) under the titles "CT Dimension 1..3".
histograms = [
    (Spacings[:, 0], 'Distribution of Voxel Dimension 1'),
    (Spacings[:, 1], 'Distribution of Voxel Dimension 2'),
    (Spacings[:, 2], 'Distribution of Voxel Dimension 3'),
    (Dims[:, 2], 'Distribution of CT Dimension 1'),
    (Dims[:, 1], 'Distribution of CT Dimension 2'),
    (Dims[:, 0], 'Distribution of CT Dimension 3'),
]

for position, (values, caption) in enumerate(histograms):
    # The first histogram goes on the current figure; each later one opens a new figure.
    if position:
        plt.figure()
    plt.hist(values)
    plt.title(caption)

In [None]:
# Reads an mhd/raw image with SimpleITK; returns the voxel array, spacing, origin and 3x3 direction matrix
def readMhd(filename):
    """Load an image through SimpleITK and return its voxels and geometry.

    Parameters
    ----------
    filename : path passed unchanged to ``sitk.ReadImage``.

    Returns
    -------
    tuple ``(CT_scan, Spacing, Offset, TransformMat)`` where
        CT_scan      is the result of ``sitk.GetArrayFromImage``,
        Spacing      is the image's ``GetSpacing()`` value,
        Offset       is the image's ``GetOrigin()`` value (world coordinates of the origin),
        TransformMat is ``GetDirection()`` cut into three consecutive triples
                     and stacked as the rows of a numpy array.
    """
    image = sitk.ReadImage(filename)
    voxels = sitk.GetArrayFromImage(image)
    voxel_size = image.GetSpacing()
    origin = image.GetOrigin()

    # GetDirection() is a flat sequence; entries 0-2, 3-5 and 6-8 become rows 0, 1 and 2.
    flat_direction = image.GetDirection()
    direction_rows = [flat_direction[start:start + 3] for start in (0, 3, 6)]

    return voxels, voxel_size, origin, np.asarray(direction_rows)

In [None]:
# Transforms real world to image coordinates for a point
def Wrld2Img(real_pt,Offset,Spacing,TransformMat):
    """Convert one world-coordinate point to (rounded) image indices.

    Column ``i`` of ``TransformMat`` is scaled by ``Spacing[i]``; the inverse of
    that matrix is applied to ``real_pt - Offset`` and the result is rounded.

    Parameters
    ----------
    real_pt      : point in world coordinates (3 values).
    Offset       : world coordinates of the image origin (3 values).
    Spacing      : voxel size per axis (3 values).
    TransformMat : 3x3 direction matrix. It is NOT modified: the previous
                   implementation scaled the caller's array in place, so a
                   second call with the same matrix applied the spacing twice.

    Returns
    -------
    numpy float array with the rounded image coordinates, in the same axis
    order as ``real_pt``.

    Raises
    ------
    numpy.linalg.LinAlgError if the scaled matrix is singular.
    """
    # Broadcasting multiplies element [r, c] by Spacing[c], i.e. scales each
    # column, and produces a new array so the argument stays untouched.
    scaled = np.asarray(TransformMat, dtype=float) * np.asarray(Spacing, dtype=float)
    displacement = np.asarray(real_pt, dtype=float) - np.asarray(Offset, dtype=float)
    img_pt = np.round(np.matmul(npl.inv(scaled), displacement))
    return img_pt

In [None]:
# Edge-pads the volume when the patch around the centroid would cross a border
def Padd(CT_scan, ctrd, PatchHalfSize):
    """Pad ``CT_scan`` if a patch centred on ``ctrd`` does not fit inside it.

    Padding is applied when any centroid coordinate is smaller than the
    matching half size, or when centroid + half size exceeds the array shape
    on any axis. In that case every axis is padded on both sides by
    ``max(PatchHalfSize)`` using edge replication, and the centroid is shifted
    by the same amount.

    Parameters
    ----------
    CT_scan       : numpy array to extract from.
    ctrd          : centroid in array index order (numpy array).
    PatchHalfSize : per-axis half patch size in voxels (numpy array).

    Returns
    -------
    ``(CT_scan, ctrd)`` -- the inputs unchanged when no padding is needed,
    otherwise the padded array and the shifted centroid.
    """
    crosses_lower_border = np.any(ctrd < PatchHalfSize)
    if not (crosses_lower_border or np.any(ctrd + PatchHalfSize > CT_scan.shape)):
        return CT_scan, ctrd

    margin = max(PatchHalfSize)
    padded_scan = np.pad(CT_scan, (margin, margin), mode='edge')
    return padded_scan, ctrd + margin

In [None]:
# Extracts a cube at the given centroid & resamples the scan
def XtrctCube(CT_scan, ctrd, PatchHalfSize, PatchSize_v, order=2):
    """Cut a box around ``ctrd`` and resample it towards ``PatchSize_v`` voxels per axis.

    Parameters
    ----------
    CT_scan       : 3-D numpy array to cut from.
    ctrd          : integer centroid in array index order (3 values).
    PatchHalfSize : integer half size of the box per axis (3 values); the box
                    spans ``ctrd[i]-PatchHalfSize[i] : ctrd[i]+PatchHalfSize[i]``.
    PatchSize_v   : target number of voxels per axis after resampling.
    order         : spline order handed to ``scipy.ndimage.zoom``. Defaults to
                    2, the value that was previously hard-coded; pass 0 for
                    nearest-neighbour resampling, e.g. for label masks whose
                    values must not be interpolated.

    Returns
    -------
    The resampled patch as returned by ``zoom``.
    """
    window = tuple(
        slice(ctrd[axis] - PatchHalfSize[axis], ctrd[axis] + PatchHalfSize[axis])
        for axis in range(3)
    )
    Patch = CT_scan[window]

    # Per-axis zoom factor that maps the cut box onto PatchSize_v voxels.
    factors = tuple(PatchSize_v / Patch.shape[axis] for axis in range(3))
    Patch = zoom(Patch, factors, order=order)

    return Patch

In [None]:
# Extracts a cube of the specified size around the given coordinates ctrd
def GetPatch(CT_scan,ctrd,PatchSize_v,PatchSize_mm,Spacing):
    """Extract a resampled cube of ``PatchSize_mm`` around ``ctrd``.

    ``ctrd`` and ``Spacing`` are reversed before use, i.e. they are expected in
    the opposite axis order to ``CT_scan``'s array axes.

    Parameters
    ----------
    CT_scan      : numpy array to extract from.
    ctrd         : centroid coordinates (3 values); converted to integers.
    PatchSize_v  : voxels per axis of the returned cube (passed to XtrctCube).
    PatchSize_mm : physical edge length of the cube, in the units of ``Spacing``.
    Spacing      : voxel size per axis (3 values).

    Returns
    -------
    The patch produced by ``XtrctCube``.
    """
    # Builtin `int` replaces `np.int`, which was only an alias for it and has
    # been removed from NumPy (AttributeError on current releases).
    ctrd = np.asarray(list(reversed(ctrd)), dtype=int)
    Spacing = np.asarray(list(reversed(Spacing)))

    # Half of the cube size in voxels, truncated towards zero per axis
    PatchHalfSize = np.asarray((PatchSize_mm/Spacing)/2, dtype=int)
    CT_scan, ctrd = Padd(CT_scan, ctrd, PatchHalfSize)
    Patch = XtrctCube(CT_scan, ctrd, PatchHalfSize, PatchSize_v)

    return Patch

In [None]:
# Maps a radiologist number to the folder in which its extracted scan/mask cubes are saved (only R1, R2, R3 are mapped)
def Path(rad):
    """Return the output folder name for radiologist number ``rad``.

    1, 2 and 3 map to 'R1', 'R2' and 'R3'. Any other value (including 4 and 5,
    which are not mapped) returns ``None``.
    """
    folder_by_rad = {number: 'R{}'.format(number) for number in (1, 2, 3)}
    return folder_by_rad.get(rad)

In [None]:
# Applies three successive random rotations to a volume
def RandRotate(Scan, maxAngle):
    """Rotate ``Scan`` by three independently drawn random angles.

    For each of the axis planes (0, 1), (0, 2) and (1, 2), in that order, an
    angle is drawn uniformly from [-maxAngle, maxAngle] with
    ``np.random.uniform`` and the volume is rotated in that plane with
    ``mode='nearest'`` and ``reshape=False`` (the shape is kept).

    Parameters
    ----------
    Scan     : array to rotate.
    maxAngle : bound of the uniform angle distribution (passed to ``rotate``
               as the rotation angle).

    Returns
    -------
    The array after the three rotations.
    """
    rotated = Scan
    for plane in ((0, 1), (0, 2), (1, 2)):
        theta = np.random.uniform(-maxAngle, maxAngle)
        rotated = rotate(rotated, theta, mode='nearest', axes=plane, reshape=False)

    return rotated

In [None]:
# Builds four augmented variants of a volume: three flips and one transpose
def Augment(Scan):
    """Return four augmented versions of ``Scan``.

    Returns
    -------
    tuple ``(Aug1, Aug2, Aug3, Aug4)``:
        Aug1, Aug2, Aug3 -- ``Scan`` flipped along axis 0, 1 and 2 respectively;
        Aug4             -- ``Scan`` with its last two axes swapped (axes order 0, 2, 1).
    """
    mirrored = [np.flip(Scan, axis=axis) for axis in range(3)]
    swapped = np.transpose(Scan, (0, 2, 1))

    return mirrored[0], mirrored[1], mirrored[2], swapped
    

In [None]:
df = pd.read_csv('trainNodules.csv') # Read nodules csv
LNDbIDs = df.LNDbID
RadIDs = df.RadID
FindingIDs = df.FindingID

# Patch geometry is identical for every nodule, so it is defined once.
PatchSize_v = 80 # We should submit a 80x80x80 cube with voxelsize 0.6375mm centered on the nodule centroid with the predicted segmentation for each nodule.
PatchSize_mm = 51 # 80*0.6375 = 51, about 70% larger than the diameter of the largest nodule

loaded_lnd = None  # LNDbID of the CT currently held in CT_scan

# Each csv row describes one finding; its centroid is taken directly from the
# row instead of being looked up again with a boolean mask (float() on a
# one-element Series is deprecated in pandas).
for lnd, rad, finding, x, y, z in zip(LNDbIDs, RadIDs, FindingIDs, df.x, df.y, df.z):

    # Resolve the output folder first so an unmapped RadID fails before any
    # expensive reading/resampling is done.
    PATH = Path(rad)
    if PATH is None:
        raise ValueError('No output folder is defined for RadID {}'.format(rad))

    # Re-read the CT only when the LNDbID differs from the previous row.
    if lnd != loaded_lnd:
        CT_scan = readMhd('data/LNDb-{:04}.mhd'.format(lnd))[0]
        loaded_lnd = lnd
    # The mask is read for every row because it is reduced to a single finding
    # below. Its geometry (Spacing, Offset, TransformMat) is the one used.
    [Mask,Spacing,Offset,TransformMat] = readMhd('masks/LNDb-{:04}_rad{}.mhd'.format(lnd,rad))

    # Centroid of the nodule in world coordinates
    ctrd = np.array([float(x), float(y), float(z)])

    ctrd = Wrld2Img(ctrd,Offset,Spacing,TransformMat) # Convert world coordinates to image coordinates

    # Display nodule scan/mask slice
#     fig, axs = plt.subplots(1,2)
#     axs[0].imshow(CT_scan[int(ctrd[2])])
#     axs[1].imshow(Mask[int(ctrd[2])])
#     plt.show()

    # Extract cube around nodule
    ScanPatch = GetPatch(CT_scan,ctrd,PatchSize_v,PatchSize_mm,Spacing)

    # Keep only the voxels labelled with this finding, as a 0/1 mask of the
    # original dtype. The previous code binarised the mask BEFORE comparing it
    # with `finding`, which wiped the mask for every finding other than 1.
    Mask = (Mask == finding).astype(Mask.dtype)
    # NOTE(review): GetPatch resamples the mask the same way as the scan;
    # confirm the resampled mask is still binary.
    MaskPatch = GetPatch(Mask,ctrd,PatchSize_v,PatchSize_mm,Spacing)

    os.makedirs(PATH, exist_ok=True)  # make sure the destination folder exists
    np.save(os.path.join(PATH , 'LNDb{:04}-R{:01}-F{:01}.npy'.format(lnd, rad, finding)), ScanPatch)
    np.save(os.path.join(PATH , 'Mask_LNDb{:04}-R{:01}-F{:01}.npy'.format(lnd, rad, finding)), MaskPatch)

    
    # Display mid slices from resampled scan/mask
#     fig, axs = plt.subplots(2,3)
#     axs[0,0].imshow(ScanPatch[int(ScanPatch.shape[0]/2),:,:])
#     axs[1,0].imshow(MaskPatch[int(MaskPatch.shape[0]/2),:,:])
#     axs[0,1].imshow(ScanPatch[:,int(ScanPatch.shape[1]/2),:])
#     axs[1,1].imshow(MaskPatch[:,int(MaskPatch.shape[1]/2),:])
#     axs[0,2].imshow(ScanPatch[:,:,int(ScanPatch.shape[2]/2)])
#     axs[1,2].imshow(MaskPatch[:,:,int(MaskPatch.shape[2]/2)])    
#     plt.show()

In [None]:
# R1 Folder Augmentation
Wdir = os.getcwd()  # same string as the IPython `%pwd` magic, but plain Python
src_dir = os.path.join(Wdir, 'Data2_80', 'R1')
aug_dir = os.path.join(src_dir, 'R1Aug')
os.makedirs(aug_dir, exist_ok=True)  # np.save does not create missing folders

# Sorted so the processing order does not depend on os.listdir()'s arbitrary order.
datalist = sorted(name for name in os.listdir(src_dir) if fnmatch.fnmatch(name, '*.npy'))

# Pair each scan with its mask by file name ('Mask_' + scan file name, the
# naming used when the patches are saved) instead of assuming that the first
# half of the listing holds the scans and the second half the matching masks.
for scan_name in datalist:
    if scan_name.startswith('Mask_'):
        continue
    mask_name = 'Mask_' + scan_name

    Scan = np.load(os.path.join(src_dir, scan_name))
    Mask = np.load(os.path.join(src_dir, mask_name))

    # Augment() returns four variants; the k-th one is saved with prefix 'Aug<k>_'.
    for k, (scan_aug, mask_aug) in enumerate(zip(Augment(Scan), Augment(Mask)), start=1):
        np.save(os.path.join(aug_dir, 'Aug{}_{}'.format(k, scan_name)), scan_aug)
        np.save(os.path.join(aug_dir, 'Aug{}_{}'.format(k, mask_name)), mask_aug)

In [None]:
# R2 Folder Augmentation
Wdir = os.getcwd()  # same string as the IPython `%pwd` magic, but plain Python
src_dir = os.path.join(Wdir, 'Data2_80', 'R2')
aug_dir = os.path.join(src_dir, 'R2Aug')
os.makedirs(aug_dir, exist_ok=True)  # np.save does not create missing folders

# Sorted so the processing order does not depend on os.listdir()'s arbitrary order.
datalist = sorted(name for name in os.listdir(src_dir) if fnmatch.fnmatch(name, '*.npy'))

# Pair each scan with its mask by file name ('Mask_' + scan file name, the
# naming used when the patches are saved) instead of assuming that the first
# half of the listing holds the scans and the second half the matching masks.
for scan_name in datalist:
    if scan_name.startswith('Mask_'):
        continue
    mask_name = 'Mask_' + scan_name

    Scan = np.load(os.path.join(src_dir, scan_name))
    Mask = np.load(os.path.join(src_dir, mask_name))

    # Augment() returns four variants; the k-th one is saved with prefix 'Aug<k>_'.
    for k, (scan_aug, mask_aug) in enumerate(zip(Augment(Scan), Augment(Mask)), start=1):
        np.save(os.path.join(aug_dir, 'Aug{}_{}'.format(k, scan_name)), scan_aug)
        np.save(os.path.join(aug_dir, 'Aug{}_{}'.format(k, mask_name)), mask_aug)


In [None]:
# R3 Folder Augmentation
Wdir = os.getcwd()  # same string as the IPython `%pwd` magic, but plain Python
src_dir = os.path.join(Wdir, 'Data2_80', 'R3')
aug_dir = os.path.join(src_dir, 'R3Aug')
os.makedirs(aug_dir, exist_ok=True)  # np.save does not create missing folders

# Sorted so the processing order does not depend on os.listdir()'s arbitrary order.
datalist = sorted(name for name in os.listdir(src_dir) if fnmatch.fnmatch(name, '*.npy'))

# Pair each scan with its mask by file name ('Mask_' + scan file name, the
# naming used when the patches are saved) instead of assuming that the first
# half of the listing holds the scans and the second half the matching masks.
for scan_name in datalist:
    if scan_name.startswith('Mask_'):
        continue
    mask_name = 'Mask_' + scan_name

    Scan = np.load(os.path.join(src_dir, scan_name))
    Mask = np.load(os.path.join(src_dir, mask_name))

    # Augment() returns four variants; the k-th one is saved with prefix 'Aug<k>_'.
    for k, (scan_aug, mask_aug) in enumerate(zip(Augment(Scan), Augment(Mask)), start=1):
        np.save(os.path.join(aug_dir, 'Aug{}_{}'.format(k, scan_name)), scan_aug)
        np.save(os.path.join(aug_dir, 'Aug{}_{}'.format(k, mask_name)), mask_aug)
