In [1]:
#Modules
import os
import gc
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
from scipy import ndimage
from sklearn.model_selection import train_test_split
from PIL import Image

In [2]:
#preparing filepaths using list comprehension
ImageFolderPath = '/data/groups/gomesr/PDAC_Scans/Datasets/Decathlon/Scans/'
LabelFolderPath = '/data/groups/gomesr/PDAC_Scans/Datasets/Decathlon/Labels/'

#creating the file paths using list comprehension
scanArray = [ImageFolderPath + x for x in sorted(os.listdir(ImageFolderPath))]
labelArray = [LabelFolderPath + x for x in sorted(os.listdir(LabelFolderPath))]

In [3]:
#Increase memory

X_train, X_test, y_train, y_test = train_test_split(scanArray, labelArray, test_size=0.2, random_state=42)

X_train, y_train = sorted(X_train), sorted(y_train)
X_test, y_test = sorted(X_test), sorted(y_test)

#X_test

In [4]:
len(y_test)

54

In [5]:
label_img = nib.load(y_train[0])

# Convert the label image to a NumPy array
sample_label = label_img.get_fdata()

# Check unique values in the label scan
unique_values = np.unique(sample_label)

print("Unique values in the first label scan:", unique_values)

Unique values in the first label scan: [0. 1. 2.]


In [6]:
# correct = 0
# for i in range(len(X_test)):
#     if os.path.basename(X_test[i]) == os.path.basename(y_test[i]):
#         correct += 1

# correct

In [7]:
def displayProcessed(numpyArray, name):
    slice_index = 130
    image_slice = numpyArray[:, :, slice_index, 0]

    plt.title(name)
    plt.imshow(image_slice, cmap='gray')
    plt.show()

In [8]:
    #Cell specific varaibles
    iterCheckpoints = {int(len(scanArray) * 0.25): '25%', int(len(scanArray) * 0.5): '50%', int(len(scanArray) * 0.75): '75%'}
    resolution = (128, 128, 128)
    
    MIN_HU = -200
    MAX_HU = 200
    
    def resize(image, target_size, labelCheck=False):
        zoom_factors = (
            target_size[0] / image.shape[0],
            target_size[1] / image.shape[1],
            target_size[2] / image.shape[2]
        )

        if labelCheck:
            resized_image = ndimage.zoom(image, zoom=zoom_factors, order=0)
            resized_image = np.round(resized_image).astype(int)
        else:
            resized_image = ndimage.zoom(image, zoom=zoom_factors, order=1)

        return resized_image

    
    #normalizes tissue density and sets the output to a float
    def huNormalize(volume):
        volume[volume < MIN_HU] = MIN_HU
        volume[volume > MAX_HU] = MAX_HU
        volume = (volume - MIN_HU) / (MAX_HU - MIN_HU)
        volume = np.nan_to_num(volume)
        
        return volume.astype('float32')

    #Cropping function in order to crop out the redundant information of the CT scans (the background)
    def numpyCrop(ImageArray):
        height, width = ImageArray.shape[:2]

        # Cropping the y margin by 20% top and bottom and x margin by 5% left and right.
        yMargin = int(height * 0.4 / 2)
        xMargin = int(width * 0.1 / 2)

        top_margin = yMargin
        bottom_margin = height - yMargin
        left_margin = xMargin
        right_margin = width - xMargin
        cropped_image = ImageArray[top_margin:bottom_margin, left_margin:right_margin]

        return cropped_image

    def imageProcessor(ImageArray, filePath, labelCheck = False,):
        print(f'Processing files to {os.path.basename(os.path.normpath(filePath))}')
        val = 0

        for x in ImageArray:
            fileName = os.path.basename(x).split(".", 1)[0]
            
            img = nib.load(x)
             #rotates, mirrors and then crops the image.
            data = numpyCrop(np.fliplr(np.rot90(np.array(img.dataobj))))
            #data = data.transpose(2, 0, 1)

            if labelCheck:
                data = resize(data, resolution, labelCheck=True)
                data = np.round(data).astype(int)
            else:
                data = resize(huNormalize(data), resolution, labelCheck=False)

            data = data[:, :, :, np.newaxis]

            #displayProcessed(data, fileName)
            np.save(filePath+fileName+'.npy', data)
            del img, data
            gc.collect()

            
            val += 1
            if val in iterCheckpoints:
                print(f'File processing {iterCheckpoints[val]} complete')

        print('Complete')
        
imageProcessor(X_train, './PDAC_NPprepedData/TrainingData/', labelCheck=False)
imageProcessor(X_test, './PDAC_NPprepedData/TestingData/', labelCheck=False)

imageProcessor(y_train, './PDAC_NPprepedData/TrainingLabels/', labelCheck=True)
imageProcessor(y_test, './PDAC_NPprepedData/TestingLabels/', labelCheck=True)

Processing files to TrainingData
File processing 25% complete
File processing 50% complete
File processing 75% complete
Complete
Processing files to TestingData
Complete
Processing files to TrainingLabels
File processing 25% complete
File processing 50% complete
File processing 75% complete
Complete
Processing files to TestingLabels
Complete
