To perform good segmentation, one must first understand the provided dataset:
* What the images & masks look like
* The pixel values of each image & mask, etc.

Dataset provided:
* Image : CT scan image of the patient, converted to .png format
* Mask : Ground truth for the image (already labelled)

In [None]:
import os
import matplotlib.pyplot as plt
import re, glob
import cv2
import numpy as np
from PIL import Image
import PIL
import nrrd

In [None]:
# Root of the processed dataset; the directories below hold the
# 'P<patient>-slice<index>_z.png' files listed by the following cells.
data_dir = '/home/tester/jianhoong/jh_fyp_work/ct_scans_data/processed_data/'

# Image directories for the train / validation / test splits.
z_train_image, z_valid_image, z_test_image = (
    os.path.join(data_dir, rel_path)
    for rel_path in (
        'train_data/training_images/training_images_new',
        'valid_data/valid_images/valid_images',
        'test_data/test_images/testing_images_new',
    )
)

# Matching mask directories for the same three splits.
z_train_mask, z_valid_mask, z_test_mask = (
    os.path.join(data_dir, rel_path)
    for rel_path in (
        'train_data/training_masks/training_masks_new',
        'valid_data/valid_masks/valid_masks',
        'test_data/test_masks/testing_masks_new',
    )
)

In [None]:
# Getting all patient numbers in each split directory.
# Slice files are named 'P<patient>-slice<index>_z.png'.
_PATIENT_NUM_PATTERN = re.compile(r'^P(\d+)-slice')


def _patient_numbers(image_dir):
    """Return the sorted, de-duplicated patient numbers found in *image_dir*.

    File names that do not follow the 'P<patient>-slice...' scheme are
    ignored, so a stray file in the directory no longer raises
    AttributeError on a failed regex match.
    """
    matches = (_PATIENT_NUM_PATTERN.match(name) for name in os.listdir(image_dir))
    return sorted({int(match.group(1)) for match in matches if match})


patient_num_train = _patient_numbers(z_train_image)
patient_num_valid = _patient_numbers(z_valid_image)
patient_num_test = _patient_numbers(z_test_image)

In [None]:
# Number of distinct patients per split, printed in the order train, valid, test.
for split_patients in (patient_num_train, patient_num_valid, patient_num_test):
    print(len(split_patients))

In [None]:
# Patient Dict : {Patient Number : Number of Slices}
def _max_slice_per_patient(image_dir, patient_numbers):
    """Map each patient number to its highest z-slice index in *image_dir*.

    The directory is listed once and every 'P<patient>-slice<index>_z.png'
    name is parsed, instead of re-listing the directory for each patient.

    Args:
        image_dir: directory containing the slice PNG files.
        patient_numbers: iterable of integer patient numbers to report on.

    Returns:
        dict of {patient number: highest slice index}, in the order of
        *patient_numbers*.

    Raises:
        ValueError: if a requested patient has no slice file in *image_dir*.
    """
    slice_pattern = re.compile(r'^P(\d+)-slice(\d+)_z\.png$')
    wanted = set(patient_numbers)
    max_slice = {}
    for file_name in os.listdir(image_dir):
        match = slice_pattern.match(file_name)
        if match is None:
            continue  # not a slice file
        patient, slice_idx = int(match.group(1)), int(match.group(2))
        if patient in wanted:
            max_slice[patient] = max(slice_idx, max_slice.get(patient, slice_idx))

    missing = sorted(wanted - max_slice.keys())
    if missing:
        raise ValueError(f'No slice files found in {image_dir} for patients {missing}')
    return {patient: max_slice[patient] for patient in patient_numbers}


# All three splits are keyed by the patient numbers actually present on disk,
# so the train split no longer relies on patient ids being contiguous from 1.
z_patient_dict_train = _max_slice_per_patient(z_train_image, patient_num_train)
z_patient_dict_valid = _max_slice_per_patient(z_valid_image, patient_num_valid)
z_patient_dict_test = _max_slice_per_patient(z_test_image, patient_num_test)

In [None]:
# Patient Num : Num of Slice (Z-axis), one dict per split separated by a rule.
print(
    z_patient_dict_train,
    '-----',
    z_patient_dict_valid,
    '-----',
    z_patient_dict_test,
    sep='\n',
)

In [None]:
# {patient number: [slice numbers whose mask is non-empty]}, filled by the cells below.
train_msk_present_slices, valid_msk_present_slices, test_msk_present_slices = {}, {}, {}


In [None]:
# For every training patient, record the 1-based slice numbers whose mask
# contains at least one non-zero pixel.
for patient in patient_num_train:
    print('patient', patient)
    slices_with_mask = []
    for slice_num in range(1, z_patient_dict_train[patient] + 1):
        mask_path = f'{z_train_mask}/P{patient}-slice{slice_num:03d}_z.png'
        z_msk = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)  # Get .png in np array format
        if z_msk is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; report it rather than silently treating
            # the slice as having an empty mask.
            print('WARNING: could not read mask', mask_path)
            continue
        if np.any(z_msk):
            slices_with_mask.append(slice_num)
    print(patient, slices_with_mask)
    train_msk_present_slices.update({patient: slices_with_mask})

In [None]:
# For every validation patient, record the 1-based slice numbers whose mask
# contains at least one non-zero pixel.
for patient in patient_num_valid:
    slices_with_mask = []
    for slice_num in range(1, z_patient_dict_valid[patient] + 1):
        mask_path = f'{z_valid_mask}/P{patient}-slice{slice_num:03d}_z.png'
        z_msk = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)  # Get .png in np array format
        if z_msk is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; report it rather than silently treating
            # the slice as having an empty mask.
            print('WARNING: could not read mask', mask_path)
            continue
        if np.any(z_msk):
            slices_with_mask.append(slice_num)
    print(patient, slices_with_mask)
    valid_msk_present_slices.update({patient: slices_with_mask})

In [None]:
# For every test patient, record the 1-based slice numbers whose mask
# contains at least one non-zero pixel.
for patient in patient_num_test:
    slices_with_mask = []
    for slice_num in range(1, z_patient_dict_test[patient] + 1):
        mask_path = f'{z_test_mask}/P{patient}-slice{slice_num:03d}_z.png'
        z_msk = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)  # Get .png in np array format
        if z_msk is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; report it rather than silently treating
            # the slice as having an empty mask.
            print('WARNING: could not read mask', mask_path)
            continue
        if np.any(z_msk):
            slices_with_mask.append(slice_num)
    print(patient, slices_with_mask)
    test_msk_present_slices.update({patient: slices_with_mask})

In [None]:
# Per split: patients whose first mask slice is beyond slice 100 ("ovr")
# versus patients whose first mask slice is at or before slice 100 ("within").
train_msk_ovr_100, train_msk_within_100 = [], []
valid_msk_ovr_100, valid_msk_within_100 = [], []
test_msk_ovr_100, test_msk_within_100 = [], []

In [None]:
# Classify each patient by the first slice that carries a mask: patients with
# no mask at all are only reported, the rest go to the "over 100" or
# "within 100" list of their split. Splits are handled in the order
# train, valid, test.
for present_slices, over_100, within_100 in (
    (train_msk_present_slices, train_msk_ovr_100, train_msk_within_100),
    (valid_msk_present_slices, valid_msk_ovr_100, valid_msk_within_100),
    (test_msk_present_slices, test_msk_ovr_100, test_msk_within_100),
):
    for patient_id, mask_slices in present_slices.items():
        if not mask_slices:
            print('Patient with no mask: ', patient_id)
            continue
        target = over_100 if min(mask_slices) > 100 else within_100
        target.append(patient_id)

In [None]:
# Bucket sizes per split: labelled for train, then bare counts
# (over-100 first, within-100 second) for valid and test.
print('train_msk_exceed_100: ', len(train_msk_ovr_100))
print('train_msk_within_first_100: ',len(train_msk_within_100))
for over_100, within_100 in (
    (valid_msk_ovr_100, valid_msk_within_100),
    (test_msk_ovr_100, test_msk_within_100),
):
    print('--')
    print(len(over_100))
    print(len(within_100))

In [None]:
# Root of the raw dataset; the cells below read 'P<patient>_image.nrrd'
# volumes from these image directories.
# NOTE(review): this rebinds the z_*_image / z_*_mask names that earlier cells
# used for the PNG slice directories; any cell run afterwards that builds a
# '..._z.png' path from them will look under raw_data - confirm intended.
data_dir = '/home/tester/jianhoong/jh_fyp_work/ct_scans_data/raw_data/'

# Per-split root directories.
z_train, z_valid, z_test = (
    os.path.join(data_dir, split_dir)
    for split_dir in ('training_data_z', 'valid_data_z', 'testing_data_z')
)

# Image directories.
z_train_image = os.path.join(z_train, 'training_images/training_images')
z_valid_image = os.path.join(z_valid, 'valid_images/valid_images')
z_test_image = os.path.join(z_test, 'testing_images/testing_images')

# Mask directories.
z_train_mask = os.path.join(z_train, 'training_masks/training_masks')
z_valid_mask = os.path.join(z_valid, 'valid_masks/valid_masks')
z_test_mask = os.path.join(z_test, 'testing_masks/testing_masks')

In [None]:
# Sanity check: show the NRRD path that will be built for patient 1.
patient = 1
os.path.join(z_train_image, 'P' + str(patient) + '_image.nrrd')

In [None]:
# {patient number: thickness} for training patients, split into the
# <= 3.1 bucket (dim_3) and the >= 4.9 bucket (dim_5) by the next cell.
train_patient_dim_3, train_patient_dim_5 = dict(), dict()

In [None]:
# Bucket the training patients in train_msk_within_100 by the [2][2] entry of
# the NRRD 'space directions' field (used here as the slice thickness).
for patient in train_msk_within_100:
    filePath = os.path.join(z_train_image, f'P{patient}_image.nrrd')
    # Only the header is needed, so skip decoding the whole voxel volume.
    header = nrrd.read_header(filePath)
    thickness = header['space directions'][2][2]
    if thickness <= 3.1:
        train_patient_dim_3.update({patient: thickness})
    elif thickness >= 4.9:
        train_patient_dim_5.update({patient: thickness})
    else:
        # Falls between the two buckets: report it for manual inspection.
        print(patient, thickness)
    print(patient)

In [None]:
# {patient number: thickness} buckets for the validation and test splits
# (dim_3: thickness <= 3.1, dim_5: thickness >= 4.9), filled by the next cell.
valid_patient_dim_3, valid_patient_dim_5 = {}, {}
test_patient_dim_3, test_patient_dim_5 = {}, {}

In [None]:
def _bucket_by_thickness(patients, image_dir, dim_3, dim_5):
    """Sort *patients* into *dim_3* / *dim_5* by their NRRD thickness value.

    The thickness is the [2][2] entry of the header's 'space directions'
    field. Only the header is read, so the voxel volume is never decoded.

    Args:
        patients: iterable of patient numbers to inspect.
        image_dir: directory holding the 'P<patient>_image.nrrd' files.
        dim_3: dict updated in place with patients whose thickness is <= 3.1.
        dim_5: dict updated in place with patients whose thickness is >= 4.9.
    """
    for patient in patients:
        filePath = os.path.join(image_dir, f'P{patient}_image.nrrd')
        header = nrrd.read_header(filePath)
        thickness = header['space directions'][2][2]
        if thickness <= 3.1:
            dim_3.update({patient: thickness})
        elif thickness >= 4.9:
            dim_5.update({patient: thickness})
        else:
            # Falls between the two buckets: report it for manual inspection.
            print(patient, thickness)
        print(patient)


# Same bucketing for the validation split, then the test split.
_bucket_by_thickness(valid_msk_within_100, z_valid_image, valid_patient_dim_3, valid_patient_dim_5)
_bucket_by_thickness(test_msk_within_100, z_test_image, test_patient_dim_3, test_patient_dim_5)

In [None]:
# Patient counts per split (train, valid, test): first the dim_3 buckets,
# then, after a separator, the dim_5 buckets.
for dim_3_bucket in (train_patient_dim_3, valid_patient_dim_3, test_patient_dim_3):
    print(len(dim_3_bucket))

print('---')
for dim_5_bucket in (train_patient_dim_5, valid_patient_dim_5, test_patient_dim_5):
    print(len(dim_5_bucket))


In [None]:
# Patient numbers of the training split that landed in the dim_3 bucket.
train_patient_dim_3.keys()

In [None]:
# Patient numbers of the validation split that landed in the dim_3 bucket.
valid_patient_dim_3.keys()

In [None]:
# Patient numbers of the test split that landed in the dim_3 bucket.
test_patient_dim_3.keys()

In [None]:
# Patient numbers of the training split that landed in the dim_5 bucket.
train_patient_dim_5.keys()

In [None]:
# Patient numbers of the validation split that landed in the dim_5 bucket.
valid_patient_dim_5.keys()

In [None]:
# Patient numbers of the test split that landed in the dim_5 bucket.
test_patient_dim_5.keys()

In [None]:
# Load the NRRD volume and header of training patient P2 into readdata2 / header2.
# NOTE(review): a later cell loads the same 'P2_image.nrrd' into `header`, and
# the two headers are then printed side by side; if two different patients
# were meant to be compared, one of the file names needs changing - confirm.
imgPath2 = os.path.join(z_train_image, 'P2_image.nrrd')
readdata2, header2 = nrrd.read(imgPath2)

In [None]:
# The [2][2] entry of 'space directions' - the value the bucketing cells use as thickness.
header2['space directions'][2][2]

In [None]:
# Load the NRRD volume and header of training patient P2 into readdata / header.
imgPath = os.path.join(z_train_image, 'P2_image.nrrd')
readdata, header = nrrd.read(imgPath)

In [None]:
# Dump every field of the NRRD header as 'name : value'.
for field in header:
    print(field, ':', header[field])

In [None]:
# Display the 'space directions' and 'space origin' header fields together.
header['space directions'], header['space origin']

In [None]:
# Print 'space directions' and then 'space origin' for header followed by
# header2, with a separator line after each value.
for field in ('space directions', 'space origin'):
    for hdr in (header, header2):
        print(hdr[field])
        print('-------------------')

In [None]:
# Display the 'space directions' field of header2.
header2['space directions']

In [None]:
# Display the 'space origin' field of header2.
header2['space origin']

In [None]:
# Pick the patient to inspect and preview the path of its first z-slice PNG
# (slice numbers are zero-padded to three digits).
desired_patient_index = 1
z_train_image + '/P' + str(desired_patient_index)+'-slice'+'{0}'.format(1).zfill(3)+'_z.png'

In [None]:
# Load slice 1 of the chosen patient and its mask as NumPy arrays, keeping the
# stored bit depth and channel count (IMREAD_UNCHANGED is the flag value -1).
first_slice_name = '/P' + str(desired_patient_index) + '-slice' + '{0}'.format(1).zfill(3) + '_z.png'
z_img = cv2.imread(z_train_image + first_slice_name, cv2.IMREAD_UNCHANGED)
z_msk = cv2.imread(z_train_mask + first_slice_name, cv2.IMREAD_UNCHANGED)

In [None]:
# Target width for the resize in the next cell, and the same first slice
# opened through PIL instead of OpenCV.
base_width = 256
pil_slice_path = z_train_image + '/P' + str(desired_patient_index) + '-slice' + '{0}'.format(1).zfill(3) + '_z.png'
image = Image.open(pil_slice_path)

In [None]:
# Scale the PIL image to `base_width` pixels wide, keeping its aspect ratio.
width_percent = base_width / float(image.size[0])  # PIL size is (width, height)
hsize = int(float(image.size[1]) * width_percent)
# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under
# its current name.
image = image.resize((base_width, hsize), PIL.Image.LANCZOS)

In [None]:
# Display the resized PIL image as the cell output.
image

In [None]:
# Scale the OpenCV (NumPy) image to `desired_width` pixels wide, keeping its
# aspect ratio. ndarray.resize only changes the array's shape in place and
# accepts neither float sizes nor a PIL filter, so cv2.resize is used instead.
desired_width = 256
# NumPy image shape is (rows, cols[, channels]): index 1 is the width.
width_percentage = desired_width / float(z_img.shape[1])
desired_height = int(round(z_img.shape[0] * width_percentage))

# cv2.resize takes dsize as (width, height); INTER_AREA suits downscaling.
z_img = cv2.resize(z_img, (desired_width, desired_height), interpolation=cv2.INTER_AREA)

In [None]:
# Render z_img and save the figure with a tight bounding box.
preview_path = '/home/tester/jianhoong/jh_fyp_work/2D_UNet/trials/ModelPerformanceImages/test2.png'
plt.imshow(z_img)
plt.savefig(preview_path, bbox_inches = 'tight')

In [None]:
# Print the image array shape and display the mask array shape for comparison.
print(z_img.shape)
z_msk.shape

In [None]:
# Display the smallest pixel value, the largest pixel value and the shape of z_img.
np.min(z_img), np.max(z_img), z_img.shape

In [None]:
# Analyzing single patient slices (train set) i.e (P1 : 122 slices)
# Prints min / max / shape per slice and keeps the largest pixel value seen.
max_pixel_val = None
for slice_num in range(1, z_patient_dict_train[desired_patient_index] + 1):
    img_path = f'{z_train_image}/P{desired_patient_index}-slice{slice_num:03d}_z.png'
    # IMREAD_UNCHANGED keeps the stored bit depth; the default flag converts
    # every image to 8-bit 3-channel BGR, which hides the real pixel range.
    z_img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)  # Get .png in np array format
    if z_img is None:
        # cv2.imread returns None when the file is missing or unreadable.
        print('WARNING: could not read image', img_path)
        continue
    slice_max = np.max(z_img)
    print(np.min(z_img), slice_max, z_img.shape)
    if max_pixel_val is None or slice_max > max_pixel_val:
        max_pixel_val = slice_max
max_pixel_val

In [None]:
# Analyzing all patient slices (train set) i.e (210 patients and all their slices)
# Start from None rather than 0: with a 0 seed the minimum could never update
# for images whose pixel values are all non-negative.
min_pix_val, max_pix_val = None, None

for patient in patient_num_train:
    for slice_num in range(1, z_patient_dict_train[patient] + 1):
        img_path = f'{z_train_image}/P{patient}-slice{slice_num:03d}_z.png'
        # IMREAD_UNCHANGED keeps the stored bit depth; the default flag
        # converts every image to 8-bit 3-channel BGR.
        z_img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)  # Get .png in np array format
        if z_img is None:
            # cv2.imread returns None when the file is missing or unreadable.
            print('WARNING: could not read image', img_path)
            continue
        slice_min, slice_max = np.min(z_img), np.max(z_img)
        print(slice_min, slice_max, z_img.shape)
        if max_pix_val is None or slice_max > max_pix_val:
            max_pix_val = slice_max
        if min_pix_val is None or slice_min < min_pix_val:
            min_pix_val = slice_min

In [None]:
# Display the overall (min, max) pixel values found across the training slices.
min_pix_val, max_pix_val

In [None]:
# Overall pixel range across every slice of every validation patient.
# Start from None rather than 0: with a 0 seed the minimum could never update
# for images whose pixel values are all non-negative.
min_pix_val_valid, max_pix_val_valid = None, None

for patient in patient_num_valid:
    for slice_num in range(1, z_patient_dict_valid[patient] + 1):
        img_path = f'{z_valid_image}/P{patient}-slice{slice_num:03d}_z.png'
        # IMREAD_UNCHANGED keeps the stored bit depth; the default flag
        # converts every image to 8-bit 3-channel BGR.
        z_img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)  # Get .png in np array format
        if z_img is None:
            # cv2.imread returns None when the file is missing or unreadable.
            print('WARNING: could not read image', img_path)
            continue
        slice_min, slice_max = np.min(z_img), np.max(z_img)
        print(slice_min, slice_max, z_img.shape)
        if max_pix_val_valid is None or slice_max > max_pix_val_valid:
            max_pix_val_valid = slice_max
        if min_pix_val_valid is None or slice_min < min_pix_val_valid:
            min_pix_val_valid = slice_min

In [None]:
# Display the overall (min, max) pixel values found across the validation slices.
min_pix_val_valid, max_pix_val_valid

In [None]:
# Helper functions to print out patient images


def display_z(display_list, patient_num, current_slice_num):
    """Plot the arrays in *display_list* side by side in a single row.

    The first panel is titled as the input image and the second as the true
    mask for the given patient and z-slice. Only two titles are defined, so
    passing more than two arrays raises IndexError.
    """
    plt.figure(figsize = (12,12))
    title = [f'Patient {patient_num} Input Image - Slice(Z) {current_slice_num}', f'Patient {patient_num} True Mask - Slice(Z) {current_slice_num}']

    panel_count = len(display_list)
    for idx, panel in enumerate(display_list):
        plt.subplot(1, panel_count, idx + 1)
        plt.title(title[idx])
        plt.imshow(panel)
    plt.show()

def show_dataset(desired_patient_index, show_only_mask_present = False):
    """Display every z-slice of one training patient next to its mask.

    Slice count and directories come from the module-level
    z_patient_dict_train, z_train_image and z_train_mask.

    Args:
        desired_patient_index: patient number to show.
        show_only_mask_present: when True, slices whose mask has no non-zero
            pixel are skipped; when False (default) every slice is shown.
    """
    total_slices = z_patient_dict_train[desired_patient_index]

    for slice_no in range(1, total_slices + 1):
        # Image and mask share the same file name in their own directories.
        file_name = '/P' + str(desired_patient_index) + '-slice' + '{0}'.format(slice_no).zfill(3) + '_z.png'
        z_img = cv2.imread(z_train_image + file_name) # Get .png in np array format
        z_msk = cv2.imread(z_train_mask + file_name) # Get .png in np array format

        # An empty mask PNG loads as an all-zero array, so np.any is False for it.
        if show_only_mask_present and not np.any(z_msk):
            continue
        display_z([z_img, z_msk], desired_patient_index, slice_no)
            

In [None]:
# -- Parameters to be set by user ------------------------------------------
# desired_patient_num : training patient whose slices are displayed.
# show_mask_only      : True  -> only show slices where a mask is present.
#                       False -> show all slices regardless of mask presence.
# Most mask slices are blank images, so True keeps the output short.
# ----------------------------------------------------------------------------
desired_patient_num = 86
show_mask_only = True

show_dataset(desired_patient_num, show_only_mask_present=show_mask_only)

### Analyzing 3D Data

In [None]:
def sorted_alnum(l):
    """Return the strings in *l* sorted in natural (alphanumeric) order.

    Runs of the digits 0-9 are compared as integers and everything else as
    text, so 'P2' sorts before 'P10'.
    """
    def natural_key(item):
        # re.split with a capturing group keeps the digit runs in the result.
        return [int(part) if part.isdigit() else part
                for part in re.split('([0-9]+)', item)]

    return sorted(l, key=natural_key)

    # To normalize image pixel data to between 0 & 1
# Intensity window used to normalize image pixel data to between 0 & 1.
HOUNSFIELD_MIN = -1000 # min value of our data : -1000
HOUNSFIELD_MAX = 5000 # max value of our data : 5013
HOUNSFIELD_RANGE = HOUNSFIELD_MAX - HOUNSFIELD_MIN

def normalizeImageIntensityRange(img):
    """Clip *img* to [HOUNSFIELD_MIN, HOUNSFIELD_MAX] and rescale it to [0, 1].

    Args:
        img: NumPy array (or array-like) of intensities.

    Returns:
        A new array; values at or below HOUNSFIELD_MIN map to 0 and values at
        or above HOUNSFIELD_MAX map to 1.
    """
    # np.clip returns a fresh array, so the caller's data is not modified
    # (the previous in-place masking overwrote the input).
    clipped = np.clip(img, HOUNSFIELD_MIN, HOUNSFIELD_MAX)
    return (clipped - HOUNSFIELD_MIN) / HOUNSFIELD_RANGE

In [None]:
# Raw-data directories used for the 3D analysis below.
# NOTE(review): the valid/test image and mask paths here have one folder level
# fewer than the earlier raw_data cell (e.g. 'valid_images' versus
# 'valid_images/valid_images') - confirm which layout is correct.
data_dir = '/home/tester/jianhoong/jh_fyp_work/ct_scans_data/raw_data/'

# Per-split root directories.
z_train, z_valid, z_test = (
    os.path.join(data_dir, split_dir)
    for split_dir in ('training_data_z', 'valid_data_z', 'testing_data_z')
)

# Image directories.
z_train_image = os.path.join(z_train, 'training_images/training_images')
z_valid_image = os.path.join(z_valid, 'valid_images')
z_test_image = os.path.join(z_test, 'testing_images')

# Mask directories.
z_train_mask = os.path.join(z_train, 'training_masks/training_masks')
z_valid_mask = os.path.join(z_valid, 'valid_masks')
z_test_mask = os.path.join(z_test, 'testing_masks')

In [None]:
# All .nrrd files in the training mask directory, in natural
# (alphanumeric) order.
train_mask_names = sorted_alnum(glob.glob(z_train_mask + '/*.nrrd'))

In [None]:
# Display the sorted list of training mask file paths.
train_mask_names

In [None]:
# Code Dump (Deprecated)

# x_train_image = os.path.join(data_dir, 'training_data_x/training_images/training_images')
# y_train_image = os.path.join(data_dir, 'training_data_y/training_images/training_images')

# x_train_mask = os.path.join(data_dir, 'training_data_x/training_masks/training_masks')
# y_train_mask = os.path.join(data_dir, 'training_data_y/training_masks/training_masks')

# x_test_image = os.path.join(data_dir, 'testing_data_x/testing_images/testing_images')
# y_test_image = os.path.join(data_dir, 'testing_data_y/testing_images/testing_images')

# x_test_mask = os.path.join(data_dir, 'testing_data_x/testing_masks/testing_masks')
# y_test_mask = os.path.join(data_dir, 'testing_data_y/testing_masks/testing_masks')




# x_patient_dict_train = dict()
# y_patient_dict_train = dict()

# for i in range(len(patient_num_train)): # i represents each patient's number (Assuming there is no jump in patient number. Else must rewrite logic)
#     current_patient_num = 'P' + str(i+1)
#     current_patient_slice_amt = max(map(int,[re.search(current_patient_num + '-slice(.*)_x.png', file).group(1) for file in os.listdir(x_train_image) if file.startswith(current_patient_num+"-")]))
#     x_patient_dict_train.update({i+1: current_patient_slice_amt})

# for i in range(len(patient_num_train)): # i represents each patient's number (Assuming there is no jump in patient number. Else must rewrite logic)
#     current_patient_num = 'P' + str(i+1)
#     current_patient_slice_amt = max(map(int,[re.search(current_patient_num + '-slice(.*)_y.png', file).group(1) for file in os.listdir(y_train_image) if file.startswith(current_patient_num+"-")]))
#     y_patient_dict_train.update({i+1: current_patient_slice_amt})

# for i in patient_num_valid: # i represents each patient's number (Assuming there is no jump in patient number. Else must rewrite logic)
#     current_patient_num = 'P' + str(i+1)
#     current_patient_slice_amt = max(map(int,[re.search(current_patient_num + '-slice(.*)_z.png', file).group(1) for file in os.listdir(z_valid_image) if file.startswith(current_patient_num+"-")]))
#     z_patient_dict_valid.update({i+1: current_patient_slice_amt})

# for i in patient_num_test: # i represents each patient's number (Assuming there is no jump in patient number. Else must rewrite logic)
#     current_patient_num = 'P' + str(i+1)
#     current_patient_slice_amt = max(map(int,[re.search(current_patient_num + '-slice(.*)_z.png', file).group(1) for file in os.listdir(z_test_image) if file.startswith(current_patient_num+"-")]))
#     z_patient_dict_test.update({i+1: current_patient_slice_amt})




# print(x_patient_dict_train[1])
# print(y_patient_dict_train[1])



# def display_x(display_list, patient_num, current_slice_num):

#     plt.figure(figsize = (10,5))
#     title = [f'Patient {patient_num} Input Image - Slice(X) {current_slice_num}', f'Patient {patient_num} True Mask - Slice(X) {current_slice_num}']

#     for i in range(len(display_list)):
#         plt.subplot(1, len(display_list), i+1)
#         plt.title(title[i])
#         plt.imshow(display_list[i], aspect = 'auto')
#     plt.show()

# def display_y(display_list, patient_num, current_slice_num):

#     plt.figure(figsize = (10,5))
#     title = [f'Patient {patient_num} Input Image - Slice(Y) {current_slice_num}', f'Patient {patient_num} True Mask - Slice(Y) {current_slice_num}']

#     for i in range(len(display_list)):
#         plt.subplot(1, len(display_list), i+1)
#         plt.title(title[i])
#         plt.imshow(display_list[i], aspect = 'auto')
#     plt.show()

	
#     x_slices = x_patient_dict_train[desired_patient_index]
#     y_slices = y_patient_dict_train[desired_patient_index]





#     if show_x:
#         for i in range(x_slices): # Iterate through number of slices for desired patient

#             current_slice_num = i+1
#             patient_num = desired_patient_index

#             x_img = cv2.imread(x_train_image + '/P' + str(desired_patient_index)+'-slice'+'{0}'.format(i+1).zfill(3)+'_x.png') # Get .png in np array format
#             x_msk = cv2.imread(x_train_mask + '/P' + str(desired_patient_index)+'-slice'+'{0}'.format(i+1).zfill(3)+'_x.png') # Get .png in np array format

#             if show_only_mask_present: # show_only_mask_present flag: Default set to False
#                 if np.any(x_msk): # Checking for non-zero msk .png (Empty msk.png will return array of all zeros) (think of this as: if any values in msk.png, show me this img)
#                     display_x([x_img,x_msk], patient_num, current_slice_num)
                   
#             else:
#                 display_x([x_img,x_msk],patient_num, current_slice_num)
#     if show_y:
#         for i in range(y_slices): # Iterate through number of slices for desired patient

#             current_slice_num = i+1
#             patient_num = desired_patient_index

#             y_img = cv2.imread(y_train_image + '/P' + str(desired_patient_index)+'-slice'+'{0}'.format(i+1).zfill(3)+'_y.png') # Get .png in np array format
#             y_msk = cv2.imread(y_train_mask + '/P' + str(desired_patient_index)+'-slice'+'{0}'.format(i+1).zfill(3)+'_y.png') # Get .png in np array format

#             if show_only_mask_present: # show_only_mask_present flag: Default set to False
#                 if np.any(y_msk): # Checking for non-zero msk .png (Empty msk.png will return array of all zeros) (think of this as: if any values in msk.png, show me this img)
#                     display_y([y_img,y_msk], patient_num, current_slice_num)
                    
#             else:
#                 display_y([y_img,y_msk],patient_num, current_slice_num)

