Add the directories containing the project's .py modules to `sys.path` so they can be imported

In [1]:
import sys
import os

# Root of the local "spider" project checkout.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
path_prefix = "/home/kanthoulis/spider/"

# Directories that are put on the import path; presumably they hold the
# project-local modules imported in the next cell — confirm against the repo.
transforms_dir = os.path.join(path_prefix, "transforms")
image_dir = os.path.join(path_prefix, "image")

# Register the directories for import. The membership test keeps this cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
for module_dir in (transforms_dir, image_dir):
    if module_dir not in sys.path:
        sys.path.append(module_dir)

Dependencies

In [2]:
import SimpleITK as sitk
# Module-level SimpleITK file reader, pinned to the "MetaImageIO" backend
# (presumably for the .mha volumes this notebook processes — TODO confirm).
# NOTE(review): `reader` is not referenced by any cell visible in this
# notebook; confirm whether it is still needed.
reader = sitk.ImageFileReader()
reader.SetImageIO("MetaImageIO")
import numpy as np
import os  # also imported in the first cell; harmless repeat
import pathlib
from natsort import natsorted
import sys  # also imported in the first cell; harmless repeat
import shutil
from tqdm import tqdm

# Project-local modules; presumably resolved through the directories that the
# first cell appends to sys.path — verify against the repo layout.
import mri
import mri_transforms, array_transforms

Paths

In [3]:
# Root folder of the dataset on disk.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
path_prefix = "/home/kanthoulis/spider/dataset/"

# Source folders containing the 3D images and their labels.
train_img_dir = pathlib.Path(path_prefix, "train_images")
train_label_dir = pathlib.Path(path_prefix, "train_labels")
test_img_dir = pathlib.Path(path_prefix, "test_images")
test_label_dir = pathlib.Path(path_prefix, "test_labels")

# Destination folders for the 2D slices extracted from the 3D images.
train_img_slice_dir = pathlib.Path(path_prefix, "train_image_slices")
train_label_slice_dir = pathlib.Path(path_prefix, "train_label_slices")
test_img_slice_dir = pathlib.Path(path_prefix, "test_image_slices")
test_label_slice_dir = pathlib.Path(path_prefix, "test_label_slices")

Directory lists

In [4]:
# File listings of the four 3D directories, natural-sorted so the entries
# come out as 1_t1.mha, 1_t2.mha, 2_t1.mha, ... The extraction loops further
# down pair images with labels by list position, so the ordering matters.
image_train_dir_list = natsorted(os.listdir(train_img_dir))
label_train_dir_list = natsorted(os.listdir(train_label_dir))
image_test_dir_list = natsorted(os.listdir(test_img_dir))
label_test_dir_list = natsorted(os.listdir(test_label_dir))

# Number of entries found in each directory.
image_train_dirlen, label_train_dirlen = len(image_train_dir_list), len(label_train_dir_list)
image_test_dirlen, label_test_dirlen = len(image_test_dir_list), len(label_test_dir_list)

# Stop right away if an image directory and its label directory differ in size;
# only the counts are compared here, not the file names themselves.
if image_train_dirlen != label_train_dirlen:
    sys.exit("Error: Training directories don't have the same amount of images")
if image_test_dirlen != label_test_dirlen:
    sys.exit("Error: Validation directories don't have the same amount of images")
print("Directory lengths OK")

# Past the checks, one length per split is enough to drive the iteration.
train_dirlen, test_dirlen = image_train_dirlen, image_test_dirlen

print("No of 3D series in train set:", train_dirlen)
print("No of 3D series in test set:", test_dirlen)


Directory lengths OK
No of 3D series in train set: 304
No of 3D series in test set: 76


In [6]:
def extract_split_slices(img_dir, label_dir, img_names, label_names,
                         img_slice_dir, label_slice_dir, desc, drop_empty):
    """Pre-process every image/label pair of one split and extract its slices.

    Parameters
    ----------
    img_dir, label_dir : pathlib.Path
        Directories holding the 3D images and the 3D labels of the split.
    img_names, label_names : list of str
        File names inside ``img_dir`` / ``label_dir``; entries are paired by
        position, so both lists must be ordered identically.
    img_slice_dir, label_slice_dir : pathlib.Path
        Target directories handed to ``array_transforms.extract_slices``.
    desc : str
        Caption shown on the tqdm progress bar.
    drop_empty : bool
        When True, ``array_transforms.remove_empty_slices`` is applied to the
        pair before extraction (removes slices with no corresponding mask in
        the label).

    Raises
    ------
    ValueError
        If the two name lists differ in length, since pairing by position
        would then be meaningless.
    """
    if len(img_names) != len(label_names):
        raise ValueError(
            f"Cannot pair {len(img_names)} images with {len(label_names)} labels"
        )

    for img_name, label_name in tqdm(list(zip(img_names, label_names)), desc=desc, unit="image"):
        img_path = img_dir.joinpath(img_name)
        label_path = label_dir.joinpath(label_name)

        # Get the 3D arrays after pre-processing.
        # is_train_set=True is passed for every split on purpose: the original
        # author's note says ALL images get resampled to a common voxel spacing
        # to get more data (presumably that is what this flag controls —
        # confirm in mri.Mri).
        image = mri.Mri(img_path, is_label=False, is_train_set=True)
        label = mri.Mri(label_path, is_label=True, is_train_set=True)

        image_a = image.hu_a
        label_a = label.hu_a

        if drop_empty:
            # Remove slices with no corresponding mask in the label.
            image_a, label_a = array_transforms.remove_empty_slices(image_a, label_a)

        # Extract the slices to the corresponding directories.
        array_transforms.extract_slices(image_a, img_name, img_slice_dir)
        array_transforms.extract_slices(label_a, label_name, label_slice_dir)


# Extracting slices for TRAINING images/labels (empty slices are dropped).
extract_split_slices(
    train_img_dir, train_label_dir,
    image_train_dir_list, label_train_dir_list,
    train_img_slice_dir, train_label_slice_dir,
    desc="Processing TRAIN images/labels",
    drop_empty=True,
)

# Extracting slices for TEST images/labels (all slices are kept).
extract_split_slices(
    test_img_dir, test_label_dir,
    image_test_dir_list, label_test_dir_list,
    test_img_slice_dir, test_label_slice_dir,
    desc="Processing TEST images/labels",
    drop_empty=False,
)


Processing TRAIN images/labels: 100%|██████████| 304/304 [02:27<00:00,  2.06image/s]
Processing TEST images/labels: 100%|██████████| 76/76 [00:33<00:00,  2.28image/s]


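Delete the original 3D image/label directories to remove clutter (irreversible: the cells above cannot be re-run afterwards unless the original data is restored)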

In [7]:
def delete_folder(folder_path):
    """Recursively delete ``folder_path`` if it is an existing directory.

    Parameters
    ----------
    folder_path : pathlib.Path
        Directory to remove together with everything inside it.

    Prints one line stating whether the folder was deleted or was not found.
    Nothing is returned.
    """
    # Guard clause: anything that is missing or is not a directory is left alone.
    if not folder_path.exists() or not folder_path.is_dir():
        print(f"Folder not found or already deleted: {folder_path}")
        return

    shutil.rmtree(folder_path)
    print(f"Deleted folder: {folder_path}")

# Remove each raw 3D directory only when its 2D slice directory exists and
# contains at least one entry. The deletion is irreversible, so the raw data
# is kept whenever the extraction step apparently produced nothing.
for raw_dir, slice_dir in (
    (train_img_dir, train_img_slice_dir),
    (train_label_dir, train_label_slice_dir),
    (test_img_dir, test_img_slice_dir),
    (test_label_dir, test_label_slice_dir),
):
    if slice_dir.is_dir() and any(slice_dir.iterdir()):
        delete_folder(raw_dir)
    else:
        print(f"Skipped deleting {raw_dir}: no extracted slices found in {slice_dir}")

Deleted folder: /home/kanthoulis/spider/dataset/train_images
Deleted folder: /home/kanthoulis/spider/dataset/train_labels
Deleted folder: /home/kanthoulis/spider/dataset/test_images
Deleted folder: /home/kanthoulis/spider/dataset/test_labels
