In [28]:
import os, glob
import sys
import copy 
import pydicom
import scipy
import scipy.misc
import numpy as np
import cv2
import imageio
from scipy.ndimage import rotate
from PIL import Image
from zipfile import ZipFile
import shutil

### Manipulating dataset


In [29]:
dataset_path = "Dataset"

In [30]:

with ZipFile(os.path.join(dataset_path,"3Dircadb1.zip"), 'r') as zipObj:
        try:
            zipObj.extractall(dataset_path)
        except Exception as error:
            print(error)

In [31]:
def extract_zip_files(directory):
    """
    Extract ZIP files within a directory.

    Args:
    directory (str): Path to the directory containing ZIP files.
    """
    for dir_path in os.listdir(directory):
        dir_full_path = os.path.join(directory, dir_path)

        if os.path.isdir(dir_full_path):
            # Extract PATIENT_DICOM.zip if it exists
            patient_dicom_zip_path = os.path.join(dir_full_path, "PATIENT_DICOM.zip")
            if os.path.isfile(patient_dicom_zip_path):
                with ZipFile(patient_dicom_zip_path, 'r') as zipObj:
                    zipObj.extractall(dir_full_path)
                print(f"Extracted PATIENT_DICOM.zip in {dir_path}")

            # Extract MASKS_DICOM.zip if it exists
            masks_dicom_zip_path = os.path.join(dir_full_path, "MASKS_DICOM.zip")
            if os.path.isfile(masks_dicom_zip_path):
                with ZipFile(masks_dicom_zip_path, 'r') as zipObj:
                    zipObj.extractall(dir_full_path)
                print(f"Extracted MASKS_DICOM.zip in {dir_path}")

dataset_path = "Dataset/3Dircadb1"

extract_zip_files(dataset_path)


In [33]:
# Define the root directory where patient data is stored
root_directory = "Dataset/3Dircadb1"

# Iterate through top-level directories
for folder in os.listdir(root_directory):
    # Split the folder name to extract the patient's ID
    patient_id = folder.split('db')[1]

    folder_path = os.path.join(root_directory, folder)

    # Check if it's a directory
    if os.path.isdir(folder_path):
        # Iterate through subdirectories
        for subfolder in os.listdir(folder_path):
            subfolder_path = os.path.join(folder_path, subfolder)

            # Check if it's a directory
            if os.path.isdir(subfolder_path):
                # Iterate through sub-subdirectories
                for sub_sub in os.listdir(subfolder_path):
                    sub_sub_path = os.path.join(subfolder_path, sub_sub)

                    # Check if it's a directory
                    if os.path.isdir(sub_sub_path):
                        # Iterate through files in the sub-subdirectory
                        for image in os.listdir(sub_sub_path):
                            src = os.path.join(sub_sub_path, image)
                            dst = os.path.join(sub_sub_path, f"{patient_id}_{image}")
                            os.rename(src, dst)
                    
                    # Rename the sub-subdirectory
                    src = os.path.join(subfolder_path, sub_sub)
                    dst = os.path.join(subfolder_path, f"{patient_id}_{sub_sub}")
                    os.rename(src, dst)


In [34]:
def create_directory(directory):
    try:
        os.mkdir(directory)
    except OSError as error:
        print(error)

def organize_data(dataset_path, destination_path):
    # Create the 'train' directory if it doesn't exist
    create_directory(destination_path)

    # Create 'patients' and 'masks' subdirectories
    desired_scans_path = os.path.join(destination_path, "patients")
    desired_masks_path = os.path.join(destination_path, "masks")
    create_directory(desired_scans_path)
    create_directory(desired_masks_path)

    for patient_path in os.listdir(dataset_path):
        patient_full_path = os.path.join(dataset_path, patient_path)

        if os.path.isdir(patient_full_path):
            # Process patient scans
            patient_scans_path = os.path.join(patient_full_path, "PATIENT_DICOM")
            if os.path.isdir(patient_scans_path):
                for scan in os.listdir(patient_scans_path):
                    src_scan_path = os.path.join(patient_scans_path, scan)
                    dst_scan_path = os.path.join(desired_scans_path, scan)
                    shutil.move(src_scan_path, dst_scan_path)

            # Process patient masks
            patient_masks_path = os.path.join(patient_full_path, "MASKS_DICOM")
            if os.path.isdir(patient_masks_path):
                for mask in os.listdir(patient_masks_path):
                    src_mask_path = os.path.join(patient_masks_path, mask)
                    dst_mask_path = os.path.join(desired_masks_path, mask)
                    shutil.move(src_mask_path, dst_mask_path)

    # Create 'merged_livertumors' subdirectory
    merged_livertumors_path = os.path.join(desired_masks_path, "merged_livertumors")
    create_directory(merged_livertumors_path)

# Define the source dataset path and destination path
dataset_path = "Dataset/3Dircadb1"
destination_path = "train"

# Organize the data
organize_data(dataset_path, destination_path)


In [35]:
# COUNT THE NUMBER OF SCANS FOR 1 PATIENT TO APPEND ON THEM
def count_scans_startwith(directory, prefix):
    count = 0
    for file in os.listdir(directory):
        if file.startswith(prefix+'_'):
            count+=1
    return count

In [36]:
import os
import numpy as np
import pydicom
from PIL import Image

def merge_liver_tumors(scans_path, masks_path):
    for scan_filename in os.listdir(scans_path):
        # Extract patient ID from the scan filename
        patient_id = scan_filename.split('_')[0]

        # Initialize an empty tumor volume
        merged_tumor_volume = None

        # Iterate through mask directories to find liver tumor masks for the same patient
        for mask_dir in os.listdir(masks_path):
            if mask_dir.startswith(patient_id + '_livertumor'):
                mask_file_path = os.path.join(masks_path, mask_dir, scan_filename)

                # Read the mask DICOM file and convert it to a binary mask
                tumor_mask = pydicom.dcmread(mask_file_path).pixel_array
                tumor_mask = tumor_mask / 255.0
                tumor_mask = np.clip(tumor_mask, 0, 1)

                # Merge the tumor mask into the tumor volume
                if merged_tumor_volume is None:
                    merged_tumor_volume = tumor_mask
                else:
                    merged_tumor_volume = np.logical_or(merged_tumor_volume, tumor_mask)

        # If no tumor masks were found for the patient, create an empty mask
        if merged_tumor_volume is None:
            merged_tumor_volume = np.zeros((512, 512))

        # Convert the merged tumor volume to a grayscale image (0-255) and save it as a JPG file
        merged_tumor_volume = (merged_tumor_volume * 255).astype(np.uint8)
        merged_tumor_image = Image.fromarray(merged_tumor_volume)
        merged_tumor_image.save(os.path.join(masks_path, 'merged_livertumors', scan_filename + '.jpg'))



In [37]:
def reflect_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count):
    # Reflect the image horizontally
    reflected_img = copy.deepcopy(src_img)
    reflected_img.PixelData = np.fliplr(reflected_img.pixel_array).tobytes()

    # Reflect the liver mask horizontally
    reflected_liver_mask = copy.deepcopy(src_liver_mask)
    reflected_liver_mask.PixelData = np.fliplr(reflected_liver_mask.pixel_array).tobytes()
    reflected_liver_mask.save_as(os.path.join(liver_mask_path, f"{patient_id}_image_{patient_imgs_count}_augref"))

    # Reflect the mask horizontally
    reflected_mask = np.fliplr(src_mask)

    # Save the reflected image
    reflected_img.save_as(os.path.join(train_path, f"{patient_id}_image_{patient_imgs_count}_augref"))

    # Save the reflected mask as a JPEG file
    reflected_mask = reflected_mask.astype(np.uint8)
    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', f"{patient_id}_image_{patient_imgs_count}_augref.jpg"), reflected_mask)

def rotate_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle):
    # Rotate the image and mask by the specified angle
    rotated_img = copy.deepcopy(src_img)
    rotated_img.PixelData = rotate(rotated_img.pixel_array, angle, reshape=False).tobytes()

    # Rotate the liver mask
    rotated_liver_mask = copy.deepcopy(src_liver_mask)
    rotated_liver_mask.PixelData = rotate(rotated_liver_mask.pixel_array, angle, reshape=False).tobytes()
    rotated_liver_mask.save_as(os.path.join(liver_mask_path, f"{patient_id}_image_{patient_imgs_count}_augrot"))

    # Rotate the mask
    rotated_mask = rotate(src_mask, angle, reshape=False)

    # Save the rotated image
    rotated_img.save_as(os.path.join(train_path, f"{patient_id}_image_{patient_imgs_count}_augrot"))

    # Save the rotated mask as a JPEG file
    rotated_mask = rotated_mask.astype(np.uint8)
    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', f"{patient_id}_image_{patient_imgs_count}_augrot.jpg"), rotated_mask)




In [38]:
# AUGMENT THE MASKS WITH TUMORS TO FIX CLASS IMBALANCING

def augment_dicom(train_path, masks_path):
    # Define a list of rotation angles (e.g., 10, 20, ..., 350 degrees)
    rotation_angles = [angle for angle in range(90, 360, 90)]

    # Get a list of DICOM scan files in the training path
    train_files = os.listdir(train_path)

    # Iterate through each scan file
    for scan in train_files:
        # Construct the path to the corresponding tumor mask image
        mask_path = os.path.join(masks_path, 'merged_livertumors', scan + '.jpg')

        # Load the tumor mask image using OpenCV
        tumor_mask = cv2.imread(mask_path)

        # Check if the tumor mask contains a tumor (i.e., pixel value of 1)
        if 1 in tumor_mask:
            # Extract the patient ID from the scan filename
            patient_id = scan.split('_')[0]

            # Count the number of scans for the same patient
            patient_imgs_count = count_scans_startwith(train_path, patient_id)

            # Load the original DICOM image and liver mask
            original_img = pydicom.dcmread(os.path.join(train_path, scan))
            liver_mask_path = os.path.join(masks_path, patient_id + '_liver')
            liver_mask = pydicom.dcmread(os.path.join(liver_mask_path, scan))

            # Apply reflection to the DICOM image and masks
            reflect_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count)

            # Apply rotation with various angles to the DICOM image and masks
            for angle in rotation_angles:
                patient_imgs_count += 1
                rotate_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle)



In [39]:
masks_path = os.path.join('train','masks')
training_path = os.path.join('train','patients')

merge_liver_tumors(training_path, masks_path)
augment_dicom(training_path, masks_path)

print('done')

done


In [40]:
count = 0
for mask in os.listdir(os.path.join(masks_path,'merged_livertumors')):
#     print(mask)
    tumor_mask = cv2.imread(os.path.join(masks_path,'merged_livertumors', mask))
#     print(tumor_mask)
    if 1 in tumor_mask:
        count += 1
print(count)
# 568 without augmentation

20974


In [41]:
# REMOVE AUGMENTED DATA
for scan in os.listdir(training_path):
    if scan.endswith('_augref') or scan.endswith('_augrot'):
        os.remove(os.path.join(training_path,scan))

for mask_dir in os.listdir(masks_path):
    if mask_dir.endswith('liver'):
        mask_dir_path = os.path.join(masks_path, mask_dir)
        for liver_mask in os.listdir(mask_dir_path):
            if liver_mask.endswith('augref') or liver_mask.endswith('augrot') or liver_mask.endswith('aug'):
                os.remove(os.path.join(mask_dir_path, liver_mask))

for mask in os.listdir(os.path.join(masks_path, 'merged_livertumors')):
    if mask.endswith('_augref.jpg') or mask.endswith('augrot.jpg'):
        os.remove(os.path.join(masks_path, 'merged_livertumors', mask))
print('done')

done
