# MLLB Deep Learning Report



Course: Machine Learning Lab





---


## Data Preprocessing

In [None]:
#imports
import os
import pydicom
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import random
from PIL import Image
from collections import Counter
from scipy.ndimage import zoom
import imageio

In [None]:
# Root folder under which this notebook writes its outputs
# (the `converted_masks` and `resized_data` sub-folders).
output_path = '/content/drive/My Drive/CHAOS_Train_Sets/Train_Sets'
# Folder with the original MR data; its sub-directories are the patients.
base_dir = '/content/drive/My Drive/CHAOS_Train_Sets/Train_Sets/MR_Original_data'

### Class Mapping


We convert all CHAOS ground-truth masks from their original palette values (0, 63, 126, 189, 252) into class IDs (0–4).  
This loops through all patients and modalities, replaces palette values with class labels, and saves the converted masks so they can be used directly for training segmentation models.


In [None]:
# Gray level of each structure in the ground-truth masks, listed in class-id
# order: the position of a level in this tuple is its class id.
_palette_levels = (
    0,      # background
    63,     # liver
    126,    # right kidney
    189,    # left kidney
    252,    # spleen
)

# mapping from palette values to class ids
palette_to_class = {level: class_id for class_id, level in enumerate(_palette_levels)}


In [None]:
def convert_palette(mask, mapping=None):
    """Convert mask palette values → class IDs.

    Parameters
    ----------
    mask : array-like
        Mask holding 8-bit palette values (a PIL image or anything else
        ``np.array`` accepts).
    mapping : dict, optional
        ``{palette_value: class_id}`` table to apply. When omitted, the
        notebook-level ``palette_to_class`` dictionary is used, which is the
        behaviour of the original one-argument call.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``mask``. Pixels whose value is
        not a key of ``mapping`` are left at 0.
    """
    if mapping is None:
        mapping = palette_to_class

    mask = np.array(mask, dtype=np.uint8)
    # Start from all zeros so that unmapped palette values end up as class 0.
    class_mask = np.zeros_like(mask, dtype=np.uint8)
    for pal, cls in mapping.items():
        class_mask[mask == pal] = cls
    return class_mask


In [None]:
# Sequences whose "Ground" folders are converted.
modalities = ["T1DUAL", "T2SPIR"]

# Everything produced by this cell goes under <output_path>/converted_masks.
output_base = os.path.join(output_path, "converted_masks")
os.makedirs(output_base, exist_ok=True)

for patient in sorted(os.listdir(base_dir)):
    patient_path = os.path.join(base_dir, patient)
    # Only directories are treated as patients; other entries are ignored.
    if os.path.isdir(patient_path):
        print(f"Processing patient: {patient}")

        for modality in modalities:
            mask_folder = os.path.join(patient_path, modality, "Ground")
            # Modalities without a "Ground" folder are skipped silently.
            if os.path.exists(mask_folder):
                # Mirror the <patient>/<modality> layout in the output tree.
                out_folder = os.path.join(output_base, patient, modality)
                os.makedirs(out_folder, exist_ok=True)

                # Keep only the PNG files, in sorted name order.
                png_names = [
                    name for name in sorted(os.listdir(mask_folder))
                    if name.endswith(".png")
                ]
                for filename in png_names:
                    mask_path = os.path.join(mask_folder, filename)
                    out_path = os.path.join(out_folder, filename)

                    # Load as 8-bit grayscale, remap palette values to class
                    # ids, and save under the same file name.
                    mask_img = Image.open(mask_path).convert("L")
                    converted = convert_palette(mask_img)
                    Image.fromarray(converted).save(out_path)

                print(f"  {modality}: converted and saved.")

### Resampling images

### Resizing images

Resize the images and masks of all patients whose image size differs from the rest, so that every slice ends up 256 × 256.

In [None]:
# Patients processed by the resizing cells.
patients = ['13', '19', '2', '20', '3', '38', '8']
# In-plane size (rows, columns) every resized slice and mask must have.
target_shape = (256, 256)

for patient in patients:
    print(f"Processing patient: {patient}")
    dicom_folder = os.path.join(base_dir, patient, 'T2SPIR', "DICOM_anon")

    # Read every DICOM file exactly once, in file-name order. The datasets are
    # reused below both for their pixels and as headers of the resized files.
    dicom_files = sorted(
        os.path.join(dicom_folder, f) for f in os.listdir(dicom_folder) if f.endswith('.dcm')
    )
    datasets = [pydicom.dcmread(f) for f in dicom_files]

    # Stack the slices into a 3D array of shape (height, width, num_slices)
    slices = [ds.pixel_array for ds in datasets]
    img_data = np.stack(slices, axis=-1)
    print("Original shape:", img_data.shape)

    # Scale rows and columns to the target size; the slice axis is untouched.
    zoom_factors = (
        target_shape[0] / img_data.shape[0],
        target_shape[1] / img_data.shape[1],
        1,
    )
    # Linear interpolation (order=1) for intensity images
    img_resized = zoom(img_data, zoom_factors, order=1)
    print(img_resized.shape)

    # Save all resized slices
    resized_image_folder = os.path.join(output_path, 'resized_data', patient, 'T2SPIR', "DICOM_anon")
    os.makedirs(resized_image_folder, exist_ok=True)

    for i, (ds, dicom_file) in enumerate(zip(datasets, dicom_files)):
        # Cast back to the dtype of the original slice before serialising.
        slice_resized = img_resized[:, :, i].astype(slices[i].dtype)

        # Update pixel data and the header fields describing its size
        ds.PixelData = slice_resized.tobytes()
        ds.Rows, ds.Columns = slice_resized.shape

        # Changing the matrix size changes the physical size of a pixel, so
        # keep PixelSpacing (row spacing, column spacing) consistent with it.
        if "PixelSpacing" in ds:
            ds.PixelSpacing = [
                round(float(spacing) / factor, 6)
                for spacing, factor in zip(ds.PixelSpacing, zoom_factors[:2])
            ]

        # Save the resized slice under its original file name
        save_path = os.path.join(resized_image_folder, os.path.basename(dicom_file))
        ds.save_as(save_path)

    # Resize the converted (class-id) masks of the same patient
    mask_folder = os.path.join(output_path, 'converted_masks', patient, 'T2SPIR')
    resized_mask_folder = os.path.join(output_path, 'resized_data', patient, 'T2SPIR', "Ground")
    os.makedirs(resized_mask_folder, exist_ok=True)

    mask_paths = sorted(
        os.path.join(mask_folder, f) for f in os.listdir(mask_folder) if f.endswith('.png')
    )

    # cv2.resize expects (width, height), i.e. target_shape reversed.
    mask_size = (target_shape[1], target_shape[0])

    for path in mask_paths:
        # Read mask without changing pixel values
        mask = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError(f"Could not read mask: {path}")

        # Nearest-neighbour interpolation so no new label values are created
        resized_mask = cv2.resize(mask, mask_size, interpolation=cv2.INTER_NEAREST)

        # Save the resized mask under its original file name
        save_path = os.path.join(resized_mask_folder, os.path.basename(path))
        cv2.imwrite(save_path, resized_mask)



In [None]:
for modality in ["InPhase","OutPhase"]:
    for patient in patients:
        print(f"Processing patient: {patient}")
        dicom_folder = os.path.join(base_dir, patient, 'T1DUAL', "DICOM_anon", modality)

        # Read every DICOM file exactly once, in file-name order. The datasets
        # are reused below both for their pixels and as headers of the output.
        dicom_files = sorted(
            os.path.join(dicom_folder, f) for f in os.listdir(dicom_folder) if f.endswith('.dcm')
        )
        datasets = [pydicom.dcmread(f) for f in dicom_files]

        # Stack the slices into a 3D array of shape (height, width, num_slices)
        slices = [ds.pixel_array for ds in datasets]
        img_data = np.stack(slices, axis=-1)
        print("Original shape:", img_data.shape)

        # Compute the scale from THIS volume's size. Reusing a value left over
        # from another cell fails on a fresh kernel and gives the wrong output
        # size whenever the volumes differ in shape.
        zoom_factors = (
            target_shape[0] / img_data.shape[0],
            target_shape[1] / img_data.shape[1],
            1,
        )
        # Linear interpolation (order=1) for intensity images
        img_resized = zoom(img_data, zoom_factors, order=1)

        # Save all resized slices
        resized_image_folder = os.path.join(
            output_path, 'resized_data', patient, 'T1DUAL', "DICOM_anon", modality
        )
        os.makedirs(resized_image_folder, exist_ok=True)

        for i, (ds, dicom_file) in enumerate(zip(datasets, dicom_files)):
            # Cast back to the dtype of the original slice before serialising.
            slice_resized = img_resized[:, :, i].astype(slices[i].dtype)

            # Update pixel data and the header fields describing its size
            ds.PixelData = slice_resized.tobytes()
            ds.Rows, ds.Columns = slice_resized.shape

            # Changing the matrix size changes the physical size of a pixel, so
            # keep PixelSpacing (row spacing, column spacing) consistent.
            if "PixelSpacing" in ds:
                ds.PixelSpacing = [
                    round(float(spacing) / factor, 6)
                    for spacing, factor in zip(ds.PixelSpacing, zoom_factors[:2])
                ]

            # Save the resized slice under its original file name
            save_path = os.path.join(resized_image_folder, os.path.basename(dicom_file))
            ds.save_as(save_path)

# The T1DUAL masks are read from a single folder per patient that does not
# depend on the phase, so they are resized once per patient here rather than
# once per phase.
for patient in patients:
    mask_folder = os.path.join(output_path, 'converted_masks', patient, 'T1DUAL')
    resized_mask_folder = os.path.join(output_path, 'resized_data', patient, 'T1DUAL', "Ground")
    os.makedirs(resized_mask_folder, exist_ok=True)

    mask_paths = sorted(
        os.path.join(mask_folder, f) for f in os.listdir(mask_folder) if f.endswith('.png')
    )

    # cv2.resize expects (width, height), i.e. target_shape reversed.
    mask_size = (target_shape[1], target_shape[0])

    for path in mask_paths:
        # Read mask without changing pixel values
        mask = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError(f"Could not read mask: {path}")

        # Nearest-neighbour interpolation so no new label values are created
        resized_mask = cv2.resize(mask, mask_size, interpolation=cv2.INTER_NEAREST)

        # Save the resized mask under its original file name
        save_path = os.path.join(resized_mask_folder, os.path.basename(path))
        cv2.imwrite(save_path, resized_mask)



Comparison between the original and the resized data for a random slice

In [None]:

# Paths
original_image_folder = os.path.join(base_dir, '13', 'T2SPIR', "DICOM_anon")
resized_image_folder = os.path.join(output_path, 'resized_data', '13', 'T2SPIR', "DICOM_anon")
original_mask_folder = os.path.join(output_path, 'converted_masks', '13', 'T2SPIR')
resized_mask_folder = os.path.join(output_path, 'resized_data', '13', 'T2SPIR', "Ground")


def _list_files(folder, extension):
    """Return the sorted full paths of the files in `folder` ending with `extension`."""
    return sorted(os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(extension))


# List slices and masks. Filtering by extension keeps stray files out of the
# lists, so the same index points at the same position in all four of them.
original_image_files = _list_files(original_image_folder, '.dcm')
resized_image_files = _list_files(resized_image_folder, '.dcm')
resized_mask_files = _list_files(resized_mask_folder, '.png')
original_mask_files = _list_files(original_mask_folder, '.png')

# Pick a random slice index that is valid for all four lists
# (files are paired purely by their position after sorting).
num_common = min(
    len(original_image_files),
    len(resized_image_files),
    len(original_mask_files),
    len(resized_mask_files),
)
slice_idx = random.randint(0, num_common - 1)

# Original image slice (DICOM)
original_image_dicom = pydicom.dcmread(original_image_files[slice_idx])
print(original_image_files[slice_idx])
original_image = original_image_dicom.pixel_array

# Resized image slice (also stored as DICOM)
ds_resized = pydicom.dcmread(resized_image_files[slice_idx])
print(resized_image_files[slice_idx])
resized_image = ds_resized.pixel_array

# Original mask
original_mask = cv2.imread(original_mask_files[slice_idx], cv2.IMREAD_UNCHANGED)
print(original_mask_files[slice_idx])
# Resized mask
resized_mask = cv2.imread(resized_mask_files[slice_idx], cv2.IMREAD_UNCHANGED)
print(resized_mask_files[slice_idx])

# Plot original and resized slice side by side, each with its mask outlined
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = [
    (axes[0], original_image, original_mask, "Original Image with Mask"),
    (axes[1], resized_image, resized_mask, "Resized Image with Mask"),
]
for ax, image, mask_overlay, title in panels:
    ax.imshow(image, cmap='gray')
    # A constant mask (e.g. a slice without any organ) has nothing to outline;
    # skipping it avoids matplotlib's "no contour levels" warning.
    if mask_overlay.min() != mask_overlay.max():
        ax.contour(mask_overlay, colors='r', linewidths=0.5)
    ax.set_title(title)

plt.tight_layout()
plt.show()
