# Preprocessing: conversion of DICOM files into NIfTI files

In [None]:
# Install pydicom, the library imported below as `pydicom` and used to read
# DICOM medical images. The leading "!" makes the notebook run the line as a
# shell command rather than as Python.
!pip install pydicom

In [None]:
import os
import numpy as np
import pydicom
import nibabel as nib
from scipy import ndimage
import math

In [None]:
def normalize(image):
    """
    Clip an image to the [0, 500] intensity window and scale it to [0, 1].

    - Values below 0 map to 0.
    - Values above 500 map to 1.
    - Values in between are divided by 500.

    The [0, 500] window is the one the original author selected as the
    range of Hounsfield Units (HU) relevant for brain and hematoma regions.

    Args:
        image: Array-like of pixel intensities. It is never modified.

    Returns:
        A new floating-point NumPy array with the same shape as ``image``.
    """
    # Scale first, then clip. True division always allocates a fresh float
    # array, so the caller's data is not overwritten in place, and integer
    # inputs never have the bounds written back into a narrow integer dtype.
    scaled = np.asarray(image) / 500
    return np.clip(scaled, 0.0, 1.0)

In [None]:
def get_processed_images(path):
    """
    Read a single DICOM file and return its normalized pixel data.

    - Reads the pixel array from the DICOM file.
    - Applies the linear modality rescale
      ``pixel * RescaleSlope + RescaleIntercept``. A missing or empty slope
      defaults to 1 and a missing or empty intercept defaults to 0.
    - Normalizes the rescaled image with ``normalize``.
    - Rounds the result to 10 decimal places.

    Args:
        path: Path of the DICOM file to read.

    Returns:
        The array returned by ``normalize``, rounded to 10 decimal places.
    """
    dicom_file = pydicom.dcmread(path)  # Read the DICOM file

    # Both rescale attributes are optional in a file; fall back to the
    # identity transform instead of raising AttributeError.
    slope = getattr(dicom_file, "RescaleSlope", None)
    intercept = getattr(dicom_file, "RescaleIntercept", None)
    slope = 1.0 if slope is None else float(slope)
    intercept = 0.0 if intercept is None else float(intercept)

    # Stored values -> output units: the slope must be applied before the
    # intercept is added.
    image = dicom_file.pixel_array * slope + intercept
    image = normalize(image)  # Clip to the intensity window and scale to [0, 1]
    return image.round(10)  # Round to 10 decimal places

In [None]:
def process_and_save_subfolders(input_folders, output_folder):
    """
    Convert every subfolder of DICOM slices into a single .nii.gz volume.

    - Recursively collects all subdirectories below each input folder
      (files located directly in an input folder itself are not picked up).
    - For each subdirectory containing ``.dcm`` files, orders the slices by
      InstanceNumber, processes them with ``get_processed_images`` and
      stacks them along the last axis into one 3D volume.
    - Saves each volume as ``case_XXX_0000.nii.gz`` in ``output_folder``,
      numbered consecutively in sorted subfolder-path order. Subfolders
      without DICOM files are skipped and do not consume a number.

    Args:
        input_folders: A single folder path (``str`` or ``os.PathLike``) or
            an iterable of folder paths.
        output_folder: Destination directory; created if it does not exist.

    Note:
        Volumes are written with an identity affine, so voxel spacing and
        orientation from the DICOM headers are not carried over.
    """
    # A lone path must be wrapped: iterating a string would otherwise walk
    # each character as if it were a separate folder.
    if isinstance(input_folders, (str, os.PathLike)):
        input_folders = [input_folders]

    os.makedirs(output_folder, exist_ok=True)  # Create output folder if needed

    # Collect all subfolders from the input directories
    all_subfolders = []
    for folder in input_folders:
        for root, dirs, _ in os.walk(folder):
            all_subfolders.extend(os.path.join(root, d) for d in dirs)
    all_subfolders.sort()  # Sort subfolders alphabetically

    subfolder_count = 0  # Number of volumes written so far

    for subfolder in all_subfolders:
        # Full paths of the DICOM files in this subfolder
        dicom_paths = [
            os.path.join(subfolder, f)
            for f in os.listdir(subfolder)
            if f.lower().endswith('.dcm')
        ]
        if not dicom_paths:
            continue  # Nothing to convert here

        # Sort by InstanceNumber (slice order). Only the header is needed
        # for this, so skip decoding pixel data during the sort.
        dicom_paths.sort(
            key=lambda p: int(
                pydicom.dcmread(p, stop_before_pixels=True).InstanceNumber
            )
        )

        # Process each slice and stack along the last dimension, giving a
        # (rows, columns, num_slices) volume.
        processed_images = [get_processed_images(p) for p in dicom_paths]
        volume = np.stack(processed_images, axis=-1)
        print(f"Final volume shape: {volume.shape}")

        # Create a NIfTI image (identity affine)
        nifti_image = nib.Nifti1Image(volume, affine=np.eye(4))

        # Save the volume as a single .nii.gz file
        subfolder_count += 1
        subfolder_id = f"{subfolder_count:03d}"  # Three digits (e.g., 001, 002)
        output_file_name = f"case_{subfolder_id}_0000.nii.gz"
        output_file_path = os.path.join(output_folder, output_file_name)

        nib.save(nifti_image, output_file_path)  # Save the NIfTI file
        print(f"Saved: {output_file_path}")

In [None]:
# Define paths to the DICOM dataset folders (modify as needed).
# Input folders are given as a list of paths, one entry per dataset root.
input_folders = ["PATH1"]  # Replace with your input folder paths
output_folder = "PATH2"  # Replace with your output folder path

# Process all subfolders and save NIfTI files
process_and_save_subfolders(input_folders, output_folder)