In [2]:
from typing import Dict, Any, Union
import numpy as np
import pydicom
import logging
import os
import shutil

In [3]:
# Mapping of report label -> attribute name looked up on the DICOM dataset.
# Only 'Patient' is renamed (it is read from 'PatientID'); every other label
# is identical to the attribute name it is read from.
# NOTE(review): the trailing img_* entries name the pixel statistics produced
# by extract_image_statistics; they do not look like DICOM keywords, so a plain
# attribute lookup on a dataset is expected to yield the 'Unknown' fallback.
KEYS = {
    'Patient': 'PatientID',
    **{
        name: name
        for name in (
            'BodyPartExamined',
            'ImagePositionPatient',
            'ImageOrientationPatient',
            'PhotometricInterpretation',
            'Rows',
            'Columns',
            'PixelSpacing',
            'WindowCenter',
            'WindowWidth',
            'Modality',
            'StudyInstanceUID',
            'SeriesInstanceUID',
            'StudyID',
            'SamplesPerPixel',
            'BitsAllocated',
            'BitsStored',
            'HighBit',
            'PixelRepresentation',
            'RescaleIntercept',
            'RescaleSlope',
            'img_min',
            'img_max',
            'img_mean',
            'img_std',
        )
    },
}

In [4]:
def extract_metadata(image_data: pydicom.dataset.FileDataset) -> Dict[str, Any]:
    """
    Collects the attributes listed in KEYS from a DICOM dataset.

    Each label in KEYS is looked up on the dataset under its mapped attribute
    name. A missing attribute, or a lookup that raises, is recorded as the
    string 'Unknown' (a raising lookup is additionally logged as a warning).

    Args:
    - image_data (pydicom.dataset.FileDataset): DICOM dataset to read from.

    Returns:
    - Dict[str, Any]: One entry per label in KEYS, in KEYS order.
    """
    collected: Dict[str, Any] = {}
    for key in KEYS:
        attribute_name = KEYS[key]
        try:
            collected[key] = getattr(image_data, attribute_name, 'Unknown')
        except Exception as e:
            # Keep going: one unreadable attribute must not lose the others.
            logging.warning(f"Error extracting {key}: {e}")
            collected[key] = 'Unknown'
    return collected

In [8]:
def extract_image_statistics(image: np.ndarray) -> Dict[str, Any]:
    """
    Computes summary statistics over all values of an image.

    Args:
    - image (np.ndarray): Image data.

    Returns:
    - Dict[str, Any]: The minimum ('img_min'), maximum ('img_max'),
      mean ('img_mean') and standard deviation ('img_std') of the image.
    """
    # Label -> numpy reducer; insertion order fixes the order of the result.
    reducers = {
        'img_min': np.min,
        'img_max': np.max,
        'img_mean': np.mean,
        'img_std': np.std,
    }
    return {label: reduce_fn(image) for label, reduce_fn in reducers.items()}

In [9]:
def extract_dicom_meta_data(filename: str) -> Union[Dict[str, Union[str, Any]], None]:
    """
    Reads a DICOM file and returns its metadata merged with pixel statistics.

    The statistics are computed on the pixel array exactly as pydicom returns
    it; RescaleSlope / RescaleIntercept are not applied here.

    Args:
    - filename (str): Path to the DICOM file.

    Returns:
    - Union[Dict[str, Any], None]: The entries produced by extract_metadata,
      with 'img_min', 'img_max', 'img_mean' and 'img_std' taken from
      extract_image_statistics. None if the file is missing, is not valid
      DICOM, or any other error occurs (the error is logged, not raised).
    """
    try:
        # dcmread is the supported reader; read_file is only a deprecated
        # alias of it that newer pydicom releases no longer provide.
        image_data = pydicom.dcmread(filename)
        # A flat view is enough for whole-image statistics; avoids the two
        # full copies made by np.array(...).flatten().
        img = np.asarray(image_data.pixel_array).ravel()

        metadata = extract_metadata(image_data)
        image_stats = extract_image_statistics(img)

        # image_stats comes last so the computed values replace any
        # placeholder stored under the same img_* keys by extract_metadata.
        return {**metadata, **image_stats}

    except FileNotFoundError as e:
        logging.error(f"File not found for {filename} - {e}")
    except pydicom.errors.InvalidDicomError as e:
        logging.error(f"Invalid DICOM file {filename} - {e}")
    except Exception as e:
        logging.error(f"An error occurred while processing {filename} - {e}")

    return None

In [10]:
# Example: extract metadata and pixel statistics from a single DICOM file.
# The returned dict is displayed as the cell output.
extract_dicom_meta_data('../data/ID00419637202311204720264/10.dcm')

{'Patient': 'ID00419637202311204720264',
 'BodyPartExamined': 'Chest',
 'ImagePositionPatient': [-212.600, -170.700, -54.000],
 'ImageOrientationPatient': [1.000000, 0.000000, 0.000000, 0.000000, 1.000000, 0.000000],
 'PhotometricInterpretation': 'MONOCHROME2',
 'Rows': 1100,
 'Columns': 888,
 'PixelSpacing': [0.820312, 0.820312],
 'WindowCenter': '-500.0',
 'WindowWidth': '-1500.0',
 'Modality': 'CT',
 'StudyInstanceUID': '2.25.24142809421030670533829469959522337181',
 'SeriesInstanceUID': '2.25.72922870310959542985933229225637751168',
 'StudyID': '',
 'SamplesPerPixel': 1,
 'BitsAllocated': 16,
 'BitsStored': 16,
 'HighBit': 15,
 'PixelRepresentation': 1,
 'RescaleIntercept': '0.0',
 'RescaleSlope': '1.0',
 'img_min': -3024,
 'img_max': 1772,
 'img_mean': -256.72834254709255,
 'img_std': 733.2332408732125}

In [18]:
def _folder_name_from(value) -> str:
    """Turns a DICOM attribute value into a single, safe folder name."""
    name = str(value).strip()
    # A path separator inside the value would create nested folders or let
    # the path escape the output tree, so neutralise it.
    for separator in (os.sep, os.altsep, '/'):
        if separator:
            name = name.replace(separator, '_')
    # Empty values and the special directory names cannot be used as folders.
    if name in ('', '.', '..'):
        return 'Unknown'
    return name


def categorize_dicom_by_modality_and_patient(source_folder: str, output_folder: str) -> None:
    """
    Copies every '.dcm' file found under a source folder into
    <output_folder>/<Modality>/<PatientID>/, keeping the original file name.

    The source files are left in place. Files whose Modality or PatientID is
    missing or empty are placed under a folder named 'Unknown'. A file that
    cannot be read or copied is logged as a warning and skipped; any other
    failure is logged as an error. Nothing is raised to the caller.

    Note: files with the same name belonging to the same modality and patient
    end up at the same destination, so the later copy overwrites the earlier.

    Args:
    - source_folder (str): Path to the folder containing DICOM files
      (searched recursively).
    - output_folder (str): Path to the output folder to organize the files.
    """
    try:
        os.makedirs(output_folder, exist_ok=True)
        output_abs = os.path.abspath(output_folder)

        for root, dirs, files in os.walk(source_folder):
            # If the output folder lives inside the source tree, do not walk
            # into it, otherwise freshly copied files would be picked up again.
            dirs[:] = [
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != output_abs
            ]

            for file in files:
                if not file.endswith('.dcm'):
                    continue

                file_path = os.path.join(root, file)
                try:
                    # Only header fields are needed, so skip the pixel data.
                    # dcmread replaces the deprecated read_file alias.
                    image_data = pydicom.dcmread(file_path, stop_before_pixels=True)
                    modality = _folder_name_from(
                        getattr(image_data, KEYS['Modality'], 'Unknown')
                    )
                    patient_id = _folder_name_from(
                        getattr(image_data, KEYS['Patient'], 'Unknown')
                    )

                    patient_folder = os.path.join(output_folder, modality, patient_id)
                    os.makedirs(patient_folder, exist_ok=True)

                    shutil.copy(file_path, os.path.join(patient_folder, file))

                except Exception as e:
                    logging.warning(f"Error processing {file_path}: {e}")

    except Exception as e:
        logging.error(f"An error occurred while categorizing DICOM files: {e}")


In [22]:
def generate_report_for_folders(output_folder: str) -> None:
    """
    Writes a metadata report into every sub-folder of output_folder that
    contains at least one '.dcm' file.

    For each such folder (at any depth below output_folder) one '.dcm' file
    is picked at random, its metadata is read with extract_metadata, and the
    result is written as 'key: value' lines to '<folder>_report.txt' inside
    that folder, replacing any existing report. output_folder itself is not
    reported on, only the folders below it.

    The random pick uses numpy's global random state, so the chosen file can
    differ between runs unless the caller seeds it.

    A file that cannot be read, or a report that cannot be written, is logged
    as a warning and that folder is skipped; any other failure is logged as an
    error. Nothing is raised to the caller.

    Args:
    - output_folder (str): Path to the output folder containing categorized DICOM files.
    """
    try:
        for root, dirs, _ in os.walk(output_folder):
            for folder in dirs:
                folder_path = os.path.join(root, folder)
                files = [f for f in os.listdir(folder_path) if f.endswith('.dcm')]
                if not files:
                    continue

                random_file = np.random.choice(files)
                random_file_path = os.path.join(folder_path, random_file)
                try:
                    # The report only uses header attributes, so the pixel
                    # data is not loaded. dcmread replaces the deprecated
                    # read_file alias.
                    image_data = pydicom.dcmread(random_file_path, stop_before_pixels=True)
                    metadata = extract_metadata(image_data)
                    report_file = os.path.join(folder_path, f"{folder}_report.txt")
                    with open(report_file, 'w') as report:
                        report.write("Metadata for a random DICOM file:\n")
                        for key, value in metadata.items():
                            report.write(f"{key}: {value}\n")
                except Exception as e:
                    logging.warning(f"Error processing {random_file_path}: {e}")
    except Exception as e:
        logging.error(f"An error occurred while generating reports: {e}")

In [19]:
# Copy every .dcm file found under ../data/ into other/<Modality>/<PatientID>/.
categorize_dicom_by_modality_and_patient('../data/', 'other')

In [21]:
# Write a <folder>_report.txt into each sub-folder of other/CT/ that holds .dcm files.
# Relies on the categorization cell having populated other/CT/ beforehand.
generate_report_for_folders('other/CT/')