This code aims to decode the XNAT Segmentation. The way XNAT store segmentations is to save it as a stacked version (all segmentations in all slices will be saved in one single dicom file, stacked on top of each othter). 

Step 1: Septate segmentation
Step 2: Original Image
Step 2: Map segment to the original image


# Step 1: Segmentation File

In [1]:
patient_directory=r"F:\XNAT_BACK\archive\PanCanAID\arc001\1000256"

import os
import xml.etree.ElementTree as ET
from typing import Optional, Dict, Any
import logging
import pydicom
# Configure logging
logging.basicConfig(level=logging.INFO)

## Find assessor folder and segmentation info

In [2]:
def assessors_path(patient_directory: str) -> Optional[str]:
    """
    This function takes the path to a patient directory and returns the path of the ASSESSORS directory if it exists.
    
    Parameters:
    - patient_directory (str): The directory path of the patient.
    
    Returns:
    - Optional[str]: The path to the ASSESSORS directory if it exists, otherwise None.
    """
    assessors_dir = os.path.join(patient_directory, 'ASSESSORS')
    if not os.path.exists(assessors_dir):
        logging.info("No segmentation exists")
        return None
    logging.info(f"'ASSESSORS' folder exists: {assessors_dir}")
    return assessors_dir


def get_segmentations_from_assessors_path(assessors_path: str) -> Dict[str, Dict[str, Any]]:
    """
    This function searches the ASSESSORS folder and creates a dictionary of all segmentations.
    
    Parameters:
    - assessors_path (str): The path to the ASSESSORS directory.
    
    Returns:
    - Dict[str, Dict[str, Any]]: A dictionary with the name of the segmentor and datetime of segmentation as keys. 
      Each key maps to another dictionary containing 'created_by', 'created_time', 'dicom_name', and 'dicom_fullpath'.
    """
    segmentation_paths = [d for d in os.listdir(assessors_path) if os.path.isdir(os.path.join(assessors_path, d))]
    segmentations = {}

    for seg in segmentation_paths:
        seg_dir = os.path.join(assessors_path, seg, 'SEG')
        if os.path.exists(seg_dir):
            files = os.listdir(seg_dir)
            
            # Find XML files
            xml_files = [f for f in files if f.endswith('.xml')]
            
            for xml_file in xml_files:
                xml_path = os.path.join(seg_dir, xml_file)
                try:
                    # Parse the XML file
                    tree = ET.parse(xml_path)
                    root = tree.getroot()
                    
                    # Extract createdBy and createdTime
                    entry = root.find('.//cat:entry', namespaces={'cat': 'http://nrg.wustl.edu/catalog'})
                    if entry is not None:
                        created_by = entry.get('createdBy')
                        created_time = entry.get('createdTime')
                        dicom_name = entry.get('name')
                        dicom_fullpath = os.path.join(seg_dir, dicom_name)
                        
                        # Save to dictionary
                        key = f"{created_by}>>{created_time}"
                        segmentations[key] = {
                            'created_by': created_by,
                            'created_time': created_time,
                            'dicom_name': dicom_name,
                            'dicom_fullpath': dicom_fullpath
                        }
                        logging.info(segmentations[key])
                except ET.ParseError as e:
                    logging.error(f"Error parsing XML file {xml_file}: {e}")
                except Exception as e:
                    logging.error(f"Unexpected error: {e}")

    return segmentations

ass_path = assessors_path(patient_directory)                    
all_segmentation_dic = get_segmentations_from_assessors_path(ass_path)
all_segmentation_dic

INFO:root:No segmentation exists


TypeError: expected str, bytes or os.PathLike object, not NoneType

## Select the desired segmentation

In [79]:
# here user should chose one of the segmetnations in the all_segmentation_dic
selected_segmentation= 'farzane_shoja>>2024-01-08T21:47:48.601'
selected_segmentation_dic = all_segmentation_dic[selected_segmentation]
selected_segmentation_dic
selected_dicom_fullpath = selected_segmentation_dic.get('dicom_fullpath')
selected_dicom_data = pydicom.dcmread(dicom_fullpath)
selected_dicom_data

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 220
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Segmentation Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 2.25.491103861982319274517040568320756975749
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 2.25.80302813137786398554742050926734630921603366648225212145404
(0002, 0013) Implementation Version Name         SH: 'dcmjs-0.0'
-------------------------------------------------
(0008, 0008) Image Type                          CS: ['DERIVED', 'PRIMARY']
(0008, 0016) SOP Class UID                       UI: Segmentation Storage
(0008, 0018) SOP Instance UID                    UI: 2.25.491103861982319274517040568320756975749
(0008, 0020) Study Date                          DA: '20171119'
(0008, 0021) Series Date        

## Get data of segmentation dicom

- extract_referenced_sop_instance_uids
- create_segment_number_to_label_map

In [80]:
def get_nested_element(dataset, tags):
    """
    Navigate through the DICOM dataset using a list of tags and return the final element.

    Parameters:
        dataset (pydicom.dataset.Dataset): The DICOM dataset.
        tags (list): A list of tuples representing the tags to navigate through.

    Returns:
        The final element in the DICOM dataset specified by the tags.
    """
    current_element = dataset
    for tag in tags:
        tag = pydicom.tag.Tag(tag)
        if tag in current_element:
            current_element = current_element[tag]
        else:
            raise KeyError(f"Tag {tag} not found in the dataset.")
        
        # If the current element is a sequence, assume we want the first item
        if isinstance(current_element, pydicom.sequence.Sequence):
            if len(current_element) > 0:
                current_element = current_element[0]
            else:
                raise ValueError(f"Sequence at tag {tag} is empty.")
    
    return current_element

def extract_referenced_sop_instance_uids(segmentation_dicom) -> list:
    """
    Extract a list of Referenced SOP Instance UIDs from a DICOM segmentation dataset.

    Parameters:
        segmentation_dicom (pydicom.dataset.Dataset): The DICOM dataset containing segmentation data.

    Returns:
        list: A list of Referenced SOP Instance UIDs.
    """
    referenced_sop_instance_uids = []
    try:
        referenced_series_sequence = get_nested_element(segmentation_dicom, [(0x0008, 0x1115)])
        for series_instance in referenced_series_sequence:
            if 'ReferencedInstanceSequence' in series_instance:
                for ref_instance in series_instance.ReferencedInstanceSequence:
                    if (0x0008, 0x1155) in ref_instance:
                        sop_instance_uid = ref_instance[(0x0008, 0x1155)].value
                        referenced_sop_instance_uids.append(sop_instance_uid)
        logging.info(f"Number of Referenced SOP Instance UIDs: {len(referenced_sop_instance_uids)}")
    except (KeyError, ValueError) as e:
        logging.error(f"Error extracting Referenced SOP Instance UIDs: {e}")
    return referenced_sop_instance_uids

def create_segment_number_to_label_map(segmentation_dicom) -> dict:
    """
    Create a dictionary mapping Segment Number to Segment Label from a DICOM segmentation object.

    Parameters:
        segmentation_dicom (pydicom.dataset.Dataset): The DICOM dataset containing segmentation data.

    Returns:
        dict: A dictionary mapping Segment Number (int) to Segment Label (str).
    """
    segment_map = {}
    try:
        segment_sequence = get_nested_element(segmentation_dicom, [(0x0062, 0x0002)])
        for item in segment_sequence:
            if (0x0062, 0x0004) in item and (0x0062, 0x0005) in item:
                segment_number = item[(0x0062, 0x0004)].value
                segment_label = item[(0x0062, 0x0005)].value
                segment_map[segment_number] = segment_label
        logging.info(f"Segment map: {segment_map}")
    except (KeyError, ValueError) as e:
        logging.error(f"Error creating segment map: {e}")
    return segment_map

list_refSOPinstanceUID = extract_referenced_sop_instance_uids(selected_dicom_data)
segment_map = create_segment_number_to_label_map(selected_dicom_data)


INFO:root:Number of Referenced SOP Instance UIDs: 35
INFO:root:Segment map: {1: 'P', 2: 'M'}


In [None]:
referenced_series_sequence = get_nested_element(segmentation_dicom, [(0x0008, 0x1115)])
        for series_instance in referenced_series_sequence:
            if 'ReferencedInstanceSequence' in series_instance:
                for ref_instance in series_instance.ReferencedInstanceSequence:
                    if (0x0008, 0x1155) in ref_instance:
                        sop_instance_uid = ref_instance[(0x0008, 0x1155)].value
                        referenced_sop_instance_uids.append(sop_instance_uid)

In [85]:
segmentation_dicom = selected_dicom_data
per_frame_group_seq= get_nested_element(segmentation_dicom , [(0x5200,0x9230)])
for der_image_seq in per_frame_group_seq:
    print(der_image_seq)

(0008, 9124)  Derivation Image Sequence  1 item(s) ---- 
   (0008, 2112)  Source Image Sequence  1 item(s) ---- 
      (0008, 1150) Referenced SOP Class UID            UI: CT Image Storage
      (0008, 1155) Referenced SOP Instance UID         UI: 2.1298488738111743097671400985957233014971
      (0040, a170)  Purpose of Reference Code Sequence  1 item(s) ---- 
         (0008, 0100) Code Value                          SH: '121322'
         (0008, 0102) Coding Scheme Designator            SH: 'DCM'
         (0008, 0104) Code Meaning                        LO: 'Source image for image processing operation'
         ---------
      ---------
   (0008, 9215)  Derivation Code Sequence  1 item(s) ---- 
      (0008, 0100) Code Value                          SH: '113076'
      (0008, 0102) Coding Scheme Designator            SH: 'DCM'
      (0008, 0104) Code Meaning                        LO: 'Segmentation'
      ---------
   ---------
(0020, 9111)  Frame Content Sequence  1 item(s) ---- 
   (00

In [83]:
per_frame_group_seq[0]

(0008, 9124)  Derivation Image Sequence  1 item(s) ---- 
   (0008, 2112)  Source Image Sequence  1 item(s) ---- 
      (0008, 1150) Referenced SOP Class UID            UI: CT Image Storage
      (0008, 1155) Referenced SOP Instance UID         UI: 2.1298488738111743097671400985957233014971
      (0040, a170)  Purpose of Reference Code Sequence  1 item(s) ---- 
         (0008, 0100) Code Value                          SH: '121322'
         (0008, 0102) Coding Scheme Designator            SH: 'DCM'
         (0008, 0104) Code Meaning                        LO: 'Source image for image processing operation'
         ---------
      ---------
   (0008, 9215)  Derivation Code Sequence  1 item(s) ---- 
      (0008, 0100) Code Value                          SH: '113076'
      (0008, 0102) Coding Scheme Designator            SH: 'DCM'
      (0008, 0104) Code Meaning                        LO: 'Segmentation'
      ---------
   ---------
(0020, 9111)  Frame Content Sequence  1 item(s) ---- 
   (00

In [None]:
original_CT_path = 