<a href="https://colab.research.google.com/github/ImagingDataCommons/CloudSegmentator/blob/main/workflows/TotalSegmentator/Notebooks/postProcessingExtractPerframe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **This notebook extracts the DICOM attribute PerFrameFunctionalGroupsSequence from DICOM SEG objects. It takes compressed SEG files as input, decompresses them, extracts the attribute, and flattens it into a CSV file**

### **Installing Packages**

In [None]:
%%capture
import sys
if 'google.colab' in sys.modules:
    !sudo apt-get update \
    && apt-get install -y --no-install-recommends \
    lz4

In [None]:
%%capture
if 'google.colab' in sys.modules:
   !pip install pydicom \
      google-cloud-bigquery \
      pyarrow \
      db_dtypes

### **Importing Packages**

In [None]:
import logging
import os
import pandas as pd
import pydicom
import shutil
import subprocess
import sys
from tqdm import tqdm
import traceback


### **Parameters for papermill**

In [None]:
if 'google.colab' in sys.modules:
    !wget -q https://github.com/ImagingDataCommons/CloudSegmentator/releases/download/v1.0.0/dicomsegAndRadiomicsSR_DICOMsegFiles.tar.lz4
    segFiles=["dicomsegAndRadiomicsSR_DICOMsegFiles.tar.lz4"]


### **This cell is used on the cloud, where the file paths are passed to the notebook as a single comma-separated string**

In [None]:
# Outside Colab, `segFiles` is supplied as one comma-separated string; turn it
# into a list of paths. The isinstance guard makes the cell safe to re-run
# (a second run would otherwise call .split on the list and raise), and
# surrounding whitespace / empty entries (e.g. from a trailing comma) are dropped
# so they are not later handed to lz4 as bogus file names.
if 'google.colab' not in sys.modules and isinstance(segFiles, str):
    segFiles = [path.strip() for path in segFiles.split(',') if path.strip()]

### **Logging**

In [None]:
# Initialize logging to a file.
# force=True replaces any handlers already attached to the root logger;
# without it basicConfig silently does nothing when handlers exist (which can
# happen in hosted notebook kernels), and console_output.txt is never written.
logging.basicConfig(filename="console_output.txt", level=logging.INFO, force=True)

# Create an output directory to store CSV
output_dir = 'output'
os.makedirs(output_dir, exist_ok=True)

### **Extract PerFrameFunctionalGroupsSequence**

In [None]:
combined_data = []
try:
    for segFileBatch in tqdm(segFiles):
        logging.info("Processing URL: %s", segFileBatch)

        try:
            shutil.rmtree("itkimage2segimage")
            shutil.rmtree("decompressedSegmentationsDICOM")
        except OSError:
            pass

        os.mkdir("decompressedSegmentationsDICOM")
        try:
            !lz4 -d --rm $segFileBatch -c | tar --strip-components=0 -xvf -
            !find ./itkimage2segimage -name '*.dcm.lz4' -exec mv -t decompressedSegmentationsDICOM {} +
            !lz4 -d -m --rm "decompressedSegmentationsDICOM"/*.lz4
            print("files successfully decompressed")
        except Exception as download_error:
            logging.error(
                "Error during download and decompression: %s", str(download_error)
            )

        # Find all series IDs and add them to the combined_data list
        series_ids = [
            filename.split("_")[0]
            for filename in os.listdir("decompressedSegmentationsDICOM")
        ]
        print(series_ids)
        for series_id in series_ids:
            pffgs = pydicom.dcmread(
                f"decompressedSegmentationsDICOM/{series_id}",
                specific_tags=[
                    "SeriesInstanceUID",
                    "ReferencedSeriesSequence",
                    "PerFrameFunctionalGroupsSequence",
                ],
                stop_before_pixels=True,
            )
            referencedSeriesInstanceUID = pffgs.ReferencedSeriesSequence[
                0
            ].SeriesInstanceUID
            SeriesInstanceUID=pffgs.SeriesInstanceUID
            data = []

            # Extract data from Per-frame Functional Groups Sequence
            if "PerFrameFunctionalGroupsSequence" in pffgs:
                for item in pffgs.PerFrameFunctionalGroupsSequence:
                    frame_data = {
                        "SEG_SeriesInstanceUID": SeriesInstanceUID,
                        "ReferencedSeriesSequence_SeriesInstanceUID": referencedSeriesInstanceUID,
                        "FrameContentSequence_DimensionIndexValues": [
                            str(s)
                            for s in list(
                                item.FrameContentSequence[0].DimensionIndexValues
                            )
                        ],
                        "PlanePositionSequence_ImagePositionPatient": [
                            str(s)
                            for s in list(
                                item.PlanePositionSequence[0].ImagePositionPatient
                            )
                        ],
                        "SegmentIdentificationSequence_ReferencedSegmentNumber": item.SegmentIdentificationSequence[
                            0
                        ].ReferencedSegmentNumber,
                    }

                    # Extract attributes from Derivation Image Sequence
                    derivation_image_sequence = item.DerivationImageSequence
                    if derivation_image_sequence:
                        source_image_sequence = derivation_image_sequence[
                            0
                        ].SourceImageSequence
                        if source_image_sequence:
                            frame_data[
                                "DerivationImageSequence_SourceImageSequence_ReferencedSOPClassUID"
                            ] = source_image_sequence[0].ReferencedSOPClassUID
                            frame_data[
                                "DerivationImageSequence_SourceImageSequence_ReferencedSOPInstanceUID"
                            ] = source_image_sequence[0].ReferencedSOPInstanceUID
                            purpose_of_reference_code_sequence = source_image_sequence[
                                0
                            ].PurposeOfReferenceCodeSequence
                            if purpose_of_reference_code_sequence:
                                frame_data[
                                    "DerivationImageSequence_SourceImageSequence_PurposeOfReferenceCodeSequence_CodeValue"
                                ] = purpose_of_reference_code_sequence[0].CodeValue
                                frame_data[
                                    "DerivationImageSequence_SourceImageSequence_PurposeOfReferenceCodeSequence_CodingSchemeDesignator"
                                ] = purpose_of_reference_code_sequence[
                                    0
                                ].CodingSchemeDesignator
                                frame_data[
                                    "DerivationImageSequence_SourceImageSequence_PurposeOfReferenceCodeSequence_CodeMeaning"
                                ] = purpose_of_reference_code_sequence[0].CodeMeaning

                    # Extract attributes from Derivation Code Sequence
                    derivation_code_sequence = derivation_image_sequence[
                        0
                    ].DerivationCodeSequence
                    if derivation_code_sequence:
                        frame_data[
                            "DerivationImageSequence_DerivationCodeSequence_CodeValue"
                        ] = derivation_code_sequence[0].CodeValue
                        frame_data[
                            "DerivationImageSequence_DerivationCodeSequence_CodingSchemeDesignator"
                        ] = derivation_code_sequence[0].CodingSchemeDesignator
                        frame_data[
                            "DerivationImageSequence_DerivationCodeSequence_CodeMeaning"
                        ] = derivation_code_sequence[0].CodeMeaning

                    data.append(frame_data)

            # Add data from this series to the combined_data list
            combined_data.extend(data)

except Exception as e:
    logging.error("An error occurred: %s", str(e))

finally:
    # Create a DataFrame from the combined_data list
    df = pd.DataFrame(combined_data)

    # Generate a single CSV file for all the data
    csv_filename = "perFrameFunctionalGroupSequence.csv"
    df.to_csv(csv_filename, index=False)
    !lz4 --rm 'perFrameFunctionalGroupSequence.csv' 'perFrameFunctionalGroupSequence.csv.lz4'

    logging.info("Processing complete.")