# `Pydicom` DICOM Tags

In [3]:
from pyhere import here
from pydicom import dcmread


## Read

In [2]:
# Resolve the project-relative path to one sample DICOM file, read it, and display the dataset.
upenn1_dcm1_path = here(
    "data/MRI-DICOM/UPENN-GBM-00001/02-06-2002-NA-BRAINROUTINE-33106/2.000000-t2Flairaxial ProcessedCaPTk-17693/1-01.dcm"
)
upenn1_dcm1 = dcmread(upenn1_dcm1_path)
upenn1_dcm1

Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 204
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002,0003) Media Storage SOP Instance UID      UI: 1.3.6.1.4.1.14519.5.2.1.224599622610134734824248166322625258212
(0002,0010) Transfer Syntax UID                 UI: Implicit VR Little Endian
(0002,0012) Implementation Class UID            UI: 1.3.6.1.4.1.22213.1.143
(0002,0013) Implementation Version Name         SH: '0.5'
(0002,0016) Source Application Entity Title     AE: 'POSDA'
-------------------------------------------------
(0008,0005) Specific Character Set              CS: 'ISO_IR 100'
(0008,0008) Image Type                          CS: ['DERIVED', 'SECONDARY', 'OTHER']
(0008,0012) Instance Creation Date              DA: '20020206'
(0008,0013) Instance Creation Time              TM: '120146.015000'
(0008,0016) SOP Class UID                

## Get DICOM Tags (Single `.dcm`)

### Use DICOM Attribute Names

In [4]:
# Attribute-style access by DICOM keyword: evaluates to the element's value (a plain string here),
# not to the data element itself.
upenn1_dcm1.PatientID

'UPENN-GBM-00001'

### Use DICOM Tags

`(0010,0020)` is the DICOM tag (group `0010`, element `0020`) for Patient ID.

In [16]:
# Look up a data element by its (group, element) tag.
# Both forms below print the whole data element (tag, name, VR and value), not just the value.
print(upenn1_dcm1[0x10, 0x20])

# Same lookup through .get(), passing the tag as an explicit (group, element) tuple.
print(upenn1_dcm1.get((0x0010, 0x0020)))

(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'
(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'


**Safe Access:**

In [21]:
# .get() with a default: the tag is present here, so the data element is returned.
# Note the return types differ: a data element on a hit, the plain default ("Unknown") on a miss.
upenn1_dcm1.get((0x0010, 0x0020), "Unknown")

(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'

In [22]:
# (0008,1010) Station Name is not present in this file, so .get() falls back to the
# supplied default instead of raising.
upenn1_dcm1.get((0x0008, 0x1010), "Unknown")

'Unknown'

### Loop over tags

In [24]:
# Walk every data element in the dataset and report the first one tagged (0010,0020).
patient_id_tag = (0x0010, 0x0020)
for elem in upenn1_dcm1:
    if elem.tag != patient_id_tag:
        continue  # not Patient ID; keep scanning
    print("Patient ID:", elem.value)
    break

Patient ID: UPENN-GBM-00001


## Class: `DcmMetaExtractor`

In [4]:
import pydicom
import pandas as pd
import warnings

class DcmMetaExtractor:
    """
    Extract a configurable set of DICOM tags from a file into a pandas Series.

    The tags to extract are given as a mapping from ``(group, element)`` tag
    tuples to the label each value should carry in the resulting Series.
    """

    # Pixel data lives in group 0x7FE0. DICOM elements are stored in ascending
    # tag order, so any tag in a lower group is available without reading it.
    _PIXEL_DATA_GROUP = 0x7FE0

    def __init__(self, tags_to_extract=None):
        """
        Initialize the DcmMetaExtractor with tags to extract.

        Parameters:
            tags_to_extract (dict | None): Mapping of ``(group, element)`` tag
                tuples to output labels. ``None`` selects the default MR
                acquisition tags listed below. An explicit empty mapping is
                honored and yields an empty Series.
        """
        # Test against None rather than truthiness so that an explicitly
        # supplied empty mapping is not silently replaced by the defaults.
        if tags_to_extract is None:
            tags_to_extract = {
                (0x0008, 0x0070): "Manufacturer",
                (0x0008, 0x1090): "Manufacturer's Model Name",
                (0x0018, 0x0087): "Magnetic Field Strength",
                (0x0018, 0x0050): "Slice Thickness",
                (0x0018, 0x0088): "Spacing Between Slices",
                (0x0018, 0x0080): "Repetition Time",
                (0x0018, 0x0081): "Echo Time",
                (0x0018, 0x1314): "Flip Angle"
            }
        self.tags_to_extract = tags_to_extract

    def _header_only(self):
        """
        Return True when every requested tag sorts before the pixel-data group.

        Only plain ``(int, int)`` tuples are recognized; any other key form
        makes this return False so the file is read in full, as before.
        """
        return all(
            isinstance(tag, tuple)
            and len(tag) == 2
            and isinstance(tag[0], int)
            and tag[0] < self._PIXEL_DATA_GROUP
            for tag in self.tags_to_extract
        )

    def extract_tags(self, fp):
        """
        Extract specified tags from the DICOM file and return as a pandas Series.

        Parameters:
            fp: Path to the DICOM file; passed straight to ``pydicom.dcmread``.

        Returns:
            pd.Series: One entry per configured label. Tags absent from the
            file are ``pd.NA``. If reading or extraction fails for any reason,
            a warning is emitted and every entry is ``pd.NA``.
        """
        try:
            # Skip the (potentially large) pixel data when only header tags
            # are requested; fall back to a full read otherwise.
            ds = pydicom.dcmread(fp, stop_before_pixels=self._header_only())

            # Initialize a dictionary to store the results
            results = {}

            # Extract values for the specified tags
            for tag, name in self.tags_to_extract.items():
                element = ds.get(tag)  # DataElement for the tag, or None if absent
                results[name] = element.value if element is not None else pd.NA

        except Exception as e:
            # Best-effort by design: one unreadable file should not abort a
            # batch run. stacklevel=2 attributes the warning to the caller.
            warnings.warn(
                f"Failed to read DICOM file '{fp}'. Returning missing values. Reason: {e}",
                stacklevel=2,
            )
            results = {name: pd.NA for name in self.tags_to_extract.values()}

        # Convert the results dictionary to a pandas Series
        return pd.Series(results)


In [5]:
# Run the extractor with its default tag set on one sample DICOM file.
sample_dcm_path = here(
    "data/MRI-DICOM/UPENN-GBM-00001/02-06-2002-NA-BRAINROUTINE-33106/2.000000-t2Flairaxial ProcessedCaPTk-17693/1-01.dcm"
)
dcm_extractor = DcmMetaExtractor()

dcm_extractor.extract_tags(sample_dcm_path)

Manufacturer                 SIEMENS
Manufacturer's Model Name    TrioTim
Magnetic Field Strength          3.0
Slice Thickness                  3.0
Spacing Between Slices           1.0
Repetition Time               9420.0
Echo Time                      140.0
Flip Angle                     170.0
dtype: object