# Examine DICOM Headers for Problems

### Imports

In [1]:
from pathlib import Path
from typing import Callable, Iterator, List, Optional

import pandas as pd
import pydicom
import xlwings as xw

#### Read the DICOM file.

In [2]:
def load_header(file_path: Path,
                status_output: Optional[Callable] = None
                ) -> Optional[pydicom.Dataset]:
    '''Load a DICOM dataset from a given file.

    Args:
        file_path (Path): Path to the DICOM file.  A plain string path is
            also accepted.
        status_output (Callable): A function taking one string parameter.
            Used for reporting a file that does not contain valid DICOM
            data.  Defaults to `print`.

    Returns:
        pydicom.Dataset: The full dataset read from the DICOM file. If the file
            does not contain valid DICOM data, returns None.
    '''
    if status_output is None:
        status_output = print
    try:
        return pydicom.dcmread(file_path)
    except pydicom.errors.InvalidDicomError:
        # A plain string has no `.name`; wrap it so the skip message can
        # still be produced instead of raising AttributeError here.
        if isinstance(file_path, str):
            file_name = Path(file_path).name
        else:
            file_name = file_path.name
        message = f'{file_name} did not contain valid DICOM data.  Skipped.'
        status_output(message)
        return None


#### Scan a directory of DICOM files, yielding the DICOM datasets.

In [3]:
def get_dicom_images(data_path: Path,
                     status_output: Optional[Callable] = None,
                     include_subdirectories: bool = True
                     ) -> Iterator[pydicom.Dataset]:
    '''Yield the DICOM dataset for each valid file in a given directory.

    Args:
        data_path (Path): Path to the directory containing DICOM files.
        status_output (Callable): A function taking one string parameter.  Used
            for reporting the results of trying to load each DICOM file.
            Defaults to `print`.
        include_subdirectories (bool): If True, subdirectories of the supplied
            directory will also be scanned.

    Yields:
        pydicom.Dataset: The full dataset read from each DICOM file.  Files
            that do not contain valid DICOM data are reported through
            `status_output` and skipped.
    '''
    if status_output is None:
        status_output = print
    # Use '*' rather than '*.*' for the non-recursive case so that files
    # without an extension are scanned, exactly as the recursive '**/*'
    # pattern already does.
    scan_pattern = '**/*' if include_subdirectories else '*'
    for dicom_file in data_path.glob(scan_pattern):
        if not dicom_file.is_file():
            continue
        status_output(f'Checking file {dicom_file.name}')
        dataset = load_header(dicom_file, status_output)
        # Compare against None explicitly: load_header signals an invalid
        # file with None, whereas a dataset that was read successfully but
        # holds no elements would fail a plain truthiness test.
        if dataset is not None:
            yield dataset


In [4]:
def scan_headers(input_path: Path, desired_tags: List[str],
                 status_output: Optional[Callable] = None) -> pd.DataFrame:
    '''Collect selected DICOM header values into a table.

    Iterate through each DICOM file in the `input_path` folder, obtaining the
    requested DICOM tags from each header.  A fixed set of image identifying
    tags is always captured in addition to `desired_tags`.

    Args:
        input_path (Path): Path to the directory containing DICOM files.
        desired_tags (List[str]): List of desired DICOM tags to capture.
        status_output (Callable): A function taking one string parameter.
           Used for reporting progress.  Defaults to `print`.

    Returns:
        pd.DataFrame: One row per valid DICOM file and one column per tag,
            with the image identifying tags first.  Every value is converted
            with `str`, so a tag that is absent from a file appears as the
            text 'None'.  If no valid DICOM files are found, the table is
            empty but still has all of the columns.
    '''
    if status_output is None:
        status_output = print
    image_id_tags = [
        'StudyID',
        'SeriesNumber',
        'StudyDescription',
        'SeriesDescription',
        'Modality',
        'InstanceNumber',
        'SliceLocation'
        ]
    # list() lets any sequence of tags be supplied; dict.fromkeys drops
    # repeated tags while keeping first-seen order, which matches the keys
    # of the per-file dictionaries built below.
    all_tags = list(dict.fromkeys(image_id_tags + list(desired_tags)))
    all_header_data = [
        {tag: str(dataset.get(tag)) for tag in all_tags}
        for dataset in get_dicom_images(input_path, status_output)
        ]
    # Name the columns explicitly so an empty scan still produces a table
    # with the expected columns rather than one with no columns at all.
    header_table = pd.DataFrame(all_header_data, columns=all_tags)
    return header_table



## Scan all Images in a directory

### Data Input and Output paths

In [5]:
# Directory that will be scanned for DICOM files.  Only one `data_path`
# assignment is active; the commented-out lines are alternative locations
# (a local test-data folder and the 'to fix' network folder).
#data_path = Path.cwd() / 'DICOM Test Data'
data_path = Path(r'\\dkphysicspv1\Radiation_Therapy\DICOM_IMPORT\Import Repair\repaired')
#data_path = Path(r'\\dkphysicspv1\Radiation_Therapy\DICOM_IMPORT\Import Repair\to fix')
# 'Output' folder under the current working directory.
# NOTE(review): `output_path` is not referenced by the cells shown here --
# confirm whether it is still needed.
output_path = Path.cwd() / 'Output'

In [6]:
def dummy(text: str):
    '''No-op status callback: accept a message and discard it.'''
    return None

In [7]:
# DICOM tag keywords to capture from every file, in addition to the image
# identifying tags that `scan_headers` always collects.
desired_tags = [
    'InstitutionAddress',
    'InstitutionName',
    'Manufacturer',
    'ManufacturerModelName',
    'StationName',
    'DeviceSerialNumber',
    'SoftwareVersions'
    ]

In [8]:
# Build the header table for every DICOM file under `data_path`.  `dummy` is
# supplied as the status callback so the per-file progress messages are
# discarded instead of being printed.
header_table = scan_headers(data_path, desired_tags, status_output=dummy)

In [9]:
# Display the collected header table using xlwings' `view` helper
# (presumably opens it in an Excel workbook -- see the xlwings docs).
xw.view(header_table)