In [1]:
import sys
import csv
import os
# setting path
sys.path.append('../')

from dcm_validator.dciodvfy import DCIodValidator

In [2]:
# DICOM file validated and patched by the cells below.
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere without editing it.
sample_dcm_path = "/home/r079a/Desktop/de-identification/dataset/midi-test-data/input_data/2749483915/3.4.816.1.3.6415641.4.839.3233620932864866795/3.4.816.1.3.6415641.4.839.1353086665728801204/00000001.dcm"

In [3]:
# Validator object from the project-local dcm_validator.dciodvfy module; reused by the following cells.
validator = DCIodValidator()

In [4]:
# Validate the sample file; the result is unpacked into an errors collection and a warnings collection.
errors, warnings = validator.validate_dicom(sample_dcm_path)

In [5]:
# Narrow the errors with the validator's filter, using its default filter message.
# Presumably this keeps the "Missing attribute ..." entries (see the printed output below) — confirm in dcm_validator.
missing_attribute_errors = DCIodValidator.filter_missing_attributes_errors(errors)

In [6]:
# Print each filtered error using its own string representation.
for err in missing_attribute_errors:
    print(err)

Error:
Tag: (0020, 0060)
Name: Laterality
Index: 0
Message: Missing attribute for Type 2C Conditional
Type: 2C
Module: GeneralSeries


In [7]:
# Same filter, but selecting on the message 'Bad Sequence number' instead of the default.
# No output was recorded for this cell, i.e. nothing was printed for the sample file.
sequence_number_attribute_errors = DCIodValidator.filter_missing_attributes_errors(errors, filter_mssg='Bad Sequence number')
for err in sequence_number_attribute_errors:
    print(err)

In [8]:
import pydicom
from pydicom.datadict import dictionary_VR, keyword_for_tag

In [9]:
# For each missing attribute, show its name, the VR pydicom's dictionary lists for its tag,
# and the error's missing_type field (e.g. "2C" in the recorded output).
for err in missing_attribute_errors:
    print(err.name)
    print(dictionary_VR(err.tag), err.missing_type)

Laterality
CS 2C


In [10]:
# Load the sample file into a pydicom dataset so the missing attributes can be added to it.
ds = pydicom.dcmread(sample_dcm_path)

In [11]:
ds

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: CT Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.4.816.1.3.6415641.4.839.1566195357688193396
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.34'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'LOCALIZER', 'CT_SOM5 TOP']
(0008, 0016) SOP Class UID                       UI: CT Image Storage
(0008, 0018) SOP Instance UID                    UI: 3.4.816.1.3.6415641.4.839.1566195357688193396
(0008, 0020) Study Date                          

In [12]:
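# NOTE: unfinished draft kept for reference only — it is commented out and never called.
# As written it would not run: the signature names `parents_list` but the body iterates
# `parents`, `selected_ds` is never defined, and the innermost statement is a placeholder.
# def walk_parents_to_find_tag(ds, target_tag, parents_list: list = []):
#    for idx, parent_tag in enumerate(parents):
#        element = selected_ds.get(parent_tag)
#        if element.VR == "SQ":
#            for sub_dataset in element.value:
#                1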

def get_empty_element_value_for_tag(tag):
    """Return the placeholder value used when fabricating a missing element.

    The value depends only on the VR that pydicom's data dictionary lists
    for ``tag``:

    * text, UID, code-string and date/time VRs -> ``""``
    * binary numeric VRs (UL, FL, FD, SL, SS, US) -> ``0``
    * numeric-string VRs (DS, IS) -> ``"0"``
    * UN -> ``b""``

    Returns:
        The placeholder value, or ``None`` when no placeholder is defined for
        the VR (e.g. sequences) or when the tag is not in the dictionary.
    """
    try:
        elem_vr = dictionary_VR(tag)
    except KeyError:
        # dictionary_VR raises KeyError for tags missing from the dictionary
        # (private or unknown tags); report "no placeholder" instead of crashing.
        return None

    if elem_vr in ("SH", "PN", "UI", "LO", "LT", "CS", "ST", "UT", "DT", "DA", "TM"):
        return ""
    if elem_vr in ("UL", "FL", "FD", "SL", "SS", "US"):
        return 0
    if elem_vr in ("DS", "IS"):
        return "0"
    if elem_vr == "UN":
        return b""
    # Every other VR (SQ, OB, OW, AT, ambiguous "US or SS", ...) has no placeholder.
    return None

def create_element_from_tag(tag):
    """Build a placeholder ``DataElement`` for ``tag``.

    The element gets the VR that pydicom's dictionary lists for the tag and
    the value returned by ``get_empty_element_value_for_tag``.

    Returns:
        The new element, or ``None`` when it should not or cannot be built:
        the keyword is in the ignore list, the tag is not in the dictionary,
        or no placeholder value exists for its VR.
    """
    # Keywords that are never auto-filled.
    # NOTE(review): presumably because an empty code entry is not meaningful — confirm.
    ignore_list = ['CodeValue', 'CodeMeaning']

    # Check the ignore list first so no dictionary work is done for skipped tags.
    if keyword_for_tag(tag) in ignore_list:
        return None

    try:
        elem_vr = dictionary_VR(tag)
    except KeyError:
        # Tag is not in the dictionary, so its VR is unknown and nothing can be built.
        return None

    elem_val = get_empty_element_value_for_tag(tag)
    if elem_val is None:
        return None

    return pydicom.dataelem.DataElement(tag, elem_vr, elem_val)
    

def create_empty_element(ds, element_tag, parents: list = None):
    """Add a placeholder element for ``element_tag`` to ``ds`` (modified in place).

    Args:
        ds: Dataset to modify.
        element_tag: Tag of the element to add.
        parents: Tags of the enclosing sequence elements, outermost first.
            ``None`` or an empty list means the element is added to ``ds``
            itself. Only the FIRST item of each parent sequence is followed;
            an empty parent sequence gets a new empty item appended.

    Returns:
        ``True`` if the element was added, ``False`` otherwise (no placeholder
        can be built for the tag, or a parent sequence is absent). The outcome
        is also printed.
    """
    cannot_create_msg = f"Element can not be created for tag {element_tag} {keyword_for_tag(element_tag)}"

    # Build the element first so a failure leaves the dataset untouched
    # (no stray empty sequence item is appended).
    new_element = create_element_from_tag(element_tag)
    if new_element is None:
        print(cannot_create_msg)
        return False

    # Walk down the parent chain; the same rules apply at every nesting depth.
    selected_ds = ds
    for ptag in parents or ():
        sq_elem = selected_ds.get(ptag)
        if sq_elem is None:
            # The parent sequence itself is missing, so there is nowhere to put the element.
            print(cannot_create_msg)
            return False
        if len(sq_elem.value) > 0:
            selected_ds = sq_elem.value[0]
        else:
            selected_ds = pydicom.dataset.Dataset()
            sq_elem.value.append(selected_ds)

    selected_ds.add(new_element)
    print(f"Element created for tag {element_tag} {keyword_for_tag(element_tag)}")
    return True

In [13]:
# Add a placeholder element to ds for every missing-attribute error; ds is modified in place.
# error.parents is passed as the chain of enclosing sequence tags.
for error in missing_attribute_errors:
    create_empty_element(ds, error.tag, error.parents)

Element created for tag (0020, 0060) Laterality


In [14]:
ds

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: CT Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.4.816.1.3.6415641.4.839.1566195357688193396
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.34'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'LOCALIZER', 'CT_SOM5 TOP']
(0008, 0016) SOP Class UID                       UI: CT Image Storage
(0008, 0018) SOP Instance UID                    UI: 3.4.816.1.3.6415641.4.839.1566195357688193396
(0008, 0020) Study Date                          

In [15]:
# output_path = "../00000098.dcm"

# ds.save_as(output_path)

In [16]:
# newerrs, _ = validator.validate_dicom(output_path)
# missing_attribute_errors = DCIodValidator.filter_missing_attributes_errors(newerrs)

In [17]:
# for err in missing_attribute_errors:
#     print(err.name)
#     print(dictionary_VR(err.tag), err.missing_type)

In [18]:
# print(len(errors))
# print(len(newerrs))

In [19]:
# set1 = set((x.tag,x.name) for x in newerrs)
# difference = [x for x in errors if (x.tag,x.name) not in set1]
# for d in difference:
#     print(d)

In [20]:
from pydicom.uid import generate_uid
import shutil

In [21]:
# Second input file, used to exercise validator.populate_missing_attributes below.
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere without editing it.
validation_sample = "/home/r079a/Desktop/de-identification/dataset/midi-validation-data/input_data/8371727310/3.1.755.1.3.3756213.9.632.3767770174295087735/3.1.755.1.3.3756213.9.632.2879562198953027365/00000039.dcm"

In [22]:
# Copy the validation sample next to the notebook; the following cells read and
# process this copy through output_path rather than the original file.
output_path = "../00000039.dcm"

shutil.copyfile(validation_sample, output_path)

'../00000039.dcm'

In [23]:
# Load the copied file and display it as the "before" state.
# This rebinds ds, replacing the dataset loaded from sample_dcm_path earlier.
ds = pydicom.dcmread(output_path)

ds

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Secondary Capture Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.1.755.1.3.3756213.9.632.5167360340924479751
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.35'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'SECONDARY', 'MPR']
(0008, 0012) Instance Creation Date              DA: '20151225'
(0008, 0013) Instance Creation Time              TM: '081635'
(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018)

In [24]:
# Fresh validator instance (rebinds the earlier one).
validator = DCIodValidator()

# Let the library fill in missing attributes for the copied file.
# NOTE(review): presumably rewrites the file at output_path in place — confirm in dcm_validator.
validator.populate_missing_attributes(output_path)

# Display the validator's log of what it added (keyword -> count in the recorded output).
validator.added_attr_log

{'PositionReferenceIndicator': 1, 'ReferencedSOPClassUID': 1}

In [25]:
# Re-read the file after populate_missing_attributes and display it as the "after" state.
ds = pydicom.dcmread(output_path)

ds

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Secondary Capture Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.1.755.1.3.3756213.9.632.5167360340924479751
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.35'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'SECONDARY', 'MPR']
(0008, 0012) Instance Creation Date              DA: '20151225'
(0008, 0013) Instance Creation Time              TM: '081635'
(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018)

In [26]:
# errors, _ = validator.validate_dicom(validation_sample)
# missing_attribute_errors = DCIodValidator.filter_missing_attributes_errors(errors)
# DCIodValidator.print_valitation_item_list(missing_attribute_errors)
# ds = pydicom.dcmread(validation_sample)

# attribute_created = 0
# added_tags = []
# for error in missing_attribute_errors:
#     created = validator.create_empty_element(ds, error.tag, error.parents)
#     if created:
#         attribute_created += 1
#         added_tags.append(error.tag)

# missing_sequence_number_attr_errs = DCIodValidator.filter_missing_attributes_errors(errors, filter_mssg='Bad Sequence number')
# for error in missing_sequence_number_attr_errs:
#     if error.tag == (0x0008, 0x1120):
#         created = validator.create_empty_element(ds, pydicom.tag.Tag(0x0008, 0x1150), [error.tag], forced=True)
#         if created:
#             attribute_created += 1
#             added_tags.append(error.tag)

In [27]:
# added_tags

In [28]:
ds

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Secondary Capture Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.1.755.1.3.3756213.9.632.5167360340924479751
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.35'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'SECONDARY', 'MPR']
(0008, 0012) Instance Creation Date              DA: '20151225'
(0008, 0013) Instance Creation Time              TM: '081635'
(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018)

In [29]:
# Rebind output_path to a different file name and write the current ds there.
# From here on output_path no longer refers to the "../00000039.dcm" copy.
output_path = "../00001031.dcm"

ds.save_as(output_path)

In [30]:
# Validate the saved file again; the second return value (warnings in the earlier call) is discarded.
newerrs, _ = validator.validate_dicom(output_path)

In [31]:
# Print the errors that remain after the missing attributes were populated.
DCIodValidator.print_valitation_item_list(newerrs)

</MediaStorageSOPInstanceUID(0002,0003)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.5167360340924479751>
</SOPInstanceUID(0008,0018)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.5167360340924479751>
</ReferencedPerformedProcedureStepSequence(0008,1111)[1]/ReferencedSOPInstanceUID(0008,1155)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.2936742109197398459>
</ReferencedImageSequence(0008,1140)[1]/ReferencedSOPInstanceUID(0008,1155)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.1597287086640527900>
</StudyInstanceUID(0020,000d)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.3767770174295087735>
</SeriesInstanceUID(0020,000e)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.2879562198953027365>
</FrameOfReferenceUID(0020,0052)> - Illegal root for UID = <3.1.755.1.3.3756213.9.632.2651306048445902475>
</ReferencedPatientSequence(0008,1120)[1]/ReferencedSOPInstanceUID(0008,1155)> - Missing attribute for Type 1 Required - Module=<SOPInstanceReferenceMa