In [1]:
import sys
import csv
import os
# setting path
sys.path.append('../')

from dcm_validator.dciodvfy import DCIodValidator

In [2]:
# Path of the DICOM file used in the first half of this notebook.
# The input-data root can be overridden with the MIDI_INPUT_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original location is used.
sample_dcm_path = os.path.join(
    os.environ.get(
        "MIDI_INPUT_DIR",
        "/home/r079a/Desktop/de-identification/dataset/midi-validation-data/input_data",
    ),
    "3363075481",
    "3.2.144.1.2.0137535.6.683.1214942220616161257",
    "3.2.144.1.2.0137535.6.683.2337113981393126883",
    "00000098.dcm",
)

In [3]:
# Create the validator instance used for every validate_dicom call in this notebook.
validator = DCIodValidator()

In [4]:
# Validate the sample file; the call returns a pair that is unpacked into
# `errors` and `warnings`.
errors, warnings = validator.validate_dicom(sample_dcm_path)

In [5]:
# Narrow the error list with DCIodValidator.filter_missing_attributes_errors.
# For this file every item it returned carries a "Missing attribute ..." message.
missing_attribute_errors = DCIodValidator.filter_missing_attributes_errors(errors)

In [6]:
# Print each filtered error using its own string representation.
for err in missing_attribute_errors:
    print(err)

Error:
Tag: (0008, 0100)
Name: CodeValue
Index: 1
Parents: (0008, 1032)
Message: Missing attribute for Type 1C Conditional
Type: 1C
Module: BasicCodeSequenceMacro
Error:
Tag: (0008, 0104)
Name: CodeMeaning
Index: 1
Parents: (0008, 1032)
Message: Missing attribute for Type 1 Required
Type: 1
Module: BasicCodeSequenceMacro
Error:
Tag: (0008, 0119)
Name: LongCodeValue
Index: 1
Parents: (0008, 1032)
Message: Missing attribute for Type 1C Conditional
Type: 1C
Module: BasicCodeSequenceMacro
Error:
Tag: (0008, 0120)
Name: URNCodeValue
Index: 1
Parents: (0008, 1032)
Message: Missing attribute for Type 1C Conditional
Type: 1C
Module: BasicCodeSequenceMacro
Error:
Tag: (0020, 0060)
Name: Laterality
Index: 0
Message: Missing attribute for Type 2C Conditional
Type: 2C
Module: GeneralSeries
Error:
Tag: (0020, 1040)
Name: PositionReferenceIndicator
Index: 0
Message: Missing attribute for Type 2 Required
Type: 2
Module: FrameOfReference


In [7]:
import pydicom
from pydicom.datadict import dictionary_VR, keyword_for_tag

In [8]:
# For each missing attribute print its name, then the VR that pydicom's
# dictionary assigns to the tag next to the error's `missing_type`.
for err in missing_attribute_errors:
    print(err.name)
    print(dictionary_VR(err.tag), err.missing_type)

CodeValue
SH 1C
CodeMeaning
LO 1
LongCodeValue
UC 1C
URNCodeValue
UR 1C
Laterality
CS 2C
PositionReferenceIndicator
LO 2


In [9]:
# Read the sample file into a pydicom dataset; this object is modified in place
# by the cells that add the missing attributes.
ds = pydicom.dcmread(sample_dcm_path)

In [10]:
# Show the dataset as read from disk (bare expression -> cell output).
ds



Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.2.144.1.2.0137535.6.683.1870795683468907250
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.31'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'PRIMARY']
(0008, 0016) SOP Class UID                       UI: MR Image Storage
(0008, 0018) SOP Instance UID                    UI: 3.2.144.1.2.0137535.6.683.1870795683468907250
(0008, 0020) Study Date                          DA: '20170113'
(0008, 0021) S

In [11]:
# def walk_parents_to_find_tag(ds, target_tag, parents_list: list = []):
#    for idx, parent_tag in enumerate(parents):
#        element = selected_ds.get(parent_tag)
#        if element.VR == "SQ":
#            for sub_dataset in element.value:
#                1

def get_empty_element_value_for_tag(tag):
    """Return the placeholder value used when creating a missing element.

    The value is chosen from the VR that pydicom's data dictionary assigns
    to ``tag``.

    Args:
        tag: Tag of the element; passed unchanged to ``dictionary_VR``.

    Returns:
        ``""`` for the text, date and time VRs listed below, ``0`` for the
        listed binary numeric VRs, ``"0"`` for DS/IS, ``b""`` for UN, and
        ``None`` for every other VR. ``None`` is also returned when the tag
        is not in the data dictionary, because ``dictionary_VR`` raises
        ``KeyError`` for such tags and no placeholder can be chosen.
    """
    try:
        elem_vr = dictionary_VR(tag)
    except KeyError:
        # Tag unknown to the dictionary: treat it like an unsupported VR
        # instead of aborting the caller's loop.
        return None

    # Text VRs and date/time VRs share the same empty-string placeholder.
    if elem_vr in ("SH", "PN", "UI", "LO", "LT", "CS", "ST", "UT", "DT", "DA", "TM"):
        return ""
    if elem_vr in ("UL", "FL", "FD", "SL", "SS", "US"):
        return 0
    if elem_vr in ("DS", "IS"):
        return "0"
    if elem_vr == "UN":
        return b""
    # Any other VR has no placeholder here.
    return None

def create_element_from_tag(tag, ignore_keywords=("CodeValue", "CodeMeaning")):
    """Build a placeholder ``DataElement`` for ``tag``.

    Args:
        tag: Tag of the element to create.
        ignore_keywords: Keywords (as returned by ``keyword_for_tag``) for
            which no element is ever created. Defaults to the two keywords
            this notebook has always skipped.

    Returns:
        A ``pydicom.dataelem.DataElement`` holding the placeholder value from
        ``get_empty_element_value_for_tag``, or ``None`` when the keyword is
        ignored, the tag is not in pydicom's data dictionary, or no
        placeholder value exists for the tag's VR.
    """
    # Check the ignore list first so ignored tags cost no further lookups.
    if keyword_for_tag(tag) in ignore_keywords:
        return None

    try:
        elem_vr = dictionary_VR(tag)
    except KeyError:
        # dictionary_VR raises KeyError for tags it does not know; such an
        # element cannot be built, which callers already handle via None.
        return None

    elem_val = get_empty_element_value_for_tag(tag)
    if elem_val is None:
        return None

    return pydicom.dataelem.DataElement(tag, elem_vr, elem_val)
    

def create_empty_element(ds, element_tag, parents: "list | None" = None):
    """Add a placeholder element for ``element_tag`` to ``ds`` or to a nested item.

    With no parents the element is added to ``ds`` itself. With exactly one
    parent tag the element is added to the first item of that parent's value;
    if the parent's value has no items, a new ``pydicom.dataset.Dataset`` is
    appended and used. One status line is printed for every call.

    Args:
        ds: Dataset to modify in place.
        element_tag: Tag of the element to create.
        parents: Tags of the enclosing elements, outermost first. ``None``
            or an empty list means the element belongs to ``ds`` directly.

    Returns:
        ``True`` if an element was added, ``False`` otherwise.

    Raises:
        NotImplementedError: If more than one parent is given.
    """
    parents = parents or []
    if len(parents) > 1:
        raise NotImplementedError("only a single level of nesting is supported")

    tag_label = f"{element_tag} {keyword_for_tag(element_tag)}"

    # Build the element before touching the dataset so that a tag that cannot
    # be created never leaves a freshly appended, empty item behind.
    new_element = create_element_from_tag(element_tag)
    if new_element is None:
        print(f"Element can not be created for tag {tag_label}")
        return False

    if parents:
        parent_element = ds.get(parents[0])
        if parent_element is None:
            # The enclosing element itself is absent, so there is nowhere to
            # put the new element; report it instead of failing silently.
            print(f"Element can not be created for tag {tag_label}")
            return False
        if len(parent_element.value) > 0:
            # Only the first item of the parent is ever filled.
            target_ds = parent_element.value[0]
        else:
            target_ds = pydicom.dataset.Dataset()
            parent_element.value.append(target_ds)
    else:
        target_ds = ds

    if element_tag in target_ds:
        # Never replace an existing value with a placeholder.
        print(f"Element already present for tag {tag_label}")
        return False

    target_ds.add(new_element)
    print(f"Element created for tag {tag_label}")
    return True

In [12]:
# Try to add a placeholder element to `ds` for every missing-attribute error,
# passing the error's tag and its parents through to create_empty_element.
for error in missing_attribute_errors:
    create_empty_element(ds, error.tag, error.parents)

Element can not be created for tag (0008, 0100) CodeValue
Element can not be created for tag (0008, 0104) CodeMeaning
Element can not be created for tag (0008, 0119) LongCodeValue
Element can not be created for tag (0008, 0120) URNCodeValue
Element created for tag (0020, 0060) Laterality
Element created for tag (0020, 1040) PositionReferenceIndicator


In [13]:
# Show the dataset again, after the placeholder elements were added.
ds

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 178
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 3.2.144.1.2.0137535.6.683.1870795683468907250
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.31'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'PRIMARY']
(0008, 0016) SOP Class UID                       UI: MR Image Storage
(0008, 0018) SOP Instance UID                    UI: 3.2.144.1.2.0137535.6.683.1870795683468907250
(0008, 0020) Study Date                          DA: '20170113'
(0008, 0021) S

In [14]:
# Write the modified dataset to a new file (relative to the parent directory)
# so it can be validated again.
output_path = "../00000098.dcm"

ds.save_as(output_path)

In [15]:
# Validate the saved file; the second item of the returned pair is discarded.
newerrs, _ = validator.validate_dicom(output_path)
# Rebinds missing_attribute_errors: from here on it refers to the patched
# file, no longer to the original sample.
missing_attribute_errors = DCIodValidator.filter_missing_attributes_errors(newerrs)

In [16]:
# Same listing as before the fix (name, then VR and missing_type), now for the
# missing-attribute errors that remain in the patched file.
for err in missing_attribute_errors:
    print(err.name)
    print(dictionary_VR(err.tag), err.missing_type)

CodeValue
SH 1C
CodeMeaning
LO 1
LongCodeValue
UC 1C
URNCodeValue
UR 1C


In [17]:
# Total error count for the original file, then for the patched file.
print(len(errors))
print(len(newerrs))

21
19


In [18]:
# List the errors of the original file that are gone after patching.
# Errors are matched on the (tag, name) pair only, so two errors that share
# tag and name but differ in any other field count as the same error.
set1 = set((x.tag,x.name) for x in newerrs)
difference = [x for x in errors if (x.tag,x.name) not in set1]
for d in difference:
    print(d)

</Laterality(0020,0060)> - Missing attribute for Type 2C Conditional - Module=<GeneralSeries>
</PositionReferenceIndicator(0020,1040)> - Missing attribute for Type 2 Required - Module=<FrameOfReference>


In [30]:
# Second DICOM file, used to exercise the DCIodValidator helpers.
# The input-data root can be overridden with the MIDI_INPUT_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original location is used.
validation_sample = os.path.join(
    os.environ.get(
        "MIDI_INPUT_DIR",
        "/home/r079a/Desktop/de-identification/dataset/midi-validation-data/input_data",
    ),
    "154824530",
    "2.3.397.0.1.8038924.5.958.1510767526752412776",
    "2.3.397.0.1.8038924.5.958.2106148537058589703",
    "00000001.dcm",
)

In [31]:
# Validate the second sample. This rebinds `errors`, `missing_attribute_errors`
# and `ds`, so they no longer refer to the first sample from here on.
errors, _ = validator.validate_dicom(validation_sample)
missing_attribute_errors = DCIodValidator.filter_missing_attributes_errors(errors)
DCIodValidator.print_valitation_item_list(missing_attribute_errors)
ds = pydicom.dcmread(validation_sample)

# Count how many calls to DCIodValidator.create_empty_element returned a
# truthy value. The counter is not displayed in this cell.
attribute_created = 0
for error in missing_attribute_errors:
    created = DCIodValidator.create_empty_element(ds, error.tag, error.parents)
    if created:
        attribute_created += 1

Error:
Tag: (0040, a170)
Name: PurposeOfReferenceCodeSequence
Index: 1
Parents: (0018, a001)
Message: Missing attribute for Type 1 Required
Type: 1
Module: SOPCommon
Error:
Tag: (0008, 0070)
Name: Manufacturer
Index: 1
Parents: (0018, a001)
Message: Missing attribute for Type 1 Required
Type: 1
Module: SOPCommon
Element can not be created for tag (0040, a170) PurposeOfReferenceCodeSequence
Element created for tag (0008, 0070) Manufacturer


In [32]:
# Write the patched second sample to disk for re-validation.
# NOTE(review): the output is named 00001031.dcm while the file that was read
# is 00000001.dcm - confirm the name is intentional.
output_path = "../00001031.dcm"

ds.save_as(output_path)

In [33]:
# Validate the patched file; the second item of the returned pair is discarded.
newerrs, _ = validator.validate_dicom(output_path)

In [34]:
# Print every error reported for the patched file (the full list, not only the
# missing-attribute subset).
DCIodValidator.print_valitation_item_list(newerrs)

</ClinicalTrialSponsorName(0012,0010)> - Empty attribute (no value) for Type 1 Required - Module=<ClinicalTrialSubject>
</ClinicalTrialProtocolID(0012,0020)> - Empty attribute (no value) for Type 1 Required - Module=<ClinicalTrialSubject>
</ClinicalTrialSubjectID(0012,0040)> - Attribute present but empty (no value) even though condition not satisfied for Type 1C Conditional - Module=<ClinicalTrialSubject>
</ClinicalTrialSubjectReadingID(0012,0042)> - Attribute present but empty (no value) even though condition not satisfied for Type 1C Conditional - Module=<ClinicalTrialSubject>
</ReferencedImageSequence(0008,1140)> - Bad Sequence number of Items = <0> (1-n Required by Module definition) - Module=<GeneralReference>
</ReferencedImageSequence(0008,1140)> - Bad attribute Value Multiplicity for Type 3 Optional - Module=<GeneralReference>
</SourceImageSequence(0008,2112)> - Bad Sequence number of Items = <0> (1-n Required by Module definition) - Module=<GeneralReference>
</SourceImageSequen