diff --git a/news/1524-bugfix.md b/news/1524-bugfix.md new file mode 100644 index 000000000..c430c4b27 --- /dev/null +++ b/news/1524-bugfix.md @@ -0,0 +1 @@ +StructuredReport improvements - collect names from anywhere in text body not just in header diff --git a/src/applications/SRAnonTool/CTP_DicomToText.py b/src/applications/SRAnonTool/CTP_DicomToText.py index 2ee686394..a573a3c1f 100755 --- a/src/applications/SRAnonTool/CTP_DicomToText.py +++ b/src/applications/SRAnonTool/CTP_DicomToText.py @@ -29,6 +29,7 @@ import re from deepmerge import Merger # for deep merging yaml dictionaries from SmiServices import Mongo +from SmiServices import Dicom from SmiServices import DicomText from SmiServices import StructuredReport as SR from SmiServices import IdentifierMapper @@ -81,10 +82,10 @@ def extract_mongojson(mongojson, output, metadata_output=None, DicomTextArgs = N DicomTextArgs = {} if os.path.isdir(output): - filename = mongojson['SOPInstanceUID'] + '.txt' + filename = Dicom.tag_val(mongojson,'SOPInstanceUID', atomic=True) + '.txt' output = os.path.join(output, filename) if metadata_output and os.path.isdir(metadata_output): - mfilename = mongojson['SOPInstanceUID'] + '.json' + mfilename = Dicom.tag_val(mongojson,'SOPInstanceUID', atomic=True) + '.json' metadata_output = os.path.join(metadata_output, mfilename) logging.info('Parse %s' % mongojson.get('header',{}).get('DicomFilePath','')) if 'PatientID' in mongojson: diff --git a/src/applications/SRAnonTool/CTP_SRAnonTool.sh b/src/applications/SRAnonTool/CTP_SRAnonTool.sh index dd0c0cf5b..e14925623 100755 --- a/src/applications/SRAnonTool/CTP_SRAnonTool.sh +++ b/src/applications/SRAnonTool/CTP_SRAnonTool.sh @@ -10,8 +10,8 @@ prog=$(basename "$0") progdir=$(dirname "$0") -usage="usage: ${prog} [-d] [-v] [-e virtualenv] [-s semehr_root] -i read_from.dcm -o write_into.dcm" -options="dve:s:i:o:" +usage="usage: ${prog} [-d] [-v] [-e virtualenv] [-s semehr_root] [-y yaml] -i read_from.dcm -o write_into.dcm" 
+options="dve:s:y:i:o:" semehr_dir="/opt/semehr" virtenv="" debug=0 @@ -58,7 +58,9 @@ tidy_exit() # Default executable PATHs and Python libraries export PATH=${PATH}:${SMI_ROOT}/bin:${SMI_ROOT}/scripts:${progdir} -export PYTHONPATH=${SMI_ROOT}/lib/python3:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib/python3.6/site-packages:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib64/python3.6/site-packages +if [ "$PYTHONPATH" == "" ]; then + export PYTHONPATH=${SMI_ROOT}/lib/python3:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib/python3.6/site-packages:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib64/python3.6/site-packages +fi # Command line arguments while getopts ${options} var; do @@ -66,6 +68,7 @@ case $var in d) debug=1;; v) verbose=1;; e) virtenv="$OPTARG";; + y) default_yaml0="$OPTARG";; i) input_dcm="$OPTARG";; o) output_dcm="$OPTARG";; s) semehr_dir="$OPTARG";; @@ -78,7 +81,9 @@ if [ ! -f "$input_dcm" ]; then tidy_exit 2 "ERROR: cannot read input file '${input_dcm}'" fi if [ ! 
-f "$output_dcm" ]; then - tidy_exit 3 "ERROR: cannot write to ${output_dcm} because it must already exist" + #tidy_exit 3 "ERROR: cannot write to ${output_dcm} because it must already exist" + cp "$input_dcm" "$output_dcm" + chmod +w "$output_dcm" fi # Activate the virtual environment @@ -91,15 +96,20 @@ if [ "$virtenv" != "" ]; then fi fi -# Find the config files -if [ -d $SMI_ROOT/configs ]; then - default_yaml0="$SMI_ROOT/configs/smi_dataLoad_mysql.yaml" - default_yaml1="$SMI_ROOT/configs/smi_dataExtract.yaml" -else - default_yaml0="${progdir}/../../../data/microserviceConfigs/default.yaml" +# Find the config files, if not specified try SMI defaults otherwise in the repo +if [ "$default_yaml0" == "" ]; then + if [ -f "$SMI_ROOT/configs/smi_dataExtract.yaml" ]; then + default_yaml0="$SMI_ROOT/configs/smi_dataLoad_mysql.yaml" + default_yaml1="$SMI_ROOT/configs/smi_dataExtract.yaml" + else + default_yaml0="${progdir}/../../../data/microserviceConfigs/default.yaml" + fi +fi +if [ "$default_yaml1" == "" ]; then default_yaml1="$default_yaml0" fi + # --------------------------------------------------------------------- # Determine the SemEHR filenames - create per-process directories semehr_input_dir=$(mktemp -d -t input_docs.XXXX --tmpdir=${semehr_dir}/data) @@ -132,7 +142,7 @@ CTP_DicomToText.py -y $default_yaml0 -y $default_yaml1 \ # Reads $input_doc # Writes $anon_doc, and $anon_xml via the --xml option # -semehr_anon.py -i "${input_doc}" -o "${anon_doc}" --xml || tidy_exit 5 "Error running SemEHR-anon given ${input_doc} from ${input_dcm}" +semehr_anon.py -s "${semehr_dir}" -i "${input_doc}" -o "${anon_doc}" --xml || tidy_exit 5 "Error running SemEHR-anon given ${input_doc} from ${input_dcm}" # If there's still no XML file then exit if [ ! 
-f "$anon_xml" ]; then tidy_exit 6 "ERROR: SemEHR-anon failed to convert $input_doc to $anon_xml" diff --git a/src/applications/SRAnonTool/CTP_XMLToDicom.py b/src/applications/SRAnonTool/CTP_XMLToDicom.py index 8f733a334..c2c9b30eb 100755 --- a/src/applications/SRAnonTool/CTP_XMLToDicom.py +++ b/src/applications/SRAnonTool/CTP_XMLToDicom.py @@ -35,6 +35,8 @@ parser.add_argument('-i', dest='input_dcm', action="store", help='Path to raw DICOM file') parser.add_argument('-x', dest='input_xml', action="store", help='Path to annotation XML file') parser.add_argument('-o', dest='output_dcm', action="store", help='Path to anonymised DICOM file to have redacted text inserted') + parser.add_argument('--replace-html', action="store", help='replace HTML with a character, default is dot (.), or "squash" to eliminate') + parser.add_argument('--replace-newlines', action="store", help='replace carriage returns and newlines with a character (e.g. a space) or "squash" to eliminate') args = parser.parse_args() if not args.input_dcm or not args.input_xml or not args.output_dcm: parser.print_help() @@ -71,9 +73,27 @@ logging.error('ERROR: no such file named {} (redacted text is written into this so it must exist)'.format(args.output_dcm)) exit(1) + # --------------------------------------------------------------------- + # If the file is a DICOM then DicomText has options to change the output format. + # These are passed to the DicomText and StructuredReport constructors. 
+ DicomTextArgs = { + #'include_header' : True, + #'replace_HTML_entities' : True, + 'replace_HTML_char' : '.', + 'replace_newline_char' : '\n' + } + if args.replace_html: + DicomTextArgs['replace_HTML_char'] = args.replace_html + if args.replace_html == "squash": + DicomTextArgs['replace_HTML_char'] = '' + if args.replace_newlines: + DicomTextArgs['replace_newline_char'] = args.replace_newlines + if args.replace_newlines == "squash": + DicomTextArgs['replace_newline_char'] = '' + # --------------------------------------------------------------------- # Read the original DICOM file and parse the original text - dicomtext = DicomText.DicomText(args.input_dcm) + dicomtext = DicomText.DicomText(args.input_dcm, **DicomTextArgs) dicomtext.parse() # Read the annotated XML file diff --git a/src/applications/SRAnonTool/test/modify_SR.py b/src/applications/SRAnonTool/test/modify_SR.py new file mode 100755 index 000000000..f78aaf5a6 --- /dev/null +++ b/src/applications/SRAnonTool/test/modify_SR.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Read in report02.dcm and write report02mod.dcm +# with all instances of "The patient" replaced by one of the actual names +# found in the DICOM tags. 
+ +import random +import pydicom + +ds = pydicom.dcmread('report02.dcm') + +names = set() +for elem in ds.iterall(): + if elem.VR == 'PN': + names.add(str(elem.value)) + +name_parts = set() +for name in names: + # family name complex, given name complex, middle name, name prefix, name suffix + for part in name.split('^'): + for s in part.split(' '): + if len(s) > 3: + name_parts.add(s) +name_parts = list(name_parts) +print(name_parts) + +print(random.sample(name_parts, 1)[0]) + +for elem in ds.iterall(): + if 'The patient' in str(elem.value): + elem.value = str(elem.value).replace('The patient', random.sample(name_parts, 1)[0]) + if 'the patient' in str(elem.value): + elem.value = str(elem.value).replace('the patient', random.sample(name_parts, 1)[0]) + +ds.save_as('report02mod.dcm') diff --git a/src/common/Smi_Common_Python/SmiServices/Dicom.py b/src/common/Smi_Common_Python/SmiServices/Dicom.py index c85c9d2e1..785f1abc8 100644 --- a/src/common/Smi_Common_Python/SmiServices/Dicom.py +++ b/src/common/Smi_Common_Python/SmiServices/Dicom.py @@ -35,7 +35,7 @@ def tag_alt(tag): alt = '{:0>8X}'.format(pydicom.datadict.tag_for_keyword(tag)) return(alt) -def tag_val(dicomdict, tagname): +def tag_val(dicomdict, tagname, atomic = False): """ Look up dicomdict['tagname'] where tagname can be a hex string or a name string and the dicomdict can hold either the hex string or the name @@ -48,7 +48,7 @@ def tag_val(dicomdict, tagname): retval = dicomdict[tagname] elif alt_tagname in dicomdict: retval = dicomdict[alt_tagname] - # The dcm2jsom or pydicom style has 'vr' and 'Value' keys + # The dcm2json or pydicom style has 'vr' and 'Value' keys # so extract the Value (also sometimes has vr but no Value). if isinstance(retval, Mapping): if 'vr' in retval: @@ -57,9 +57,9 @@ def tag_val(dicomdict, tagname): val = retval.get('val', '') # but I've also seen val retval = val # Single element list reduced to just the first element - # but doing this breaks the assertions below. 
- #if isinstance(retval, list) and len(retval)==1: - # retval = retval[0] + # only if you explicitly request this with atomic=True. + if isinstance(retval, list) and len(retval)==1 and atomic: + retval = retval[0] return(retval) def tag_is(tagA, tagB): diff --git a/src/common/Smi_Common_Python/SmiServices/DicomText.py b/src/common/Smi_Common_Python/SmiServices/DicomText.py index 969cc9bf1..0723f0f1a 100644 --- a/src/common/Smi_Common_Python/SmiServices/DicomText.py +++ b/src/common/Smi_Common_Python/SmiServices/DicomText.py @@ -5,6 +5,7 @@ import pydicom import re import random +from SmiServices import Dicom from SmiServices.StructuredReport import sr_keys_to_extract, sr_keys_to_ignore from SmiServices.StringUtils import string_match_ignore_linebreak, redact_html_tags_in_string @@ -124,29 +125,54 @@ def tag(self, tagname): else: return '' - def _dataset_read_callback(self, dataset, data_element): - """ Internal function called during a walk of the dataset. - Builds a class-member string _p_text as it goes. + def list_of_PNAMEs(self): + """ Return a list of the values of all tags with a VR of PN + """ + names = set() + for elem in self._dicom_raw.iterall(): + if elem.VR == 'PN' and len(str(elem.value)): + names.add(str(elem.value)) + return list(names) + + def _data_element_parser(self, data_element): + """ Internal function called by the parse and redact callbacks + to consistently convert the data_element into the string which + will be returned, in both raw and html-redacted versions. + Returns the tuple (rc, rc_parsed). + If html redaction is disabled then rc_parsed == rc. 
""" rc = '' + rc_parsed = '' if data_element.VR in ['SH', 'CS', 'SQ', 'UI']: # "SH" Short String, "CS" Code String, "SQ" Sequence, "UI" UID ignored pass elif data_element.VR == 'LO': # "LO" Long String typically used for headings - rc = rc + ('# %s' % str(data_element.value)) + '\n' + rc = rc_parsed = rc + ('# %s' % str(data_element.value)) + '\n' else: rc = rc + ('%s' % (str(data_element.value))) + rc += '\n' # Replace HTML tags with spaces if self._replace_HTML_entities and '<' in rc: - rc = redact_html_tags_in_string(rc, + rc_parsed = redact_html_tags_in_string(rc, replace_char = self._replace_HTML_char, replace_newline = self._replace_newline_char) - rc += '\n' - if rc == '': + else: + rc_parsed = rc + return (rc, rc_parsed) + + def _dataset_read_callback(self, dataset, data_element): + """ Internal function called during a walk of the dataset. + Builds a class-member string _p_text as it goes. + """ + rc, rc_parsed = self._data_element_parser(data_element) + if rc_parsed == '': return - self._offset_list.append( { 'offset':len(self._p_text), 'string': rc} ) - self._p_text = self._p_text + rc + self._offset_list.append( { + 'offset':len(self._p_text), + 'string': rc_parsed + } ) + self._p_text = self._p_text + rc_parsed def parse(self): """ Walk the dataset to extract the text which can then be @@ -157,10 +183,16 @@ def parse(self): # except explicitly do not include TextValue, handled below list_of_tagname_desired = [ k['tag'] for k in sr_keys_to_extract ] if self._include_header: + # Add all the known [[something]] headers for srkey in sr_keys_to_extract: if srkey['tag'] in self._dicom_raw and srkey['tag'] != 'TextValue': line = '[[%s]] %s\n' % (srkey['label'], srkey['decode_func'](str(self._dicom_raw[srkey['tag']].value))) self._p_text = self._p_text + line + # Collect all names in the whole document and add [[Other Names]] header + names_list = self.list_of_PNAMEs() + for name in names_list: + line = '[[Other Names]] %s\n' % Dicom.sr_decode_PNAME(name) + 
self._p_text = self._p_text + line # Now read ALL tags and use a blacklist (and ignore already done in whitelist). # Private tags will have tagname='' so ignore those too. if self._include_header: @@ -210,6 +242,10 @@ def redact_string(self, plaintext, offset, rlen, VR): redact_char = DicomText._redact_char_digit if DicomText._redact_random_length: redact_length = random.randint(-int(rlen/2), int(rlen/2)) + # Replace all dates with 11111111 so that they validate ok + if VR in ['DA', 'DT']: + redact_length = 8 + redact_char = '1' redacted_part = redact_char.rjust(redact_length, redact_char) if redact_char else '' rc = plaintext[0:offset] + redacted_part + plaintext[offset+rlen:] return rc @@ -219,22 +255,17 @@ def _dataset_redact_callback(self, dataset, data_element): Builds a class-member string _r_text as it goes. Uses the annotation list in self._annotations to redact text. """ - - rc = '' - if data_element.VR in ['SH', 'CS', 'SQ', 'UI']: - pass - elif data_element.VR == 'LO': - rc = rc + ('# %s' % str(data_element.value)) + '\n' - else: - rc = rc + ('%s' % (str(data_element.value))) + '\n' - if rc == '': + rc, rc_parsed = self._data_element_parser(data_element) + if rc_parsed == '': return + rc_without_html = rc_parsed # The current string is now len(self._r_text) ..to.. 
+len(rc) current_start = len(self._r_text) current_end = current_start + len(rc) replacement = rc replacedAny = False #print('At %d = %s' % (current_start, str(data_element.value))) + # Check every annotation to see, if not already done, if it appears in this rc for annot in self._annotations: # Sometimes it reports text:None so ignore if not annot['text'] or (annot['start_char'] == annot['end_char']): @@ -252,9 +283,6 @@ def _dataset_redact_callback(self, dataset, data_element): # SemEHR may have an extra line at the start so start_char offset need adjusting for offset in [self._redact_offset] + list(range(-32, 32)): # Do the comparison using text without html but replace inside text with html - rc_without_html = redact_html_tags_in_string(rc, - replace_char = self._replace_HTML_char, - replace_newline = self._replace_newline_char) if self._replace_HTML_entities else rc if string_match_ignore_linebreak(rc_without_html[annot_at+offset : annot_end+offset], annot['text']): replacement = self.redact_string(replacement, annot_at+offset, annot_end-annot_at, data_element.VR) replaced = replacedAny = True @@ -270,8 +298,10 @@ def _dataset_redact_callback(self, dataset, data_element): # Always fully redact the content of PersonName and Date tags replacement = self.redact_string(rc, 0, len(rc), data_element.VR) replacedAny = True + # Put this replacement value back into the DICOM if replacedAny: data_element.value = replacement + # _r_text is the original, _redacted_text has been redacted self._r_text = self._r_text + rc self._redacted_text = self._redacted_text + replacement return replacement if replacedAny else None @@ -401,6 +431,7 @@ def test_DicomText(): [[Patient Birth Date]] 19781024 [[Patient Sex]] M [[Referring Physician Name]] +[[Other Names]] John R Walz [[ContentSequence]] # Request MRI: Knee diff --git a/src/common/Smi_Common_Python/SmiServices/StructuredReport.py b/src/common/Smi_Common_Python/SmiServices/StructuredReport.py index d9ae7ed2e..2d99e7b4f 100644 
--- a/src/common/Smi_Common_Python/SmiServices/StructuredReport.py +++ b/src/common/Smi_Common_Python/SmiServices/StructuredReport.py @@ -14,6 +14,29 @@ # List of known keys which we either parse or can safely ignore # (all the others will be reported during testing to ensure no content is missed). sr_keys_to_extract = [ + # These are all of type PN: + { 'label':'Consulting Physician Name', 'tag':'ConsultingPhysicianName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Performing Physician Name', 'tag':'PerformingPhysicianName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Name Of Physicians Reading Study', 'tag':'NameOfPhysiciansReadingStudy', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Operators Name', 'tag':'OperatorsName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Other Patient Names', 'tag':'OtherPatientNames', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Patient Birth Name', 'tag':'PatientBirthName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Patient Mother Birth Name', 'tag':'PatientMotherBirthName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Secondary Reviewer Name', 'tag':'SecondaryReviewerName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Evaluator Name', 'tag':'EvaluatorName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Scheduled Performing Physician Name', 'tag':'ScheduledPerformingPhysicianName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Names Of Intended Recipients Of Results', 'tag':'NamesOfIntendedRecipientsOfResults', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Human Performer Name', 'tag':'HumanPerformerName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Verifying Observer Name', 'tag':'VerifyingObserverName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Person Name', 'tag':'PersonName', 'decode_func':Dicom.sr_decode_PNAME }, + { 'label':'Content Creator Name', 'tag':'ContentCreatorName', 'decode_func':Dicom.sr_decode_PNAME }, + { 
'label':'Reviewer Name', 'tag':'ReviewerName', 'decode_func':Dicom.sr_decode_PNAME }, + # Plain text: + { 'label':'Institution Name', 'tag':'InstitutionName', 'decode_func':Dicom.sr_decode_plaintext }, + { 'label':'Institution Address', 'tag':'InstitutionAddress', 'decode_func':Dicom.sr_decode_plaintext }, + { 'label':'Institutional Department Name', 'tag':'InstitutionalDepartmentName', 'decode_func':Dicom.sr_decode_plaintext }, + { 'label':'Patient Address', 'tag':'PatientAddress', 'decode_func':Dicom.sr_decode_plaintext }, + # First version: { 'label':'Study Description', 'tag':'StudyDescription', 'decode_func':Dicom.sr_decode_plaintext }, { 'label':'Study Date', 'tag':'StudyDate', 'decode_func':Dicom.sr_decode_date }, { 'label':'Series Description', 'tag':'SeriesDescription', 'decode_func':Dicom.sr_decode_plaintext }, @@ -49,7 +72,6 @@ 'Modality', 'ModalitiesInStudy', 'Manufacturer', - 'InstitutionName', 'ReferencedPerformedProcedureStepSequence', 'TypeOfPatientID', 'IssuerOfPatientID', @@ -71,9 +93,7 @@ 'CodingSchemeIdentificationSequence', 'ImageType', 'SeriesTime', - 'InstitutionAddress', 'StationName', - 'InstitutionalDepartmentName', 'PhysiciansOfRecord', 'ManufacturerModelName', 'SoftwareVersions', @@ -116,21 +136,14 @@ 'LastMenstrualDate', 'MedicalRecordLocator', 'MilitaryRank', - 'NameOfPhysiciansReadingStudy', 'ObservationDateTime', 'Occupation', - 'OperatorsName', - 'OtherPatientNames', - 'PatientAddress', - 'PatientBirthName', 'PatientBirthTime', 'PatientComments', # XXX do we want this? 
'PatientInsurancePlanCodeSequence', - 'PatientMotherBirthName', 'PatientReligiousPreference', 'PatientState', 'PatientTelephoneNumbers', - 'PerformingPhysicianName', 'PredecessorDocumentsSequence', 'PregnancyStatus', 'QualityControlImage', @@ -329,9 +342,13 @@ def _SR_output_string(self, keystr, valstr, fp): # The Key may also be a list but only take first element if isinstance(keystr, list): keystr = keystr[0] + # Handle a value like { "Alphabetic": "my name" } + if isinstance(valstr, dict) and 'Alphabetic' in valstr: + valstr = valstr['Alphabetic'] # If there is no value the do not print anything at all if valstr == None or valstr == '': return + # XXX should we be using Dicom.sr_decode_PNAME if it's a PN/PNAME ??? # Replace CRs with LF valstr = re.sub('\r+', '\n', valstr) # Replace all HTML @@ -348,6 +365,30 @@ def _SR_output_string(self, keystr, valstr, fp): fp.write('[[%s]] %s\n' % (keystr, valstr)) + # --------------------------------------------------------------------- + def find_PersonNames(self, json_dict, names_list): + """ Return a list of names (decoded from PN format having ^ separators) + from all of the 'PersonName' tags (which probably does not include any + top-level tags with a vr of PN). This is a recursive function. + Returns the names in the names_list list which must already exist. 
+ """ + if isinstance(json_dict, list): + for item in json_dict: + self.find_PersonNames(item, names_list) + elif isinstance(json_dict, dict): + for item in json_dict.keys(): + # Ignore the MongoDB message metadata dict + if item == 'MessageHeader': + return + # We don't have the datatype to check for "PN" so assume + # anything ending with Name contains a PersonName + # This will also catch Institution Name and Observer Name + if item.endswith('Name'): + tagval = tag_val(json_dict, item, atomic=True) + names_list.append(Dicom.sr_decode_PNAME(tagval)) + # Check if the value of this item is a list or dict + self.find_PersonNames(json_dict[item], names_list) + # --------------------------------------------------------------------- # Internal function to parse a DICOM tag which calls itself recursively # when it finds a sequence @@ -406,6 +447,10 @@ def _SR_parse_key(self, json_dict, json_key, fp): print('UNEXPECTED KEY %s = %s' % (json_key, json_dict[json_key]), file=sys.stderr) def SR_parse(self, json_dict, doc_name, fp = sys.stdout): + """ Parse a Structured Report held in a python dictionary + which has come from MongoDB or from dcm2json + Output to the file pointer fp (default is stdout). 
+ """ self._SR_output_string('Document name', doc_name, fp) @@ -414,6 +459,12 @@ def SR_parse(self, json_dict, doc_name, fp = sys.stdout): # _SR_output_string('Study Date', sr_decode_date(sr_get_key(json_dict, 'StudyDate'))) for sr_extract_dict in sr_keys_to_extract: self._SR_output_string(sr_extract_dict['label'], sr_extract_dict['decode_func'](Dicom.tag_val(json_dict, sr_extract_dict['tag'])), fp) + # Now output [[Other Names]] for every name found by find_PersonNames (keys ending in 'Name', used as a proxy for a vr of PN) + names_list = [] + self.find_PersonNames(json_dict, names_list) + #print(names_list) + for name in names_list: + self._SR_output_string('Other Names', name, fp) # Now output all the remaining tags which are not ignored for json_key in json_dict: @@ -440,6 +491,12 @@ def test_SR_parse_key(): ] }, "TextValue": { "vr": "UT", "Value": [ "MRI: Knee" ] } + }, + { + "RelationshipType": { "vr": "CS", "Value": [ "CONTAINS" ] }, + "ValueType": { "vr": "CS", "Value": [ "PNAME" ] }, + "ConceptNameCodeSequence": { "vr": "SQ", "Value": [ { "CodeMeaning": { "vr": "LO", "Value": [ "Physician" ] } } ] }, + "PersonName": { "vr": "PN", "Value": [ { "Alphabetic": "Klugman^^^Dr." } ] } } ] } @@ -452,18 +509,29 @@ def test_SR_parse_key(): with TemporaryFile(mode='w+', encoding='utf-8') as fd: sr._SR_parse_key(SR_dict, 'ContentSequence', fd) fd.seek(0) - assert(fd.read() == '[[Request]] MRI: Knee\n') + assert(fd.read() == '[[Request]] MRI: Knee\n[[Physician]] Klugman^^^Dr.\n') # Add some HTML into the string and check it's redacted SR_dict['ContentSequence']['Value'][0]['TextValue']['Value'][0] = "