# Create DICOM files from JPGs for ISIC Challenge
This notebook contains the code to encapsulate JPG files from a (set of)
subfolder(s) into a target folder, using a CSV file as metadata, mapping
age, sex, and anatomic site to DICOM attributes/header fields.

In [1]:
# imports
from datetime import datetime
import glob
import json
import os
import shutil
import subprocess

import pandas as pd

In [2]:
# definitions
csvfile = '2020_Challenge.csv'
sourcefolder = 'JPGs'
targetfolder = 'DICOMs'
if not os.path.exists(targetfolder):
    os.mkdir(targetfolder)

In [3]:
def encapsulateJPGinDICOM(jpgfile:str, settings:dict, targetfolder:str) -> bool:
    """
    Takes the filename of a JPG image and encapsulated it in DICOM.
    
    Parameters
    ----------
    jpgfile : str
        Filename of the JPG image to encapsulate.
    settings : dict
        Settings (Characteristics) to make in DICOM header.
    targetfolder : str
        Foldername where to place the DICOM file (same name as JPG).
    
    Returns
    -------
    success : bool
        True if DICOM was created successfully.
        
    Notes
    -----
    The code requires that the following JAVA archives (JAR files) be
    present in the "java" subfolder from the path where this notebook is
    stored (which is determined using the _dh[0] history of the %cd macro).
    """
    
    # anatomic site mappings
    anat_mappings = {
        'blank': { 'cv': '39937001', 'csd': 'SCT', 'cm': 'Skin' },
        'head/': { 'cv': '70762009', 'csd': 'SCT', 'cm': 'Skin of head' },
        'lower': { 'cv': '281739007', 'csd': 'SCT', 'cm': 'Skin of part of lower limb' },
        'torso': { 'cv': '86381001', 'csd': 'SCT', 'cm': 'Skin of trunk' },
        'upper': { 'cv': '281733008', 'csd': 'SCT', 'cm': 'Skin of part of upper limb' },
    }
    
    # file exists
    if not os.path.exists(jpgfile):
        return False
    if len(jpgfile) < 5 or (jpgfile[-4:].lower() != '.jpg' and jpgfile[-5:].lower() != '.jpeg'):
        return False
    jpglocalfile = jpgfile.split(os.sep)[-1]
    jpgfileparts = jpglocalfile.split('.')
    jpgfilename = '.'.join(jpgfileparts[:-1])
    if len(jpgfilename) == 12 and jpgfilename[0:5] == 'ISIC_':
        ostudy_id = jpgfilename
    else:
        ostudy_id = 'ISIC_#######'
    if not os.path.exists(targetfolder):
        return False
    javatarget = targetfolder
    
    # date/time
    now = datetime.now()
    nowdate = str(now.date()).replace('-', '')
    nowtime = str(now.time()).replace(':', '')[0:6]
    
    # paths for JAVA files
    selffolder = _dh[0]
    javafolder = selffolder + '/' + 'java' + '/'
    if os.sep != '/':
        jpgfile = jpgfile.replace(os.sep, '/')
        javafolder = javafolder.replace(os.sep, '/')
        javatarget = javatarget.replace(os.sep, '/')
    imageio_jar = javafolder + 'jai_imageio.jar'
    javax_jar = javafolder + 'javax.json-1.0.4.jar'
    pixelmed_jar = javafolder + 'pixelmed.jar'
    codec_jar = javafolder + 'pixelmed_codec.jar'
    
    # sanitize dictionary
    characteristics_dict = {
        'options': {
            'ReplaceCodingSchemeIdentificationSequence': True,
            'AppendToContributingEquipmentSequence': False,
        },
        'remove': {
            'Laterality': None,
        },
        'top': {
            'PatientAge': '000Y',
            'PatientID': 'IP_0000000',
            'PatientSex': 'X',
            'StudyDate': nowdate,
            'StudyTime': nowtime,
            'ContentDate': nowdate,
            'ContentTime': nowtime,
            'StudyDescription': 'ISIC 2020 Grand Challenge image',
            'StudyID': ostudy_id,
            'InstitutionName': 'ISDIS',
            'ReferringPhysicianName': '',
            'BodyPartExamined': 'SKIN',
            'AnatomicRegionSequence': anat_mappings['blank'],
        },
    }
    patient_id = None
    study_id = None
    if isinstance(settings, dict):
        age = None
        if 'age' in settings:
            age = settings['age']
        if 'PatientAge' in settings:
            age = settings['PatientAge']
        if 'age_approx' in settings:
            age = settings['age_approx']
        if isinstance(age, float) or isinstance(age, int):
            age = 5 * int(0.2 * float(age))
            if age >= 0 and age < 1000:
                characteristics_dict['top']['PatientAge'] = '{0:03d}Y'.format(age)
        if 'patient_id' in settings:
            patient_id = settings['patient_id']
        if 'PatientID' in settings:
            patient_id = settings['PatientID']
        if isinstance(patient_id, str) and len(patient_id) > 3 and patient_id[0:3] == 'IP_':
            characteristics_dict['top']['PatientID'] = patient_id
        sex = None
        if 'sex' in settings:
            sex = settings['sex']
        if 'PatientSex' in settings:
            sex = settings['PatientSex']
        if isinstance(sex, str) and len(sex) > 0:
            sex = sex[0].upper()
            if sex in 'MF':
                characteristics_dict['top']['PatientSex'] = sex
        anat_site = None
        if 'anatom_site_general_challenge' in settings:
            anat_site = settings['anatom_site_general_challenge']
        if isinstance(anat_site, str) and len(anat_site) > 4:
            characteristics_dict['top']['BodyPartExamined'] = anat_site.upper()
            anat_site = anat_site[0:5].lower()
            if anat_site in anat_mappings:
                characteristics_dict['top']['AnatomicRegionSequence'] = anat_mappings[anat_site]
        if 'image_id' in settings:
            study_id = settings['image_id']
        if 'ImageID' in settings:
            study_id = settings['ImageID']
        if 'StudyID' in settings:
            study_id = settings['StudyID']
        if isinstance(study_id, str) and len(study_id) > 4 and study_id[0:5] == 'ISIC_':
            characteristics_dict['top']['StudyID'] = study_id
        else:
            study_id = None
    if study_id is None:
        study_id = ostudy_id
    init_id = study_id.replace('_', '^')

    # process JPG to DICOM
    jsonfilename = selffolder + os.sep + jpgfilename + '.json'
    dicomrawfile = targetfolder + os.sep + 'temp_' + jpgfilename + '.dcm'
    javarawfile = dicomrawfile
    if os.sep != '/':
        javarawfile = dicomrawfile.replace(os.sep, '/')
    try:
        with open(jsonfilename, 'w') as tempjson:
            json.dump(characteristics_dict, tempjson)
    except Exception as e:
        print(str(e))
        return False
    try:
        with open(jsonfilename, 'r') as tempjson_r:
            njson = json.load(tempjson_r)
    except Exception as e:
        print(str(e))
        return False
    try:
        jcmd = ('java -Djava.awt.headless=true -cp ' +
            '"' + pixelmed_jar + ';' + imageio_jar + ';' + javax_jar + '" ' +
            'com.pixelmed.dicom.EncapsulateImageInDicom "' + jpgfile + '" "' +
            javarawfile + '" "' + init_id + '" "' + study_id +
            '" "1" "1" "1" XC 1.2.840.10008.5.1.4.1.1.77.1.4')
        #print(jcmd)
        jproc = subprocess.Popen(jcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (sout_e, serr_e) = jproc.communicate()
        jproc.kill()
        #print(sout_e)
        #print(serr_e)
    except Exception as e:
        print(str(e))
        os.remove(jsonfilename)
        return False
    try:
        dcmtempdir = targetfolder + os.sep + jpgfilename + '_tmpdir'
        javatempdir = dcmtempdir
        javajsonfile = jsonfilename
        if os.sep != '/':
            javatempdir = dcmtempdir.replace(os.sep, '/')
            javajsonfile = jsonfilename.replace(os.sep, '/')
        os.mkdir(dcmtempdir)
        jcmd = ('java -Djava.awt.headless=true -cp ' +
            '"' + pixelmed_jar + ';' + imageio_jar + ';' + javax_jar + '" ' +
            'com.pixelmed.apps.SetCharacteristicsFromSummary "' +
            javajsonfile + '" "' + javarawfile + '" "' + javatempdir + '"')
        #print(jcmd)
        jproc = subprocess.Popen(jcmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (sout_s, serr_s) = jproc.communicate()
        jproc.kill()
        #print(sout_e)
        #print(serr_e)
        dcmout = glob.glob(dcmtempdir + os.sep + '*' + os.sep + '*' + os.sep + '*' + os.sep + '*.dcm')
        os.replace(dcmout[0], targetfolder + os.sep + jpgfilename + '.dcm')
        shutil.rmtree(dcmtempdir)
        os.remove(dicomrawfile)
    except Exception as e:
        print(str(e))
        try:
            os.remove(dicomrawfile)
        except:
            pass
        os.remove(jsonfilename)
        return False
    os.remove(jsonfilename)
    return True

In [4]:
# load CSV
csv = pd.read_csv(csvfile).set_index('image_name')

In [5]:
# find JPGs
jpgs = glob.glob(sourcefolder + os.sep + '*.jpg')
if len(jpgs) == 0:
    jpgs = glob.glob(sourcefolder + os.sep + '*' + os.sep + '*.jpg')
    if len(jpgs) == 0:
        raise RuntimeError('No JPG files found.')

In [6]:
# process images
print('Processing {0:d} JPGs to DICOMs...'.format(len(jpgs)))
for idx,jpgfile in enumerate(jpgs):
    if idx % 100 == 0:
        print(idx)
    image_name = jpgfile.split(os.sep)[-1].split('.')[0]
    try:
        image_settings = dict(csv.loc[image_name])
        if not 'patient_id' in image_settings:
            raise RuntimeError('Image ' + image_name + ' not in metadata CSV.')
    except Exception as e:
        print(str(e))
        continue
    encapsulateJPGinDICOM(jpgfile, image_settings, targetfolder)
    break

Processing 44108 JPGs to DICOMs...
0
