In [1]:
import os, string 
from pathlib import Path
import pydicom as dicom
from tqdm import tqdm_notebook
from pydicom.errors import InvalidDicomError
import numpy as np
import sys

import logging
logger = logging.getLogger('importer')

In [2]:
from mammograpy_dicom_parser import find_dicoms, find_mammograms, make_patient_mapping

In [3]:
# Discover every DICOM file below the archive root, then keep the mammograms.
path = '/data/archives/DCIS/'
dicoms = find_dicoms(path)
mammograms, patient_ids, failed_to_parse = find_mammograms(dicoms)

# Append (mode 'a') one line per entry of `failed_to_parse`, so entries from
# earlier runs stay in the log file.
with open('failed_to_parse.log', 'a') as log_file:
    log_file.writelines(entry + '\n' for entry in failed_to_parse)


/data/archives/DCIS/DICOMS does not contain dicom files, but 5 other files.


HBox(children=(IntProgress(value=0, max=4220), HTML(value='')))

Failed to import /data/archives/DCIS/DICOMS/DICOMS_2006-2010/Pt 520 (Mw) A.30.0000520/19300603 (Hosp 1-MASTERPAGE)/DICOM/IM_000002.dcm with error: 'FileDataset' object has no attribute 'PixelData'
Failed to import /data/archives/DCIS/DICOMS/DICOMS_2006-2010/Pt 520 (Mw) A.30.0000520/19300603 (Hosp 1-MASTERPAGE)/DICOM/IM_000004.dcm with error: 'FileDataset' object has no attribute 'PixelData'
Failed to import /data/archives/DCIS/DICOMS/DICOMS_2006-2010/Pt 520 (Mw) A.30.0000520/19300603 (Hosp 1-MASTERPAGE)/DICOM/IM_000001.dcm with error: 'FileDataset' object has no attribute 'PixelData'
Failed to import /data/archives/DCIS/DICOMS/DICOMS_2006-2010/Pt 520 (Mw) A.30.0000520/19300603 (Hosp 1-MASTERPAGE)/DICOM/IM_000003.dcm with error: 'FileDataset' object has no attribute 'PixelData'
Failed to import /data/archives/DCIS/DICOMS/DICOMS_rareoriginalrange/Pt_125/IM_000002.dcm with error: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=




In [4]:
# Build the per-patient lookup. `create_temporary_file_structure` later reads
# `mapping[patient_id]` and concatenates it with a study index to form a folder
# name, so the values are presumably strings (e.g. pseudonymised patient codes)
# -- TODO confirm against `make_patient_mapping` in mammograpy_dicom_parser.
mapping = make_patient_mapping(patient_ids)

In [47]:
from collections import defaultdict

def rewrite_structure(mammograms_dict, mapping):
    """Group study UIDs per patient and give each study a per-patient number.

    Parameters
    ----------
    mammograms_dict : dict
        Maps a filename to a metadata record. Every record must provide the
        keys ``'StudyInstanceUID'`` and ``'PatientID'``.
    mapping : dict
        Not used by this function; kept in the signature so existing calls
        keep working.

    Returns
    -------
    studies_per_patient : collections.defaultdict of list
        Patient ID -> that patient's distinct study UIDs, in the order in
        which they are first encountered while iterating `mammograms_dict`.
    uid_mapping : dict
        Study UID -> 1-based position of the study within its patient's list,
        zero-padded to at least two digits (``'01'``, ``'02'``, ...,
        ``'100'``).

    Raises
    ------
    KeyError
        If a record lacks ``'StudyInstanceUID'`` or ``'PatientID'``.
    """
    studies_per_patient = defaultdict(list)
    for fn in mammograms_dict:
        record = mammograms_dict[fn]
        study_instance_uid = record['StudyInstanceUID']
        patient_studies = studies_per_patient[record['PatientID']]
        # Keep first-seen order and drop duplicates: many files share a study.
        if study_instance_uid not in patient_studies:
            patient_studies.append(study_instance_uid)

    # NOTE: the result is keyed by study UID only. If the same UID occurs
    # under two patients, the entry of the patient processed last wins.
    uid_mapping = {}
    for patient_studies in studies_per_patient.values():
        for position, study_instance_uid in enumerate(patient_studies, start=1):
            uid_mapping[study_instance_uid] = '{:02d}'.format(position)

    return studies_per_patient, uid_mapping

def create_temporary_file_structure(mammograms, new_path='/home/jonas/DCIS',
                                    patient_mapping=None, study_mapping=None):
    """Symlink every mammogram into one folder per (patient, study).

    For each filename in `mammograms` a folder named
    ``patient_mapping[PatientID] + study_mapping[StudyInstanceUID]`` is
    created below `new_path`, and a symlink carrying the file's base name is
    placed in it.

    Parameters
    ----------
    mammograms : dict
        Maps a filename to a record providing the keys ``'PatientID'`` and
        ``'StudyInstanceUID'``.
    new_path : str or pathlib.Path
        Root directory of the new structure. It is created, including missing
        parents, when it does not exist yet.
    patient_mapping : dict, optional
        Patient ID -> folder-name prefix (must be a string). Defaults to the
        notebook-level global `mapping`.
    study_mapping : dict, optional
        Study UID -> folder-name suffix (must be a string). Defaults to the
        notebook-level global `uid_mapping`.

    Raises
    ------
    KeyError
        If a record lacks a required key, or an ID is absent from a mapping.
    """
    # Fall back to the notebook globals so that the original one-argument
    # call keeps working; pass the mappings explicitly to avoid hidden state.
    if patient_mapping is None:
        patient_mapping = mapping
    if study_mapping is None:
        study_mapping = uid_mapping

    root = Path(new_path)
    for fn in mammograms:
        record = mammograms[fn]
        folder_name = (patient_mapping[record['PatientID']]
                       + study_mapping[record['StudyInstanceUID']])

        folder = root / folder_name
        # parents=True: do not fail when `new_path` itself is missing.
        folder.mkdir(parents=True, exist_ok=True)

        source = Path(fn)
        try:
            # A relative symlink target is resolved relative to the link's
            # directory, so link to the absolute location of the source file.
            os.symlink(source.absolute(), folder / source.name)
        except FileExistsError:
            # NOTE: this also fires when a *different* source file with the
            # same base name was already linked into this folder; that file
            # is then skipped.
            logger.info(f'Symlinking for {source} already exists.')

In [48]:
# Number each patient's studies. `uid_mapping` must exist as a notebook-level
# name before the next call, because `create_temporary_file_structure` reads
# the globals `mapping` and `uid_mapping` instead of taking them as arguments.
studies_per_patient, uid_mapping = rewrite_structure(mammograms, mapping)
# Create the symlink tree under the function's default root, '/home/jonas/DCIS'.
create_temporary_file_structure(mammograms)