In [50]:
#IMPORTS
import numpy as np
import pydicom as dicom
from pydicom.fileset import FileSet
import pathlib
import os

#https://medium.com/p/f1623a7f40b8
#https://gist.githubusercontent.com/alex-weston-13/4dae048b423f1b4cb9828734a4ec8b83/raw/59e49d168421a667645c9ab231d3993fe0752724/sort_dicoms.py

In [51]:
# Read the DICOMDIR file of the exported study and print the parsed dataset.
# The location is a Windows drive path, hence PureWindowsPath.
dicomfilepath = pathlib.PureWindowsPath('E:\\IFL\\digibiop\\0001211180\\DICOMDIR')
ds = dicom.dcmread(dicomfilepath)
print(ds)

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 156
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Media Storage Directory Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.2.840.10008.123456.369990.2016008521.522962596.1218187497
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1
(0002, 0013) Implementation Version Name         SH: ''
-------------------------------------------------
(0004, 1130) File-set ID                         CS: 'SECTRA FILE SET'
(0004, 1141) File-set Descriptor File ID         CS: ''
(0004, 1142) Specific Character Set of File-set  CS: 'ISO_IR 100'
(0004, 1200) Offset of the First Directory Recor UL: 396
(0004, 1202) Offset of the Last Directory Record UL: 396
(0004, 1212) File-set Consistency Flag           US: 0
(0004, 1220)  Directory Reco

In [52]:
# Build a pydicom FileSet from the DICOMDIR dataset held in `ds`.
file = FileSet(ds)
# Uncomment to display the FileSet:
# print(file)

In [53]:
# src: folder tree holding the exported files; dst: root folder for the sorted copies.
src = "E:\\IFL\\digibiop\\0001211180\\DICOM\\0000D122\\AA06F95E\\AAF40D12"
dst = "E:\\IFL\\digibiop\\Copies"

print('reading file list...')
# Collect the full path of every file below src; os.walk descends into all
# sub-folders. A generator is used instead of a `for file in files` loop so that
# no loop variable named `file` is left behind to overwrite the `file` FileSet
# object created in an earlier cell.
# To skip non-DICOM files in messy folders, add a filter on `name` here.
unsortedList = []
for root, dirs, files in os.walk(src):
    unsortedList.extend(os.path.join(root, name) for name in files)

print('%s files found.' % len(unsortedList))

reading file list...
2667 files found.


In [54]:
# Spot-check one file from the source tree: read it and print element
# (0008,103E), the Series Description.
testfile = dicom.dcmread('E:\\IFL\\digibiop\\0001211180\\DICOM\\0000D122\\AA06F95E\\AAF40D12\\0000E2BE\\EE0A8535')
print(testfile[0x0008, 0x103e])

(0008, 103e) Series Description                  LO: 'Becken_T2_sag'


In [55]:
# Replace punctuation and spaces in text with underscores

def clean_text(string):
    """Return a lower-case version of ``string`` that is safe to use as a folder name.

    Every punctuation or whitespace character listed below is replaced by an
    underscore, which also makes the resulting names easier to search.

    Args:
        string: The text to clean. Non-string values (for example numeric
            element values) are converted with ``str()`` first.

    Returns:
        The cleaned, lower-cased text. An empty input yields an empty string.
    """
    # Original symbol set plus '<', '>' and '?', which Windows also rejects in
    # file and folder names.
    forbidden_symbols = '*.,"\\/|[]:; <>?'
    replacements = str.maketrans(dict.fromkeys(forbidden_symbols, "_"))
    return str(string).translate(replacements).lower()

In [75]:
# Copy every file in unsortedList into
#   dst/<patientID>/<studyDate>/<studyDescription>/<seriesDescription>/
# under a standardized file name.
for dicom_loc in unsortedList:
    # read the file; dcmread replaces the deprecated read_file alias.
    # force=True reads the file even if the File Meta Information header is missing.
    ds = dicom.dcmread(dicom_loc, force=True)

    # get patient, study, and series information, cleaned for use as folder names
    patientID = clean_text(ds.get("PatientID", "NA"))
    studyDate = clean_text(ds.get("StudyDate", "NA"))
    studyDescription = clean_text(ds.get("StudyDescription", "NA"))
    seriesDescription = clean_text(ds.get("SeriesDescription", "NA"))

    # generate new, standardized file name: <modality>.<seriesInstanceUID>.<instanceNumber>.dcm
    modality = ds.get("Modality", "NA")
    studyInstanceUID = ds.get("StudyInstanceUID", "NA")
    seriesInstanceUID = ds.get("SeriesInstanceUID", "NA")
    instanceNumber = str(ds.get("InstanceNumber", "0"))
    # NOTE(review): files of one series that lack InstanceNumber (default "0") or
    # share the same number get the same name, so the later one overwrites the
    # earlier one - confirm this cannot happen for this data.
    fileName = modality + "." + seriesInstanceUID + "." + instanceNumber + ".dcm"

    # save files to a 4-tier nested folder structure; os.makedirs creates all
    # missing intermediate folders, so one call covers every tier
    seriesDir = os.path.join(dst, patientID, studyDate, studyDescription, seriesDescription)
    if not os.path.exists(seriesDir):
        os.makedirs(seriesDir, exist_ok=True)
        # report each series folder once, when it is first created
        print('Saving out file: %s - %s - %s - %s.' % (patientID, studyDate, studyDescription, seriesDescription ))
    ds.save_as(os.path.join(seriesDir, fileName))

print('done.')
    

Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - dce_subb.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - pelvis_t2_spc_rst_tra_p2_iso.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - pelvis_ep2d_diff_tra.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - becken_fus_t2_tra.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - iauc60b.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - dce_sub.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - pelvis_ep2d_diff_tra_adc.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - iauc60.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - pelvis_mrac_pet_mlaa_siemens_becken_1bp_15min_lm_nac_images.
Saving out file: anonk5n8qe1br - 20140924 - mrt_becken_nativ_und_mit_km - _pelvis_mrac_pet_mlaa_siemens_becken_1bp_15min_lm_ac_images.
Savin

In [76]:
# Check the export: read back one of the files written under the Copies folder
# and print its full dataset.
testfile1 = dicom.dcmread('E:\\IFL\\digibiop\\Copies\\anonk5n8qe1br\\20140924\\mrt_becken_nativ_und_mit_km\\pelvis_t2_spc_rst_tra_p2_iso\\MR.1.3.12.2.1107.5.2.38.51004.2014092410442296352211977.0.0.0.7.dcm')
print(testfile1)

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 198
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.3.12.2.1107.5.2.38.51004.2014092410523236839212252
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.840.113654.2.3.1995.2.12.0
(0002, 0013) Implementation Version Name         SH: 'MIRCTN16NOV2000'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'M', 'ND', 'NORM']
(0008, 0012) Instance Creation Date              DA: '20140924'
(0008, 0013) Instance Creation Time              TM: '105233.140000'
(0008, 0016) SOP Class UID                       UI: MR Image Storage
(00

done.
