In [1]:
import os
import glob
import re
import numpy as np
import pydicom
from pydicom.filereader import read_dicomdir


data_dir = 'D:/CQ500/CQ500 dataset/'

for dir_entry in os.scandir(path = data_dir):
    subject_dir_dict = dict()
    if dir_entry.is_dir():
        subject_dir_dict[dir_entry.name] = list()
        # print(dir_entry.name)
        
        # loop through sub-directories and exclude some by name
        for sub_dir in os.scandir(dir_entry.path + "/Unknown Study/"):
            if sub_dir.is_dir() and not re.search("(POST|(?<!PRE )CONTRAST|BONE)", sub_dir.name):
                dcm_files = glob.glob(sub_dir.path + "/*.dcm")
                dicom_data = pydicom.read_file(dcm_files[0])

                # add info for these sub-directories to a dictionary
                sub_info = [sub_dir.name, float(dicom_data.PixelSpacing[0]), float(dicom_data.PixelSpacing[1]), float(dicom_data.SliceThickness), len(dcm_files)]
                subject_dir_dict[dir_entry.name].append(sub_info)
                # print(" -{}".format(subject_dir_dict[dir_entry.name][-1]))

        # warn if no usable directories found
        if(len(subject_dir_dict[dir_entry.name]) < 1):
            print("Warning: {subject} found no usable subdirectories".format(subject=dir_entry.name))
            subject_dir_dict.pop(dir_entry.name)    # remove the entry from the dict

        elif(len(subject_dir_dict[dir_entry.name]) > 1):
            
            # parse the remaining directories to find the best candidate scan
            subject_scans = subject_dir_dict[dir_entry.name]

            scans_info = np.array(subject_scans)
            sorted_idx = np.argsort(scans_info[:, 3])
            
            # check for a 5mm slice scan
            int(scans_info[sorted_idx[-1], 3].astype(float))
            if((scans_info[sorted_idx[-1], 3].astype(float)) == 5.0):
                scans_info_5 = scans_info[(scans_info[:, 3].astype(float)) == 5.0]
                print(scans_info_5)
                
                # check if there are two 5mm slice scans
                if(float(scans_info[sorted_idx[-2], 3]) == 5.0):
                    # if more than one 5mm slice scan, use the study with fewest slices
                    sorted_slices_idx = np.argsort(scans_info[:, 4])
                    subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_slices_idx[-1]]  # replace with fewest slices study
                    if(subject_dir_dict[dir_entry.name][4] < 32):
                        print("Warning: Fewer than 32 slices for {}".format(dir_entry.path + "/" + subject_dir_dict[dir_entry.name][0]))
                else:
                    subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_idx[-1]]         # replace entries with the 5mm study

            else:   # no 5mm slice scans, so choose the smallest
                subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_idx[0]]     # replace with the small slice scan


    # convert the chosen study folder to nifti for further processing
    # print(dir_entry.path + "/" + dir_entry.name)



[['CT Plain' '0.451172' '0.451172' '5.0' '30']]
[['CT Plain' '0.431641' '0.431641' '5.0' '28']]
[['CT Plain' '0.453125' '0.453125' '5.0' '30']]
[['CT 55mm Plain' '0.429688' '0.429688' '5.0' '32']]
[['CT Plain' '0.431641' '0.431641' '5.0' '28']]
[['CT PRE CONTRAST 5MM STD' '0.392578' '0.392578' '5.0' '28']]
[['CT 55mm Plain' '0.429688' '0.429688' '5.0' '32']]
[['CT 55mm Plain' '0.474609' '0.474609' '5.0' '32']
 ['CT 55mm Plain-2' '0.474609' '0.474609' '5.0' '32']]
[['CT 5mm' '0.496094' '0.496094' '5.0' '32']]
[['CT 5mm' '0.488281' '0.488281' '5.0' '32']]
[['CT 55mm Plain' '0.488281' '0.488281' '5.0' '32']]
[['CT 55mm Plain' '0.503906' '0.503906' '5.0' '36']]
[['CT Plain' '0.417969' '0.417969' '5.0' '34']]
[['CT Plain' '0.46875' '0.46875' '5.0' '34']]
[['CT Plain' '0.488281' '0.488281' '5.0' '28']]
[['CT PRE CONTRAST 5MM STD' '0.488281' '0.488281' '5.0' '30']]
[['CT PRE CONTRAST 5MM STD' '0.488281' '0.488281' '5.0' '33']]
[['CT 55mm Plain' '0.490234' '0.490234' '5.0' '36']]
[['CT Plain' 

 ['CT Plain' '0.488281' '0.488281' '5.0' '30']]
[['CT 55mm Plain' '0.494141' '0.494141' '5.0' '36']]
[['CT 55mm Plain' '0.46875' '0.46875' '5.0' '32']]
[['CT Plain' '0.423828' '0.423828' '5.0' '28']]
[['CT Plain' '0.488281' '0.488281' '5.0' '32']]
[['CT 5mm' '0.544922' '0.544922' '5.0' '32']]
[['CT 5mm' '0.519531' '0.519531' '5.0' '36']]
[['CT 55mm Plain' '0.480469' '0.480469' '5.0' '36']]
[['CT PRE CONTRAST 5MM STD' '0.488281' '0.488281' '5.0' '28']]
[['CT 5mm' '0.488281' '0.488281' '5.0' '28']
 ['CT 5mm-2' '0.488281' '0.488281' '5.0' '32']
 ['CT 5mm-3' '0.488281' '0.488281' '5.0' '16']]
[['CT PRE CONTRAST 5MM STD' '0.449219' '0.449219' '5.0' '30']]
[['CT Plain' '0.488281' '0.488281' '5.0' '32']]
[['CT 55mm Plain' '0.486328' '0.486328' '5.0' '36']]
[['CT Plain' '0.423828' '0.423828' '5.0' '30']]
[['CT 55mm Plain' '0.492188' '0.492188' '5.0' '32']]
[['CT 5mm' '0.521484' '0.521484' '5.0' '32']]
[['CT Plain' '0.488281' '0.488281' '5.0' '30']
 ['CT Plain-2' '0.488281' '0.488281' '5.0' '30

InvalidDicomError: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=True to force reading.

In [2]:
import os
import glob
import re
import numpy as np
import pydicom
from pydicom.filereader import read_dicomdir

data_dir = 'D:/CQ500/CQ500 dataset/'

for dir_entry in os.scandir(path=data_dir):
    subject_dir_dict = dict()
    if dir_entry.is_dir():
        subject_dir_dict[dir_entry.name] = list()
        # print(dir_entry.name)

        # loop through sub-directories and exclude some by name
        for sub_dir in os.scandir(dir_entry.path + "/Unknown Study/"):
            if sub_dir.is_dir() and not re.search("(POST|(?<!PRE )CONTRAST|BONE)", sub_dir.name):
                dcm_files = glob.glob(sub_dir.path + "/*.dcm")
                
                if len(dcm_files) == 0:
                    print(f"No DICOM files found in sub-directory: {sub_dir.path}")
                    continue

                try:
                    dicom_data = pydicom.read_file(dcm_files[0], force=True)
                    # add info for these sub-directories to a dictionary
                    sub_info = [
                        sub_dir.name, 
                        float(dicom_data.PixelSpacing[0]), 
                        float(dicom_data.PixelSpacing[1]), 
                        float(dicom_data.SliceThickness), 
                        len(dcm_files)
                    ]
                    subject_dir_dict[dir_entry.name].append(sub_info)
                    # print(" -{}".format(subject_dir_dict[dir_entry.name][-1]))

                except Exception as e:
                    print(f"Error reading DICOM file {dcm_files[0]}: {e}")
                    continue

        # warn if no usable directories found
        if len(subject_dir_dict[dir_entry.name]) < 1:
            print("Warning: {subject} found no usable subdirectories".format(subject=dir_entry.name))
            subject_dir_dict.pop(dir_entry.name)  # remove the entry from the dict

        elif len(subject_dir_dict[dir_entry.name]) > 1:
            # parse the remaining directories to find the best candidate scan
            subject_scans = subject_dir_dict[dir_entry.name]

            scans_info = np.array(subject_scans)
            sorted_idx = np.argsort(scans_info[:, 3])
            
            # check for a 5mm slice scan
            if float(scans_info[sorted_idx[-1], 3]) == 5.0:
                scans_info_5 = scans_info[scans_info[:, 3].astype(float) == 5.0]
                print(scans_info_5)
                
                # check if there are two 5mm slice scans
                if float(scans_info[sorted_idx[-2], 3]) == 5.0:
                    # if more than one 5mm slice scan, use the study with fewest slices
                    sorted_slices_idx = np.argsort(scans_info[:, 4])
                    subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_slices_idx[-1]]  # replace with fewest slices study
                    if subject_dir_dict[dir_entry.name][4] < 32:
                        print("Warning: Fewer than 32 slices for {}".format(dir_entry.path + "/" + subject_dir_dict[dir_entry.name][0]))
                else:
                    subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_idx[-1]]  # replace entries with the 5mm study

            else:   # no 5mm slice scans, so choose the smallest
                subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_idx[0]]  # replace with the small slice scan

    # convert the chosen study folder to nifti for further processing
    # print(dir_entry.path + "/" + dir_entry.name)


[['CT Plain' '0.451172' '0.451172' '5.0' '30']]
[['CT Plain' '0.431641' '0.431641' '5.0' '28']]
[['CT Plain' '0.453125' '0.453125' '5.0' '30']]
[['CT 55mm Plain' '0.429688' '0.429688' '5.0' '32']]
[['CT Plain' '0.431641' '0.431641' '5.0' '28']]
[['CT PRE CONTRAST 5MM STD' '0.392578' '0.392578' '5.0' '28']]
[['CT 55mm Plain' '0.429688' '0.429688' '5.0' '32']]
[['CT 55mm Plain' '0.474609' '0.474609' '5.0' '32']
 ['CT 55mm Plain-2' '0.474609' '0.474609' '5.0' '32']]
[['CT 5mm' '0.496094' '0.496094' '5.0' '32']]
[['CT 5mm' '0.488281' '0.488281' '5.0' '32']]
[['CT 55mm Plain' '0.488281' '0.488281' '5.0' '32']]
[['CT 55mm Plain' '0.503906' '0.503906' '5.0' '36']]
[['CT Plain' '0.417969' '0.417969' '5.0' '34']]
[['CT Plain' '0.46875' '0.46875' '5.0' '34']]
[['CT Plain' '0.488281' '0.488281' '5.0' '28']]
[['CT PRE CONTRAST 5MM STD' '0.488281' '0.488281' '5.0' '30']]
[['CT PRE CONTRAST 5MM STD' '0.488281' '0.488281' '5.0' '33']]
[['CT 55mm Plain' '0.490234' '0.490234' '5.0' '36']]
[['CT Plain' 

Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-305/Unknown Study/CT 55mm Plain\CT000000.dcm: 'FileDataset' object has no attribute 'PixelSpacing'
Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-306/Unknown Study/CT Plain 3mm\CT000000.dcm: 'FileDataset' object has no attribute 'PixelSpacing'
Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-306/Unknown Study/CT Plain THIN\CT000000.dcm: 'FileDataset' object has no attribute 'PixelSpacing'
Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-307/Unknown Study/CT Plain\CT000000.dcm: 'FileDataset' object has no attribute 'PixelSpacing'
Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-307/Unknown Study/CT PLAIN THIN\CT000000.dcm: 'FileDataset' object has no attribute 'PixelSpacing'
Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-308/Unknown Study/CT Plain\CT000000.dcm: 'FileDataset' object has no attribute 'PixelSpacing'
Error reading DICOM file D:/CQ500/CQ500 dataset/CQ500-CT-308/Unknown Study/

[['CT 5mm' '0.501953' '0.501953' '5.0' '36']
 ['CT 5mm-2' '0.501953' '0.501953' '5.0' '36']]
[['CT 2.55mm' '0.488281' '0.488281' '5.0' '32']
 ['CT 2.55mm-2' '0.488281' '0.488281' '5.0' '32']]
[['CT 55mm Plain' '0.449219' '0.449219' '5.0' '32']]
[['CT Plain' '0.488281' '0.488281' '5.0' '30']]
[['CT 2.55mm' '0.488281' '0.488281' '5.0' '32']
 ['CT ORAL IV' '0.820312' '0.820312' '5.0' '173']
 ['CT ORAL IV-2' '0.751953' '0.751953' '5.0' '108']
 ['CT ORAL IV-3' '0.976562' '0.976562' '5.0' '143']]
[['CT 55mm Plain' '0.421875' '0.421875' '5.0' '36']
 ['CT 55mm Plain-2' '0.390625' '0.390625' '5.0' '32']]
[['CT Plain' '0.488281' '0.488281' '5.0' '32']
 ['CT Plain-2' '0.488281' '0.488281' '5.0' '12']]
[['CT Plain' '0.488281' '0.488281' '5.0' '30']]
[['CT PRE CONTRAST 5MM STD' '0.451172' '0.451172' '5.0' '33']]
[['CT Plain' '0.488281' '0.488281' '5.0' '32']]
[['CT Plain' '0.462891' '0.462891' '5.0' '32']]
[['CT PRE CONTRAST 5MM STD' '0.431641' '0.431641' '5.0' '29']]
[['CT 55mm Plain' '0.429688' '