In [28]:
import os
import re
import numpy as np
import pydicom
from glob import glob
from pydicom.filereader import read_dicomdir

In [40]:
root_dir = '/home/rodrigo/Batch/'
# root_dir = '/home/rodrigo/processed-dataset/'

# ['root_dir/CQ500CTX/', 'root_dir/CQ500CTX/']
def get_exam_dirs(root_dir):
    subdirs = []
    for dir_entry in os.scandir(root_dir):
        if dir_entry.is_dir() and dir_entry.name.startswith("CQ500"):
            # NONFUNCTION RenameDirs
            subdirs.append(root_dir + dir_entry.name + "/")
    return subdirs

def gen_exams_dictionary(exam_dirs):
    # this format will be: %{exam1: %{mod1: [info], mod2: [info]}, exam2: %{mod3: [info], mod4: [info]}}
    exams_dict = dict()
    for exam_dir in exam_dirs:
        mods_dict = list()
        for modality in os.listdir(exam_dir):
            if not re.search("(POST|(?<!PRE )CONTRAST|BONE)", modality):
                dcm_files = glob(exam_dir + modality + "/*.dcm")
                dcm_data = pydicom.read_file(dcm_files[0])
                info = [modality, float(dcm_data.PixelSpacing[0]), float(dcm_data.PixelSpacing[1]), float(dcm_data.SliceThickness), len(dcm_files)]
                mods_dict.append(info)
        exams_dict[exam_dir] = mods_dict
    return exams_dict

In [127]:
# {'/home/rodrigo/Batch/CQ500CT0/': 
# ['modality', PxSpacing[0], PxSpacing[1], Thickness, n_slices]   
#[['CT 4cc .', 0.443359,     0.443359,     0.625,     239], 
# ['CT Plain', 0.451172,     0.451172,     5.0,       30],
# ['CT 4cc .', 0.443359,     0.443359,     5.0,     239]]
# sort by 1) Thickness = 5, 2) Fewest Number of Slices

def check_best_scan(exams_dictionary):
    for exam_id in exams_dictionary:
        scans_info = np.array(exams_dictionary[exam_id])
        ind = scans_info[:,3].argsort()[::-1]
        scans_info = scans_info[ind]
        length = len(scans_info)
        if scans_info[0][3] == '5.0':
            for i in range(length):
                if scans_info[-1][3] != '5.0':
                    scans_info = scans_info[:-1, :]
            scans_info = scans_info[scans_info[:,1].argsort()]
            exams_dictionary[exam_id] = list(scans_info[0])
        else:
            scans_info = scans_info[scans_info[:,1].argsort()]
            exams_dictionary[exam_id] = list(scans_info[0])
    return exams_dictionary


In [128]:
exam_dirs = get_exam_dirs(root_dir)
len(exam_dirs)
# NONFUNCTION MoveFromUnknown
exams_dictionary = gen_exams_dictionary(exam_dirs)
best_scan = check_best_scan(exams_dictionary)
print(best_scan)

{'/home/rodrigo/Batch/CQ500CT0/': ['CT Plain', '0.451172', '0.451172', '5.0', '30'], '/home/rodrigo/Batch/CQ500CT99/': ['CT PRE CONTRAST 5MM STD', '0.513672', '0.513672', '5.0', '31']}


{'/home/rodrigo/Batch/CQ500CT0/': [['CT 4cc sec 150cc D3D on', 0.443359, 0.443359, 0.625, 239], ['CT Plain', 0.451172, 0.451172, 5.0, 30], ['CT PLAIN THIN', 0.451172, 0.451172, 0.625, 240], ['CT 4cc sec 150cc D3D on-3', 0.443359, 0.443359, 0.625, 239], ['CT 4cc sec 150cc D3D on-2', 0.443359, 0.443359, 0.625, 239]], '/home/rodrigo/Batch/CQ500CT99/': [['CT PRE CONTRAST 5MM STD', 0.513672, 0.513672, 5.0, 31], ['CT PRE CONTRAST THIN', 0.513672, 0.513672, 0.625, 246]]}
/home/rodrigo/Batch/CQ500CT0/
/home/rodrigo/Batch/CQ500CT99/


In [12]:
data_dir = '/Users/zhengma/Documents/ConvOuch_Zheng/CQ500'

for dir_entry in os.scandir(path = data_dir):
    subject_dir_dict = dict()
    if dir_entry.is_dir():
        subject_dir_dict[dir_entry.name] = list()
        print(dir_entry.name)
        
        # loop through sub-directories and exclude some by name
        for sub_dir in os.scandir(dir_entry.path + "/Unknown Study/"):
            if sub_dir.is_dir() and not re.search("(POST|(?<!PRE )CONTRAST|BONE)", sub_dir.name):
                dcm_files = glob.glob(sub_dir.path + "/*.dcm")
                dicom_data = pydicom.read_file(dcm_files[0])

                # add info for these sub-directories to a dictionary
                sub_info = [sub_dir.name, float(dicom_data.PixelSpacing[0]), float(dicom_data.PixelSpacing[1]), float(dicom_data.SliceThickness), len(dcm_files)]
                subject_dir_dict[dir_entry.name].append(sub_info)
                # print(" -{}".format(subject_dir_dict[dir_entry.name][-1]))

        # warn if no usable directories found
        if(len(subject_dir_dict[dir_entry.name]) < 1):
            print("Warning: {subject} found no usable subdirectories".format(subject=dir_entry.name))
            subject_dir_dict.pop(dir_entry.name)    # remove the entry from the dict

        elif(len(subject_dir_dict[dir_entry.name]) > 1):
            
            # parse the remaining directories to find the best candidate scan
            subject_scans = subject_dir_dict[dir_entry.name]

            scans_info = np.array(subject_scans)
            sorted_idx = np.argsort(scans_info[:, 3])
            
            # check for a 5mm slice scan
            int(scans_info[sorted_idx[-1], 3].astype(float))
            if((scans_info[sorted_idx[-1], 3].astype(float)) == 5.0):
                scans_info_5 = scans_info[(scans_info[:, 3].astype(float)) == 5.0]
                print(scans_info_5)
                
                # check if there are two 5mm slice scans
                if(float(scans_info[sorted_idx[-2], 3]) == 5.0):
                    # if more than one 5mm slice scan, use the study with fewest slices
                    sorted_slices_idx = np.argsort(scans_info[:, 4])
                    # replace with fewest slices study
                    subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_slices_idx[-1]]
                    if(subject_dir_dict[dir_entry.name][4] < 32):
                        print("Warning: Fewer than 32 slices for {}".format(dir_entry.path + "/" + subject_dir_dict[dir_entry.name][0]))
                else:
                    # replace entries with the 5mm study
                    subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_idx[-1]]

            else:   # no 5mm slice scans, so choose the smallest
                # replace with the small slice scan
                subject_dir_dict[dir_entry.name] = subject_dir_dict[dir_entry.name][sorted_idx[0]]


convert the chosen study folder to nifti for further processing
print(dir_entry.path + "/" + dir_entry.name)



[['CT 5mm' '0.5625' '0.5625' '5.0' '36']]
[['CT Plain' '0.478516' '0.478516' '5.0' '32']]
[['CT Plain' '0.451172' '0.451172' '5.0' '30']]
[['CT Plain' '0.460938' '0.460938' '5.0' '30']]
[['CT 5mm' '0.488281' '0.488281' '5.0' '32']]
[['CT Plain' '0.488281' '0.488281' '5.0' '30']]
[['CT 55mm Plain' '0.466797' '0.466797' '5.0' '36']]


# Non-Function Code

In [None]:
# RenameDirs: used this to rename files from 'CQ500CTX CQ500CTX' to 'CQ500CTX'
#   dir_name = dir_entry.name.split(" ")[0]
#   os.rename(root_dir + dir_entry.name, root_dir + dir_name)

# MoveFromUnkown: "exam/Unkown Study/*" -> "exam/*" 
# for dir in dir_list:
#     for subdir in os.listdir(dir):
#         if subdir == "Unknown Study":
#             for subsubdir in os.listdir(dir + subdir):
#                 os.rename(dir + subdir + "/" + subsubdir, dir + subsubdir)
#             os.rmdir(dir + subdir)