In [8]:
import os
import re
import csv
import numpy as np
import pydicom
from glob import glob
from pydicom.filereader import read_dicomdir

# Best scan for each study, create info dictionary

In [15]:
# Dataset root that contains the CQ500* exam directories.
# Set the CQ500_ROOT_DIR environment variable to point the notebook at another
# location without editing this cell; the default is the original path.
# Previously used location: '/home/rodrigo/Batch/'
# os.path.join(path, "") guarantees exactly one trailing separator, which the
# path handling further down relies on.
root_dir = os.path.join(os.environ.get("CQ500_ROOT_DIR", '/home/rodrigo/processed-dataset/'), "")

# Example return value: ['root_dir/CQ500CT0/', 'root_dir/CQ500CT1/', ...]
def get_exam_dirs(root_dir):
    """Return the CQ500 exam directories located directly under ``root_dir``.

    Only directories whose name starts with "CQ500" are returned; regular
    files and other directories are ignored.

    Args:
        root_dir: Path of the dataset root. A trailing slash is optional.

    Returns:
        Sorted list of exam directory paths. Every path ends with "/" so that
        callers may append a sub-directory name by plain concatenation.
    """
    subdirs = []
    # The context manager closes the scandir iterator as soon as we are done.
    with os.scandir(root_dir) as entries:
        for dir_entry in entries:
            if dir_entry.is_dir() and dir_entry.name.startswith("CQ500"):
                # NONFUNCTION RenameDirs
                # os.path.join keeps the result correct whether or not
                # root_dir itself ends with a separator.
                subdirs.append(os.path.join(root_dir, dir_entry.name) + "/")
    # scandir yields entries in arbitrary order; sort for reproducible runs.
    return sorted(subdirs)

# Resulting format: {exam1_dir: [[mod1_info], [mod2_info]], exam2_dir: [[mod3_info], [mod4_info]]}
def gen_exams_dictionary(exam_dirs):
    """Collect basic acquisition info for every usable series of each exam.

    A series (sub-directory of an exam) is skipped when its name matches
    POST, BONE, or CONTRAST not preceded by "PRE ". Entries that are not
    directories, and directories without any ``*.dcm`` file, are skipped too
    instead of aborting the whole run.

    Args:
        exam_dirs: Iterable of exam directory paths (as produced by
            ``get_exam_dirs``).

    Returns:
        dict mapping each exam directory to a list of rows
        ``[series_name, PixelSpacing[0], PixelSpacing[1], SliceThickness,
        number_of_dcm_files]``. Exams without any usable series are left out
        and reported with a printed warning.
    """
    exams_dict = dict()
    for exam_dir in exam_dirs:
        mods_info = []
        # Sorted so that row order (and therefore later tie-breaking) does not
        # depend on the file system's listing order.
        for modality in sorted(os.listdir(exam_dir)):
            if re.search("(POST|(?<!PRE )CONTRAST|BONE)", modality):
                continue
            series_dir = os.path.join(exam_dir, modality)
            if not os.path.isdir(series_dir):
                # Stray file next to the series directories.
                continue
            dcm_files = sorted(glob(os.path.join(series_dir, "*.dcm")))
            if not dcm_files:
                print("Warning: " + series_dir + " has no .dcm files, skipping")
                continue
            # Only header fields are needed, so skip loading the pixel data.
            dcm_data = pydicom.dcmread(dcm_files[0], stop_before_pixels=True)
            info = [
                modality,
                float(dcm_data.PixelSpacing[0]),
                float(dcm_data.PixelSpacing[1]),
                float(dcm_data.SliceThickness),
                len(dcm_files),
            ]
            mods_info.append(info)
        if mods_info:
            exams_dict[exam_dir] = mods_info
        else:
            print("Warning: " + exam_dir + " has no usable modality")
    return exams_dict

In [18]:
# {'/home/rodrigo/Batch/CQ500CT0/': 
# ['modality', PxSpacing[0], PxSpacing[1], Thickness, n_slices]   
#[['CT 4cc .', 0.443359,     0.443359,     0.625,     239], 
# ['CT Plain', 0.451172,     0.451172,     5.0,       30],
# ['CT 4cc .', 0.443359,     0.443359,     5.0,     239]]
# sort by 1) Thickness = 5, 2) Fewest Number of Slices

def check_best_scan(exams_dictionary, preferred_thickness=5.0):
    """Choose one scan per exam.

    Selection rule:
      1. If any scan of the exam has ``preferred_thickness`` as slice
         thickness, only those scans are considered; otherwise all scans are.
      2. Among the considered scans, the one with the fewest slices wins
         (first one in input order on ties).

    All comparisons are numeric. The rows are never converted to a numpy
    array, which would turn every field into a string and make the ordering
    lexicographic.

    Args:
        exams_dictionary: dict mapping an exam id to a list of rows
            ``[name, PixelSpacing[0], PixelSpacing[1], SliceThickness,
            n_slices]``.
        preferred_thickness: Slice thickness to favour. Defaults to 5.0.

    Returns:
        A new dict mapping each exam id to a copy of its selected row, with
        the original Python value types. The input dictionary is left
        untouched, so the function can be called repeatedly on the same
        input. Exams with an empty scan list are omitted.
    """
    best_scans = {}
    for exam_id, scans in exams_dictionary.items():
        if not scans:
            continue
        preferred = [
            scan for scan in scans
            if np.isclose(float(scan[3]), preferred_thickness)
        ]
        candidates = preferred if preferred else scans
        # min() returns the first minimal element, giving a stable tie-break.
        best_scans[exam_id] = list(min(candidates, key=lambda scan: int(scan[4])))
    return best_scans

def save_best_scans(root_dir, best_scans_dict):
    """Write the selected scan of every exam to ``BestScans.csv`` in ``root_dir``.

    One row is written per exam, ordered by exam key. Each row has two
    columns: the key and the string form of its value.

    Args:
        root_dir: Directory in which the CSV file is created. A trailing
            slash is optional.
        best_scans_dict: dict mapping an exam key to its selected scan info.
    """
    csv_path = os.path.join(root_dir, "BestScans.csv")
    # newline="" is what the csv module expects for files it writes to, and
    # the with-block guarantees the data is flushed and the handle closed.
    with open(csv_path, "w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        for key, val in sorted(best_scans_dict.items(), key=lambda item: item[0]):
            writer.writerow([key, val])

In [19]:
# Driver cell: runs the whole selection pipeline on root_dir.
# 1) Find the exam directories under root_dir.
exam_dirs = get_exam_dirs(root_dir)
# NONFUNCTION MoveFromUnknown
# (marker for the one-off "MoveFromUnknown" snippet listed under
#  "Non-Function Code" below; presumably it was run at this point)
# 2) Gather per-series header info for every exam.
exams_dictionary = gen_exams_dictionary(exam_dirs)
# 3) Reduce each exam to a single selected scan.
best_scans_dict = check_best_scan(exams_dictionary)
# 4) Persist the selection as BestScans.csv inside root_dir.
save_best_scans(root_dir, best_scans_dict)



# Non-Function Code

In [None]:
# RenameDirs: used this to rename directories from 'CQ500CTX CQ500CTX' to 'CQ500CTX'
#   dir_name = dir_entry.name.split(" ")[0]
#   os.rename(root_dir + dir_entry.name, root_dir + dir_name)

# MoveFromUnknown: "exam/Unknown Study/*" -> "exam/*"
# for dir in dir_list:
#     for subdir in os.listdir(dir):
#         if subdir == "Unknown Study":
#             for subsubdir in os.listdir(dir + subdir):
#                 os.rename(dir + subdir + "/" + subsubdir, dir + subsubdir)
#             os.rmdir(dir + subdir)