#### This notebook searches an extracted DICOM dataset and writes 3 files:  <br>
1. a csv with info for the best series of each scan (name, pixel spacing, thickness, n_slices)
2. a txt containing the paths of usable scans (series with 5 mm thickness)
3. a txt containing the paths of unusable scans (series thickness != 5 mm)

# Import Required Libraries

In [1]:
import os
import re
import csv
import numpy as np
import pydicom
import shutil
from glob import glob
from pydicom.filereader import read_dicomdir

# Create Series Info Dictionary for each scan

In [2]:
def get_exam_dirs(root_dir):
    """Return the exam directories found directly under ``root_dir``.

    Every immediate subdirectory of ``root_dir`` is treated as one exam.
    Before an exam is added to the result, ``move_series`` is called on it so
    that its series folders are moved up into ``<exam>/dicoms/``.

    Args:
        root_dir: Dataset root directory. A trailing slash is optional.

    Returns:
        A list of exam directory paths, each ending with a path separator,
        e.g. ``['root_dir/CQ500CT0/', 'root_dir/CQ500CT1/']``, in the order
        reported by ``os.scandir``.
    """
    subdirs = []
    # The context manager closes the scandir iterator even if move_series
    # raises part-way through the listing.
    with os.scandir(root_dir) as entries:
        for dir_entry in entries:
            if not dir_entry.is_dir():
                continue
            # Joining with "" keeps the trailing separator, so callers can
            # still append "dicoms/" to the returned path.
            exam_dir = os.path.join(root_dir, dir_entry.name, "")
            # move series to exam_dir
            move_series(exam_dir)
            subdirs.append(exam_dir)
    return subdirs

def move_series(exam_dir):
    """Move the series folders of one exam directly under ``dicoms/``.

    Looks for ``<exam_dir>/dicoms/CQ500*/Unknown*/`` and, when both levels
    exist, moves every entry found inside the ``Unknown*`` folder into
    ``<exam_dir>/dicoms/`` and then removes the two emptied wrapper folders.
    Only the first ``CQ500*`` match and the first ``Unknown*`` match are
    processed. Nothing happens when either level is missing.

    Args:
        exam_dir: Path of the exam directory. A trailing slash is optional.

    Raises:
        OSError: If a wrapper folder is not empty after the move (``os.rmdir``
            only removes empty directories).
    """
    dicoms_dir = os.path.join(exam_dir, "dicoms", "")
    subdir = glob(os.path.join(dicoms_dir, "CQ500*"))
    if not subdir:
        return
    unknown_dir = glob(os.path.join(subdir[0], "Unknown*"))
    if not unknown_dir:
        return
    # list and then move series
    for serie_dir in glob(os.path.join(unknown_dir[0], "*")):
        # The original passed an undefined name here, which raised NameError
        # as soon as there was a series to move.
        shutil.move(serie_dir, dicoms_dir)
    # remove the now-empty wrapper folders, innermost first
    os.rmdir(unknown_dir[0])
    os.rmdir(subdir[0])

def gen_exams_dictionary(exam_dirs):
    """Collect per-series information for every exam directory.

    For each exam, every entry of ``<exam_dir>/dicoms/`` is renamed so that
    spaces become underscores. Entries whose new name contains ``POST``,
    ``BONE`` or ``CONTRAST`` (unless ``CONTRAST`` is directly preceded by
    ``PRE_``) are skipped. For the remaining entries one ``.dcm`` file is read
    and a row ``[name, PixelSpacing[0], PixelSpacing[1], SliceThickness,
    n_files]`` is recorded, where ``n_files`` is the number of ``.dcm`` files
    in the series folder.

    Args:
        exam_dirs: Iterable of exam directory paths.

    Returns:
        A dict of the form ``{exam_dir: [row, row, ...]}``. Exams without any
        usable series are left out and reported with a printed warning.
    """
    excluded = re.compile(r"(POST|(?<!PRE_)CONTRAST|BONE)")
    exams_dict = {}
    for exam_dir in exam_dirs:
        series_info = []
        dcm_dir = os.path.join(exam_dir, "dicoms", "")
        for serie_dir in os.listdir(dcm_dir):
            # rename dir replacing spaces with underline
            new_serie_dir = serie_dir.replace(" ", "_")
            if new_serie_dir != serie_dir:
                os.rename(dcm_dir + serie_dir, dcm_dir + new_serie_dir)
            if excluded.search(new_serie_dir):
                continue
            # get instances info
            dcm_files = glob(dcm_dir + new_serie_dir + "/*.dcm")
            if not dcm_files:
                # Empty series folder or a stray file: there is no header to
                # read, so skip it instead of failing on dcm_files[0].
                print("Warning: " + dcm_dir + new_serie_dir + " has no .dcm files")
                continue
            # Only header fields are needed, so the pixel data is not loaded.
            dcm_data = pydicom.dcmread(dcm_files[0], stop_before_pixels=True)
            series_info.append([
                new_serie_dir,
                float(dcm_data.PixelSpacing[0]),
                float(dcm_data.PixelSpacing[1]),
                float(dcm_data.SliceThickness),
                len(dcm_files),
            ])
        if series_info:
            exams_dict[exam_dir] = series_info
        else:
            print("Warning: " + exam_dir + " has no usable serie")
    return exams_dict

In [3]:
# Dataset root directory (note the trailing slash).
root_dir = '/home/rodrigo/Batch/'
# Commented-out alternative root and a manual run that prints the dictionary:
# root_dir = '/home/rodrigo/processed-dataset/'
# exam_dirs = get_exam_dirs(root_dir)
# exams_dictionary = gen_exams_dictionary(exam_dirs)
# print(exams_dictionary)

# Sort Dictionary to choose best scan
1. Rank the series of each exam by slice thickness.
2. If a 5 mm series exists, discard the series with any other thickness.
   2.1. Among the 5 mm series, choose the one with the fewest slices.
3. If there is no 5 mm series, choose the series with the fewest slices.

In [4]:
# serie structure: ['serie', PxSpacing[0], PxSpacing[1], Thickness, n_slices]
# dictionary structure:
#   {'/home/rodrigo/Batch/CQ500CT0/':
#     [['CT 4cc .', 0.443359, 0.443359, 0.625, 239],
#      ['CT Plain', 0.451172, 0.451172, 5.0,   30],
#      ['CT 4cc .', 0.443359, 0.443359, 5.0,   239]]}
# selection order: 1) Thickness = 5, 2) Fewest Number of Slices

def check_best_scan(exams_dictionary):
    """Replace each exam's list of series with its single best series.

    A series with a slice thickness of exactly 5.0 is preferred. Among the
    5.0 series (or among all series when none is 5.0) the one with the fewest
    slices wins; on a tie the earliest row in the input list is kept.

    Thickness and slice count are compared as numbers. Comparing their string
    forms would rank ``'239'`` below ``'30'`` and would let a thickness such
    as ``'7.0'`` hide an existing ``'5.0'`` series.

    Args:
        exams_dictionary: ``{exam_id: [row, ...]}`` where each row is
            ``[name, PxSpacing[0], PxSpacing[1], Thickness, n_slices]``.
            The dict is modified in place.

    Returns:
        The same dict, now ``{exam_id: row}``. Every element of the chosen
        row is converted to ``str`` (e.g. ``'5.0'``, ``'30'``).

    Raises:
        ValueError: If an exam maps to an empty list of series.
    """
    for exam_id, series in exams_dictionary.items():
        five_mm = [row for row in series if float(row[3]) == 5.0]
        candidates = five_mm if five_mm else series
        best = min(candidates, key=lambda row: float(row[4]))
        # Rows are returned as strings so that string comparisons such as
        # ``val[3] == '5.0'`` on the result keep working.
        exams_dictionary[exam_id] = [str(value) for value in best]
    return exams_dictionary

def save_best_scans(root_dir, best_scans_dict):
    """Write the best-series report files into ``root_dir``.

    Exams are processed in sorted key order and three files are produced:

    * ``ScansInfo.csv``: one row per exam, holding the exam key and the
      series row as a single cell.
    * ``GoodDirs.txt``: ``<exam>/dicoms/<series>`` for series whose thickness
      equals 5.0.
    * ``ToReslice.txt``: the same path for every other series.

    Args:
        root_dir: Output directory. A trailing slash is optional.
        best_scans_dict: ``{exam_dir: row}`` where ``row[0]`` is the series
            name and ``row[3]`` its thickness (a number or numeric string).
    """
    csv_path = os.path.join(root_dir, 'ScansInfo.csv')
    good_path = os.path.join(root_dir, 'GoodDirs.txt')
    reslice_path = os.path.join(root_dir, 'ToReslice.txt')
    # The with-block guarantees all three files are flushed and closed;
    # newline='' is what the csv module expects for files it writes to.
    with open(csv_path, 'w', newline='') as csv_file, \
            open(good_path, 'w') as good_file, \
            open(reslice_path, 'w') as reslice_file:
        writer = csv.writer(csv_file)
        for key, val in sorted(best_scans_dict.items()):
            writer.writerow([key, val])
            serie_path = os.path.join(key, "dicoms", val[0])
            # Numeric comparison accepts '5.0', '5' and 5.0 alike.
            target = good_file if float(val[3]) == 5.0 else reslice_file
            target.write(serie_path + "\n")

In [5]:
# Run the whole pipeline on root_dir.
# 1) list the exam directories (series folders are moved under dicoms/ on the way)
exam_dirs = get_exam_dirs(root_dir)
# 2) collect the per-series info rows of every exam
exams_dictionary = gen_exams_dictionary(exam_dirs)
# 3) keep a single best series per exam
best_scans_dict = check_best_scan(exams_dictionary)
# 4) write ScansInfo.csv, GoodDirs.txt and ToReslice.txt into root_dir
save_best_scans(root_dir, best_scans_dict)