In [1]:
# PART 1 (ProcessDicom)
# dataset_dir = "/home/rodrigo/processed-dataset/"
# zip_list = glob(dataset_dir + "*.zip")
# extract_dicom_zips(zip_list)

# PART 2 (ChooseBestScans)
# root_dir = '/home/rodrigo/processed-dataset/'
# exam_dirs = get_exam_dirs(root_dir)
# exams_dictionary = gen_exams_dictionary(exam_dirs)
# best_scans_dict = check_best_scan(exams_dictionary)
# save_best_scans(root_dir, best_scans_dict)

# PART 3 (ProcessDicom)
# root_dir = '/home/rodrigo/processed-dataset/'
# good_scans = root_dir + 'GoodDirs.txt'
# order_all_series(good_scans)
# gen_all_pngs(root_dir)

# PART 4 (ProcessImages)
# dataset_dir = "/home/rodrigo/processed-dataset/"
# dir_list = glob(dataset_dir + "[0-9]*")
# total = len(dir_list)
#
# complete = 1
# for exam_dir in dir_list:
#     print("processing exam " + os.path.basename(exam_dir))
#     image_dir = exam_dir + "/pngs"
#     filelist = os.listdir(image_dir)
#     process_exam(filelist, image_dir)
#     print(str(complete) + "/" + str(total))
#     complete += 1

# PART 5 (PrepareTrainingData)
# root_dir = "/home/rodrigo/processed-dataset/"
# chosen_scans = root_dir + "GoodDirs.txt"
# labels_file = root_dir + "labels-batch.csv"
#
# serie_list = read_serie_paths(chosen_scans)
# labels = read_labels(labels_file)
# save_slices_labels(serie_list, labels)


# PART 6 (Model)

#### This notebook extracts the DICOM archives of the CQ500 dataset, builds the per-exam folder structure, and generates PNGs from the DICOM files

# Import Required Libraries

In [2]:
import os
import re
import gdcm
import zipfile
import operator
from glob import glob
from shutil import rmtree
from pathlib import Path
import subprocess

# Extract and Move Dicom files

In [3]:
def create_folder_structure(dataset_dir, exam_number):
    """Create the working folders of one exam and return its DICOM folder.

    The exam folder is ``dataset_dir + exam_number`` (plain concatenation, so
    ``dataset_dir`` has to end with a path separator). Inside it the
    ``dicoms``, ``pngs``, ``processed`` and ``histograms`` sub-folders are
    created; folders that already exist are left untouched.

    Args:
        dataset_dir: Dataset root as a string ending with "/".
        exam_number: Exam identifier as a string.

    Returns:
        The path of the exam's ``dicoms`` folder, with a trailing "/".
    """
    exam_dir = dataset_dir + exam_number
    for subfolder in ("dicoms", "pngs", "processed", "histograms"):
        os.makedirs(exam_dir + "/" + subfolder + "/", exist_ok=True)
    return exam_dir + "/dicoms/"

def extract_dicom_zips(zip_list, dataset_dir=None):
    """Extract every exam archive into its own ``<exam>/dicoms/`` folder.

    The exam number is the last run of digits in the archive's *file name*
    (directory components are ignored so digits in a parent folder cannot be
    mistaken for an exam number). Progress is printed as ``done/total``.

    Args:
        zip_list: Paths of the ``.zip`` archives to extract.
        dataset_dir: Root folder under which the exam folders are created.
            When omitted, the folder containing each archive is used, so the
            function no longer depends on a ``dataset_dir`` global being
            defined in the kernel.
    """
    total = len(zip_list)
    for count, zip_path in enumerate(zip_list, start=1):
        numbers = re.findall(r'\d+', os.path.basename(zip_path))
        if not numbers:
            print("skipping (no exam number in file name): " + zip_path)
            print(str(count) + "/" + str(total))
            continue
        exam_number = numbers[-1]

        base_dir = os.path.dirname(zip_path) if dataset_dir is None else dataset_dir
        # create_folder_structure concatenates strings, so make sure a
        # non-empty root ends with a separator.
        if base_dir:
            base_dir = os.path.join(base_dir, "")

        dcm_dir = create_folder_structure(base_dir, exam_number)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(dcm_dir)
        print(str(count) + "/" + str(total))
        
# dataset_dir = "/home/rodrigo/processed-dataset/"
# zip_list = glob(dataset_dir + "*.zip")
# extract_dicom_zips(zip_list)

# Order and Rename Series by Z Coordinate

In [4]:
def order_all_series(scans_file):
    """Sort and rename the DICOM files of every series listed in a text file.

    Each non-blank line of ``scans_file`` is taken as a series folder; its
    files are ordered with ``order_serie`` and renamed with
    ``rename_serie_instances``.

    Args:
        scans_file: Path of a text file with one series folder per line.
    """
    # Read the list up front so the file is closed before the slow work starts.
    with open(scans_file, "r") as listing:
        # Blank lines are dropped: an empty path would make order_serie glob
        # "/*.dcm" at the filesystem root.
        serie_paths = [line.strip() for line in listing if line.strip()]

    for serie_path in serie_paths:
        sorted_serie = order_serie(serie_path)
        rename_serie_instances(serie_path, sorted_serie)
        
def order_serie(serie_dir):
    """Return the ``.dcm`` files of a series ordered by their Z coordinate.

    ``gdcminfo`` is run on every ``*.dcm`` file directly inside ``serie_dir``
    and the last component of the reported origin is used as the Z value.

    Args:
        serie_dir: Folder containing the DICOM files of one series.

    Returns:
        A list of ``(file_name, z)`` tuples sorted by ascending ``z``; empty
        when the folder holds no ``.dcm`` file.

    Raises:
        RuntimeError: If ``gdcminfo`` prints no line to read the origin from.
    """
    dcm_z = {}
    print(serie_dir)
    for dcm in glob(serie_dir + "/*.dcm"):
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through untouched.
        info = subprocess.run(
            ["gdcminfo", dcm], stdout=subprocess.PIPE, universal_newlines=True
        ).stdout.splitlines()

        # Prefer the line labelled "Origin"; fall back to the positional
        # lookup (fifth line from the end) the notebook originally relied on.
        origin = next((line for line in info if line.startswith("Origin")), None)
        if origin is None:
            if len(info) < 5:
                raise RuntimeError("gdcminfo produced no usable output for " + dcm)
            origin = info[-5]

        z_text = origin.split(",")[-1].strip().strip(")")
        dcm_z[os.path.basename(dcm)] = float(z_text)
    return sorted(dcm_z.items(), key=operator.itemgetter(1))

def rename_serie_instances(serie_path, sorted_by_z):
    """Rename the files of a series to ``CT0.dcm``, ``CT1.dcm``, ... in order.

    The rename happens in two passes through unique temporary names. Renaming
    straight to the final name can overwrite a file that is still waiting to
    be renamed when its current name is already one of the ``CT<n>.dcm``
    targets (for example when the cell is run on an already-renamed series
    whose order changed), which silently loses a slice.

    Args:
        serie_path: Folder containing the files.
        sorted_by_z: Sequence of entries whose first item is the current file
            name, already in the desired order.
    """
    import uuid

    # Temporary names keep the ".dcm" suffix so an interrupted run still
    # leaves files that a later "*.dcm" glob will pick up.
    tag = uuid.uuid4().hex
    staged = []
    for index, entry in enumerate(sorted_by_z):
        temp_name = "reorder-" + tag + "-" + str(index) + ".dcm"
        os.rename(serie_path + "/" + entry[0], serie_path + "/" + temp_name)
        staged.append(temp_name)

    for index, temp_name in enumerate(staged):
        os.rename(serie_path + "/" + temp_name, serie_path + "/CT" + str(index) + ".dcm")
             
# root_dir = '/home/rodrigo/Batch/'
# good_scans = root_dir + 'GoodDirs.txt'
# order_all_series(good_scans)

# Convert Dicom to PNG (decompress if necessary)
#### It's important to order the slices first: each PNG is named after the DICOM file it was generated from

In [5]:
# convert compressed dicom to raw format
def convert_to_raw(src, dst):
    """Run ``gdcmconv --raw`` to write an uncompressed copy of ``src`` to ``dst``.

    The exit status of the external tool is not inspected.
    """
    command = ["gdcmconv", "--raw", src, dst]
    subprocess.run(command)
# convert dicom to png
def dcm_to_png(src, dst):
    """Render the DICOM image ``src`` to the PNG file ``dst`` with ``dcm2pnm``.

    Options passed (meanings as given in the dcm2pnm manual):
      * ``+on``       write PNG output
      * ``+Sxv 224``  scale the x axis to 224 pixels, y axis follows
      * ``+Wi 1``     apply the first VOI window stored in the image

    The exit status of the external tool is not inspected.
    """
    options = ["+on", "+Sxv", "224", "+Wi", "1"]
    subprocess.run(["dcm2pnm"] + options + [src, dst])
    
def generate_pngs(serie_path):
    """Write one PNG per DICOM file of a series into the exam's ``pngs`` folder.

    ``serie_path`` is expected to look like ``<exam_dir>/dicoms/<serie_name>``;
    the PNGs go to ``<exam_dir>/pngs/`` and keep the stem of the DICOM file.
    Files whose transfer syntax (second line of the ``gdcminfo`` output)
    mentions JPEG are first decompressed to a temporary raw DICOM.

    Args:
        serie_path: Folder of the series; a trailing "/" is tolerated.

    Raises:
        RuntimeError: If ``gdcminfo`` prints fewer than two lines for a file.
    """
    # Without this a trailing "/" makes the double dirname() below stop one
    # level too early and resolve "<exam_dir>/dicoms" as the exam folder.
    serie_dir = serie_path.rstrip("/") or serie_path
    exam_dir = os.path.dirname(os.path.dirname(serie_dir))
    png_dir = exam_dir + "/pngs/"

    for dcm_path in glob(serie_dir + "/*.dcm"):
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through untouched.
        info = subprocess.run(
            ["gdcminfo", dcm_path], stdout=subprocess.PIPE, universal_newlines=True
        ).stdout.splitlines()
        if len(info) < 2:
            raise RuntimeError("gdcminfo produced no usable output for " + dcm_path)
        compression = info[1]

        png_path = png_dir + Path(dcm_path).stem + ".png"
        if "JPEG" in compression:
            raw_path = png_dir + os.path.basename(dcm_path)
            convert_to_raw(dcm_path, raw_path)
            try:
                dcm_to_png(raw_path, png_path)
            finally:
                # Never leave the temporary raw DICOM behind, and do not fail
                # on cleanup when the decompression step produced nothing.
                if os.path.exists(raw_path):
                    os.remove(raw_path)
        else:
            dcm_to_png(dcm_path, png_path)

def gen_all_pngs(root_dir):
    """Generate the PNGs of every series listed in ``root_dir + "GoodDirs.txt"``.

    Each non-blank line of the listing is taken as a series folder. Existing
    folders are printed and handed to ``generate_pngs``; entries that are not
    directories are reported and skipped. After every entry the progress is
    printed as ``done/total``, counting from 1 so the last line reads
    ``total/total``.

    Args:
        root_dir: Dataset root as a string ending with "/" (the listing path
            is built by plain concatenation).
    """
    with open(root_dir + "GoodDirs.txt", "r") as listing:
        scans = [line.strip() for line in listing if line.strip()]

    n_scans = len(scans)
    for done, serie_path in enumerate(scans, start=1):
        if os.path.isdir(serie_path):
            print(serie_path)
            generate_pngs(serie_path)
        else:
            print("skipping (not a directory): " + serie_path)
        print(str(done) + "/" + str(n_scans))
        
# Run the ordering and PNG-generation steps on the processed dataset.
# root_dir must end with "/" because the paths below (and the one built
# inside gen_all_pngs) are formed by plain string concatenation.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
root_dir = '/home/rodrigo/processed-dataset/'
# Text file listing one series folder per line.
good_scans = root_dir + 'GoodDirs.txt'
# Renames the DICOM files of every listed series in place (CT0.dcm, CT1.dcm, ...).
order_all_series(good_scans)
# Reads root_dir + "GoodDirs.txt" again and writes the PNGs of each listed series.
gen_all_pngs(root_dir)

/home/rodrigo/processed-dataset/0/dicoms/CT_Plain
/home/rodrigo/processed-dataset/1/dicoms/CT_2.55mm
/home/rodrigo/processed-dataset/101/dicoms/CT_Plain
/home/rodrigo/processed-dataset/102/dicoms/CT_Plain
/home/rodrigo/processed-dataset/103/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/104/dicoms/CT_Plain
/home/rodrigo/processed-dataset/105/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/106/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/107/dicoms/CT_55mm_Plain-2
/home/rodrigo/processed-dataset/108/dicoms/CT_5mm
/home/rodrigo/processed-dataset/109/dicoms/CT_5mm
/home/rodrigo/processed-dataset/11/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/110/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/111/dicoms/CT_Plain
/home/rodrigo/processed-dataset/113/dicoms/CT_Plain
/home/rodrigo/processed-dataset/116/dicoms/CT_Plain
/home/rodrigo/processed-dataset/117/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/119/dicoms/CT_PRE_CONTRAST_5MM_STD


/home/rodrigo/processed-dataset/26/dicoms/CT_C
/home/rodrigo/processed-dataset/260/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/261/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/262/dicoms/CT_Plain
/home/rodrigo/processed-dataset/263/dicoms/CT_Plain
/home/rodrigo/processed-dataset/265/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/266/dicoms/CT_Plain
/home/rodrigo/processed-dataset/267/dicoms/CT_C
/home/rodrigo/processed-dataset/268/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/269/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/27/dicoms/CT_Plain
/home/rodrigo/processed-dataset/270/dicoms/CT_Plain
/home/rodrigo/processed-dataset/271/dicoms/CT_5mm
/home/rodrigo/processed-dataset/273/dicoms/CT_5mm
/home/rodrigo/processed-dataset/274/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/275/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/277/dicoms/CT_5mm-3
/home/rodrigo/processed-dataset/278/dicoms/CT_PRE_CONTR

/home/rodrigo/processed-dataset/419/dicoms/CT_5mm
/home/rodrigo/processed-dataset/42/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/420/dicoms/CT_5mm
/home/rodrigo/processed-dataset/421/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/422/dicoms/CT_Plain
/home/rodrigo/processed-dataset/423/dicoms/CT_5mm-2
/home/rodrigo/processed-dataset/425/dicoms/CT_5mm-2
/home/rodrigo/processed-dataset/427/dicoms/CT_2.55mm
/home/rodrigo/processed-dataset/428/dicoms/CT_55mm_Plain
/home/rodrigo/processed-dataset/429/dicoms/CT_Plain
/home/rodrigo/processed-dataset/43/dicoms/CT_ORAL_IV-2
/home/rodrigo/processed-dataset/430/dicoms/CT_55mm_Plain-2
/home/rodrigo/processed-dataset/431/dicoms/CT_Plain-2
/home/rodrigo/processed-dataset/433/dicoms/CT_Plain
/home/rodrigo/processed-dataset/434/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/processed-dataset/435/dicoms/CT_Plain
/home/rodrigo/processed-dataset/436/dicoms/CT_Plain
/home/rodrigo/processed-dataset/437/dicoms/CT_Plain
/home/rodrigo/pr

27/410
/home/rodrigo/processed-dataset/13/dicoms/CT_PRE_CONTRAST_5MM_STD
28/410
/home/rodrigo/processed-dataset/130/dicoms/CT_Plain
29/410
/home/rodrigo/processed-dataset/132/dicoms/CT_PRE_CONTRAST_5MM_STD
30/410
/home/rodrigo/processed-dataset/135/dicoms/CT_Plain
31/410
/home/rodrigo/processed-dataset/136/dicoms/CT_ORAL_IV
32/410
/home/rodrigo/processed-dataset/137/dicoms/CT_PRE_CONTRAST_5MM_STD
33/410
/home/rodrigo/processed-dataset/138/dicoms/CT_PRE_CONTRAST_5MM_STD
34/410
/home/rodrigo/processed-dataset/139/dicoms/CT_55mm_Plain
35/410
/home/rodrigo/processed-dataset/140/dicoms/CT_Plain
36/410
/home/rodrigo/processed-dataset/141/dicoms/CT_PRE_CONTRAST_5MM_STD
37/410
/home/rodrigo/processed-dataset/142/dicoms/CT_55mm_Plain
38/410
/home/rodrigo/processed-dataset/143/dicoms/CT_Plain
39/410
/home/rodrigo/processed-dataset/144/dicoms/CT_PRE_CONTRAST_5MM_STD
40/410
/home/rodrigo/processed-dataset/145/dicoms/CT_Plain
41/410
/home/rodrigo/processed-dataset/146/dicoms/CT_PRE_CONTRAST_5MM_STD

157/410
/home/rodrigo/processed-dataset/270/dicoms/CT_Plain
158/410
/home/rodrigo/processed-dataset/271/dicoms/CT_5mm
159/410
/home/rodrigo/processed-dataset/273/dicoms/CT_5mm
160/410
/home/rodrigo/processed-dataset/274/dicoms/CT_55mm_Plain
161/410
/home/rodrigo/processed-dataset/275/dicoms/CT_PRE_CONTRAST_5MM_STD
162/410
/home/rodrigo/processed-dataset/277/dicoms/CT_5mm-3
163/410
/home/rodrigo/processed-dataset/278/dicoms/CT_PRE_CONTRAST_5MM_STD
164/410
/home/rodrigo/processed-dataset/279/dicoms/CT_Plain
165/410
/home/rodrigo/processed-dataset/28/dicoms/CT_55mm_Plain
166/410
/home/rodrigo/processed-dataset/280/dicoms/CT_Plain
167/410
/home/rodrigo/processed-dataset/281/dicoms/CT_55mm_Plain
168/410
/home/rodrigo/processed-dataset/282/dicoms/CT_5mm
169/410
/home/rodrigo/processed-dataset/283/dicoms/CT_Plain-2
170/410
/home/rodrigo/processed-dataset/284/dicoms/CT_PRE_CONTRAST_5MM_STD
171/410
/home/rodrigo/processed-dataset/285/dicoms/CT_5mm
172/410
/home/rodrigo/processed-dataset/286/dic

287/410
/home/rodrigo/processed-dataset/410/dicoms/CT_55mm_Plain
288/410
/home/rodrigo/processed-dataset/411/dicoms/CT_5mm
289/410
/home/rodrigo/processed-dataset/412/dicoms/CT_5mm
290/410
/home/rodrigo/processed-dataset/414/dicoms/CT_PRE_CONTRAST_5MM_STD
291/410
/home/rodrigo/processed-dataset/415/dicoms/CT_2.55mm
292/410
/home/rodrigo/processed-dataset/416/dicoms/CT_Plain
293/410
/home/rodrigo/processed-dataset/417/dicoms/CT_Plain
294/410
/home/rodrigo/processed-dataset/418/dicoms/CT_55mm_Plain
295/410
/home/rodrigo/processed-dataset/419/dicoms/CT_5mm
296/410
/home/rodrigo/processed-dataset/42/dicoms/CT_PRE_CONTRAST_5MM_STD
297/410
/home/rodrigo/processed-dataset/420/dicoms/CT_5mm
298/410
/home/rodrigo/processed-dataset/421/dicoms/CT_55mm_Plain
299/410
/home/rodrigo/processed-dataset/422/dicoms/CT_Plain
300/410
/home/rodrigo/processed-dataset/423/dicoms/CT_5mm-2
301/410
/home/rodrigo/processed-dataset/425/dicoms/CT_5mm-2
302/410
/home/rodrigo/processed-dataset/427/dicoms/CT_2.55mm
30

#### not used anymore

In [6]:
# def move_dicoms_recursively(exam_dir):
#     dicoms = glob(exam_dir + "/dicoms/**/*.dcm", recursive=True)
#     count2 = 0
#     dir = exam_dir + "/dicoms/"
#     try:
#         subdir = [dI for dI in os.listdir(dir) if os.path.isdir(os.path.join(dir,dI))][0]
#         for filepath in dicoms:
#             dicom_name = os.path.basename(exam_dir) + "-" + str(count2) + ".dcm" 
#             os.rename(filepath, dir + dicom_name)
#             count2 = count2 + 1
#         print(str(count2) + "/" + str(len(dicoms)))
#         rmtree(dir + subdir)
#     except IndexError:
#         print("No dicoms to move.")

# def move_all_dicoms(dir_list):
#     x = 0
#     for exam_dir in dir_list:
#         move_dicoms_recursively(exam_dir)
#         print(str(x) + "/" + str(len(dir_list)))
#         x = x + 1
# dir_list = 
# move_all_dicoms(dir_list)