#### This will be used to extract, structure folders and generate pngs for dicoms in dataset CQ500

# Import Required Libraries

In [1]:
import os
import re
import gdcm
import zipfile
import operator
from glob import glob
from shutil import rmtree
from pathlib import Path
import subprocess

# Extract and Move Dicom files
### TODO: Use 'GoodDirs.txt' to move only chosen serie for each exam

In [2]:
def create_folder_structure(dataset_dir, exam_number):
    exam_dir = dataset_dir + exam_number
    dcm_dir = exam_dir + "/dicoms/"; png_dir = exam_dir + "/pngs/"
    proc_dir = exam_dir + "/processed/"; hist_dir = exam_dir + "/histograms/"
    os.makedirs(dcm_dir, exist_ok=True); os.makedirs(png_dir, exist_ok=True);
    os.makedirs(proc_dir, exist_ok=True); os.makedirs(hist_dir, exist_ok=True)
    return dcm_dir

def extract_dicom_zips(zip_list):
    count = 0
    for zip_path in zip_list:
        exam_number = re.findall(r'\d+', zip_path)[-1]
        dcm_dir = create_folder_structure(dataset_dir, exam_number)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(dcm_dir)
            count += 1
            print(str(count) + "/" + str(len(zip_list)))
        
# dataset_dir = "/home/rodrigo/Batch/"
# zip_list = glob(dataset_dir + "*.zip")
# extract_dicom_zips(zip_list)

# Order and Rename Serie by Z coordinate

In [5]:
def order_all_series(scans_file):
    f = open(scans_file, "r")
    for serie_path in f.readlines():
        serie_path = serie_path.strip("\n")
        sorted_serie = order_serie(serie_path)
        rename_serie_instances(serie_path, sorted_serie)
        
def order_serie(serie_dir):
    dcm_z = {}
    print(serie_dir)
    for dcm in glob(serie_dir + "/*.dcm"):
        result = os.popen("gdcminfo " + dcm).read().splitlines()[-5]
        Z = result.split(",")[-1].strip(")")
        dcm_z[os.path.basename(dcm)] = float(Z)
    sorted_by_z = sorted(dcm_z.items(), key=operator.itemgetter(1))
    return sorted_by_z

def rename_serie_instances(serie_path, sorted_by_z):    
    count = 0
    for dcm in sorted_by_z:
        os.rename(serie_path + "/" + dcm[0], serie_path + "/CT" + str(count) + ".dcm")
        count = count + 1
             
# root_dir = '/home/rodrigo/Batch/'
# good_scans = root_dir + 'GoodDirs.txt'
# order_all_series(good_scans)

/home/rodrigo/Batch/0/dicoms/CT_Plain
/home/rodrigo/Batch/102/dicoms/CT_Plain
/home/rodrigo/Batch/109/dicoms/CT_5mm
/home/rodrigo/Batch/159/dicoms/CT_5mm-2
/home/rodrigo/Batch/243/dicoms/CT_Plain
/home/rodrigo/Batch/303/dicoms/CT_PRE_CONTRAST_5MM_STD
/home/rodrigo/Batch/320/dicoms/CT_55mm_Plain
/home/rodrigo/Batch/486/dicoms/CT_Plain
/home/rodrigo/Batch/490/dicoms/CT_Plain
/home/rodrigo/Batch/89/dicoms/CT_PRE_CONTRAST_5MM_STD


# Convert Dicom to PNG (decompress if necessary)
#### its important to order the slices first

In [23]:
# convert compressed dicom to raw format
def convert_to_raw(src, dst):
    subprocess.run(["gdcmconv", "--raw", src, dst])
# convert dicom to png
def dcm_to_png(src, dst):
    subprocess.run(["dcm2pnm", "+on", "+Sxv", "224", "+Wi", "1", src, dst])
    
def generate_pngs(serie_path):
    # /home/rodrigo/Batch/0/dicoms/serie_name
    exam_dir = os.path.dirname(os.path.dirname(serie_path))
    for dcm_path in glob(serie_path + "/*.dcm"):
        compression = os.popen("gdcminfo " + dcm_path).read().splitlines()[1]
        png_name = Path(dcm_path).stem + ".png"
        if "JPEG" in compression:
            raw_path = exam_dir + "/pngs/" + os.path.basename(dcm_path)
            convert_to_raw(dcm_path, raw_path)
            dcm_to_png(raw_path, exam_dir + "/pngs/" + png_name)
            os.remove(raw_path)
        else:
            dcm_to_png(dcm_path, exam_dir + "/pngs/" + png_name)

def gen_all_pngs(root_dir):
    f = open(root_dir + "GoodDirs.txt", "r")
    scans = f.readlines()
    n_scans = len(scans)
    x = 0
    for serie_path in scans:
        serie_path = serie_path.strip("\n")
        if os.path.isdir(serie_path):
            print(serie_path)
            generate_pngs(serie_path)
            print(str(x) + "/" + str(n_scans))
            x += 1
        
root_dir = '/home/rodrigo/Batch/'
gen_all_pngs(root_dir)

/home/rodrigo/Batch/0/dicoms/CT_Plain
0/10
/home/rodrigo/Batch/102/dicoms/CT_Plain
1/10
/home/rodrigo/Batch/109/dicoms/CT_5mm
2/10
/home/rodrigo/Batch/159/dicoms/CT_5mm-2
3/10
/home/rodrigo/Batch/243/dicoms/CT_Plain
4/10
/home/rodrigo/Batch/303/dicoms/CT_PRE_CONTRAST_5MM_STD
5/10
/home/rodrigo/Batch/320/dicoms/CT_55mm_Plain
6/10
/home/rodrigo/Batch/486/dicoms/CT_Plain
7/10
/home/rodrigo/Batch/490/dicoms/CT_Plain
8/10
/home/rodrigo/Batch/89/dicoms/CT_PRE_CONTRAST_5MM_STD
9/10


#### not used anymore

In [None]:
# def move_dicoms_recursively(exam_dir):
#     dicoms = glob(exam_dir + "/dicoms/**/*.dcm", recursive=True)
#     count2 = 0
#     dir = exam_dir + "/dicoms/"
#     try:
#         subdir = [dI for dI in os.listdir(dir) if os.path.isdir(os.path.join(dir,dI))][0]
#         for filepath in dicoms:
#             dicom_name = os.path.basename(exam_dir) + "-" + str(count2) + ".dcm" 
#             os.rename(filepath, dir + dicom_name)
#             count2 = count2 + 1
#         print(str(count2) + "/" + str(len(dicoms)))
#         rmtree(dir + subdir)
#     except IndexError:
#         print("No dicoms to move.")

# def move_all_dicoms(dir_list):
#     x = 0
#     for exam_dir in dir_list:
#         move_dicoms_recursively(exam_dir)
#         print(str(x) + "/" + str(len(dir_list)))
#         x = x + 1
# dir_list = 
# move_all_dicoms(dir_list)