In [38]:
import matplotlib.pyplot as plt
import pydicom
from pydicom.data import get_testdata_files
import os
import glob
import numpy as np
import shutil
import math

In [39]:
def ExtractDCIMMetadata(filename):
    """Read the DICOM file at ``filename`` with ``pydicom.dcmread``.

    Args:
        filename: path of the file to read.

    Returns:
        Whatever ``pydicom.dcmread`` returns for that file, passed through
        unchanged (no tags are filtered out here).
    """
    return pydicom.dcmread(filename)

In [40]:
# Raw input: every entry directly under this folder is treated as one patient folder of *.dcm files.
root_folder = "/docs/src/kt/data"
# Output of ArrangeBySeries: <series_folder>/<patient>/<series number>/<instance number>.dcm
series_folder = "/docs/src/kt/data_by_series"
# Output of ResizeSeries: same layout as series_folder, with series re-indexed to a common length.
resized_series_folder = "/docs/src/kt/data_by_series_resized"
# Destination passed to GenerateImagesFromSeries for the rendered slice images.
IMAGES_PATH = "/docs/src/kt/data_by_series_resized_images"
# ResizeSeries uses the smallest series size that is >= this value as the common target length.
MIN_NUMBER_OF_PHOTO = 100


In [74]:
def save_fig(file_name, tight_layout=True, fig_extension="jpg", resolution=300):
    """Write the current matplotlib figure to ``file_name`` and then clear it.

    Args:
        file_name: destination path handed to ``plt.savefig``.
        tight_layout: when truthy, ``plt.tight_layout()`` is applied first.
        fig_extension: value passed as ``format`` to ``plt.savefig``.
        resolution: value passed as ``dpi`` to ``plt.savefig``.
    """
    if tight_layout:
        plt.tight_layout()
    plt.savefig(file_name, dpi=resolution, format=fig_extension)
    # Clear the figure so the next plot starts from an empty canvas.
    plt.clf()

In [43]:
# One entry per patient folder; each entry is the list returned by GetPatientData.
patients = []

def GetPatientData(folder):
    """Load every ``*.dcm`` file found directly in ``folder``.

    ``folder`` is concatenated with ``"*.dcm"`` as-is, so it must already end
    with a path separator. Each file name is logged to the module-level
    ``track_file``, which therefore has to be open when this is called.

    For every dataset, ``PixelData`` is overwritten with an empty string
    (presumably to keep memory usage low) and ``ReferringPhysicianName`` is
    overwritten with the path the dataset was read from.
    NOTE(review): later cells index that stored path as [0]/[1]/[2]; this
    appears to rely on how pydicom stores a string containing backslashes.
    Confirm before running on a non-Windows host.

    Returns:
        list of the datasets read, in the order ``glob`` produced them.
    """
    datasets = []
    for dcm_path in glob.glob(folder + "*.dcm"):
        print("\tfile: ", dcm_path, file=track_file)
        dataset = ExtractDCIMMetadata(dcm_path)
        dataset["PixelData"].value = ""
        dataset["ReferringPhysicianName"].value = dcm_path
        datasets.append(dataset)
    return datasets

# Load every patient folder under root_folder into `patients`, logging progress
# both to the notebook output and to tracker.txt.
# The `with` block closes (and flushes) the log even when a DICOM file fails to
# parse half-way through; GetPatientData picks up `track_file` as a global.
with open("tracker.txt", "w") as track_file:
    for folder in glob.glob(root_folder + "/*"):
        print("read from folder: ", folder)
        print("read from folder: ", folder, file=track_file)

        # GetPatientData concatenates "*.dcm" directly, hence the trailing slash.
        patients.append(GetPatientData(folder + "/"))

read from folder:  /docs/src/kt/data\Buslaev S.N. - Body 1.0


In [44]:
# Number of patient folders collected by the loading loop (one list entry per folder).
len(patients)

1

In [75]:
# Write evaluation.txt: for every patient and every tag listed by the first
# file's `dir("")`, print the tag's value each time it changes from one file
# to the next (PixelData is skipped).
# `with` guarantees the report is closed even if a patient raises mid-way.
with open("evaluation.txt", "w") as evaluation_file:
    for patient in patients:
        if not patient:
            # A folder without *.dcm files yields an empty list: nothing to report.
            continue

        print("Patient: ", patient[0]["PatientName"].value, file=evaluation_file)
        for tag_name in patient[0].dir(""):
            if tag_name == "PixelData":
                continue

            print("Tag name: ", tag_name, file=evaluation_file)
            prev_value = ""
            for file_counter, single_file in enumerate(patient, start=1):
                try:
                    curr_value = single_file[tag_name].value
                    if prev_value != curr_value:
                        print(curr_value, end=" ", file=evaluation_file)
                    prev_value = curr_value
                except Exception:
                    # Best effort: report the file where the tag could not be read and
                    # keep going. `Exception` (not a bare except) lets Ctrl-C through.
                    print ("tagname: ", tag_name, " in file#", file_counter)

            print(file=evaluation_file)

--- Check whether the following fields are present in the metadata of every file

In [45]:
def CheckFieldsPresence(fields):
    """Check that every file of every patient exposes each tag in ``fields``.

    Reads the module-level ``patients`` list. Every file/field combination is
    checked (no early exit), and each failure is printed with the patient name
    and the 1-based position of the file within the patient.

    Args:
        fields: iterable of tag keywords to look up on each file.

    Returns:
        1 if every lookup of ``file[field].value`` succeeded, otherwise 0.
    """
    result = 1
    for patient in patients:
        for field in fields:
            for file_counter, single_file in enumerate(patient, start=1):
                try:
                    # Only the lookup matters; the value itself is discarded.
                    single_file[field].value
                except Exception:
                    # `Exception` rather than a bare except, so KeyboardInterrupt and
                    # SystemExit still stop a long-running scan.
                    result = 0
                    print("Patient: ", patient[0]["PatientName"].value, " field ", field, " doesn't exists in file#", file_counter)
    return result

def PrintPassages(passages, min_steps=10):
    """Print the passages that span more than ``min_steps`` steps.

    Each passage is indexed positionally: ``[0]`` and ``[1]`` are printed as the
    start and end, ``[2]`` is printed under the label "thickness", and ``[4]``
    is the step count compared against ``min_steps``. A final line reports how
    many passages were too short to be printed.

    Args:
        passages: sequence of passage entries as built by PrintFieldsRanges.
        min_steps: a passage is printed only when its step count is strictly
            greater than this value (default 10, the previously fixed threshold).
    """
    shown = 0
    for passage in passages:
        if passage[4] > min_steps:
            shown += 1
            print("\t\t", shown, ") ", passage[0], "->", passage[1], ", steps: ", passage[4], ", thickness: ", passage[2])
    print("\t\t# unprinted passages: ", len(passages) - shown, "/", len(passages))

def PrintFieldsRanges(fields):
    """Print, per patient and field, the value range and the detected "passages".

    Reads the module-level ``patients`` list. For every file the field value is
    reduced to one number: a plain numeric value is used directly, and for a
    ``pydicom.multival.MultiValue`` the last component is used. Files whose
    value is neither are reported with a "FAIL at type" line and skipped.

    A passage is a run of consecutive files whose step (difference from the
    previous value) stays within 0.2 of the run's step. Each passage is stored
    as ``[start, end, step of this run, step that ended it, number of files]``
    and handed to ``PrintPassages``.

    Note: the previous value starts at 0, so the first file usually opens with
    a zero-length passage. This pre-existing quirk is kept unchanged.

    Args:
        fields: iterable of tag keywords to analyse.
    """
    for patient in patients:
        if not patient:
            # Nothing to analyse (and no patient name to print) for an empty list.
            continue

        for field in fields:
            print ("field: ", field)
            min_val = float('inf')
            max_val = -float('inf')
            passages = []
            curr_passage_start = 0
            prev_passage_rate = 0
            prev_val = 0
            passage_counter = 0
            for single_file in patient:
                value = single_file[field].value
                if isinstance(value, (int, float)):
                    # Covers pydicom's DSfloat, which is a float subclass.
                    curr_val = value
                elif isinstance(value, pydicom.multival.MultiValue):
                    # Last component of this file's own value (the original took the
                    # index from the first file, and its type test was always true).
                    curr_val = value[-1]
                else:
                    print("FAIL at type ", type(value))
                    continue

                min_val = min(min_val, curr_val)
                max_val = max(max_val, curr_val)

                curr_passage_rate = curr_val - prev_val

                # A change of step larger than 0.2 closes the current passage.
                if abs(curr_passage_rate - prev_passage_rate) > 0.2:
                    passages.append([curr_passage_start, prev_val, round(prev_passage_rate, 2), round(curr_passage_rate, 2), passage_counter])
                    passage_counter = 0
                    curr_passage_start = prev_val
                    prev_passage_rate = curr_passage_rate

                prev_val = curr_val
                passage_counter += 1

            # Close the passage that was still open when the files ran out.
            passages.append([curr_passage_start, prev_val, prev_passage_rate, 0, passage_counter])

            print("Patient: ", patient[-1]["PatientName"].value, " field: ", field, " (" , min_val , " - ", max_val,")")
            print("\tpassages: ")
            PrintPassages(passages)
            print()


In [46]:
def GetFilenameFromFullpath(filename):
    """Return the element at index 2 of ``filename``.

    ``filename`` must be indexable. Presumably it is the path value stored in
    ``ReferringPhysicianName`` split into [root, patient folder, file name] -
    confirm against the loader.
    """
    file_part = filename[2]
    return file_part
def GetFolderFromFullpath(filename):
    """Return the element at index 1 of ``filename``.

    ``filename`` must be indexable. Presumably it is the path value stored in
    ``ReferringPhysicianName`` split into [root, patient folder, file name] -
    confirm against the loader.
    """
    folder_part = filename[1]
    return folder_part

In [47]:
def ExtractField(patient, fields):
    """Collect the requested tag values of one patient and dump them to a text file.

    Writes ``fields_<PatientName>.txt`` in the current working directory. It
    holds the patient name, then for each field a header line followed by the
    space-separated values found.

    Args:
        patient: non-empty sequence of datasets; the first one supplies the
            patient name.
        fields: iterable of tag keywords to extract.

    Returns:
        A list whose first element is the list of ``ReferringPhysicianName``
        values (one per file) and whose remaining elements are the value lists
        of each requested field, in the order of ``fields``. A file where a
        field cannot be read is reported on stdout and contributes no value,
        so that field's list is then shorter than the file list.

    Raises:
        IndexError: if ``patient`` is empty.
    """
    patient_name = patient[0]["PatientName"].value
    fields_list = []

    # `with` closes the dump even if a lookup outside the guarded block raises.
    with open("fields_" + str(patient_name) + ".txt", "w") as fields_file:
        print("Patient: ", patient_name, file=fields_file)

        fields_list.append([single_file["ReferringPhysicianName"].value for single_file in patient])

        for field in fields:
            print("Field: ", field, file=fields_file)
            field_values = []
            for file_counter, single_file in enumerate(patient, start=1):
                try:
                    value = single_file[field].value
                    field_values.append(value)
                    print(value, end=" ", file=fields_file)
                except Exception:
                    # Best effort: report and continue, but let Ctrl-C through.
                    print("Patient: ", patient_name, " field ", field, " doesn't exists in file#", file_counter)
            print("", file=fields_file)
            fields_list.append(field_values)

    return fields_list

def ExtractFields(fields, patients=None):
    """Run ``ExtractField`` for every patient and collect the results.

    Args:
        fields: iterable of tag keywords forwarded to ``ExtractField``.
        patients: patients to process. When omitted, the module-level
            ``patients`` list is looked up at call time. The previous default
            (``patients = patients``) captured whatever list existed when this
            cell was executed and went stale once the loading cell was re-run.

    Returns:
        list with one ``ExtractField`` result per patient, in input order.
    """
    if patients is None:
        # The parameter shadows the global of the same name, hence globals().
        patients = globals()["patients"]

    return [ExtractField(patient, fields) for patient in patients]


In [48]:
def ArrangeBySeries(patients):
    """Copy each patient's files into ``series_folder/<patient>/<series>/<idx>.dcm``.

    ``patients`` is iterated and each item is indexed like a 2-D numpy array
    (``patient[0, mask]``): row 0 holds the per-file path values, row 1 the
    values the files are grouped by, and row 2 the values used as destination
    file names. The driver cell builds it from
    ``ExtractFields(["SeriesNumber", "InstanceNumber"])``.

    A series whose destination folder already exists is skipped entirely, so a
    partially copied series is not completed on a re-run.
    """
    for patient in patients:
        # Element 1 of the first file's path value is used as the patient folder name.
        # NOTE(review): assumes each path value is indexable as [root, folder, file] - confirm.
        patient_folder = patient[0][0][1]

        for single_series in np.unique(patient[1]):
            # Boolean mask selecting the files that belong to this series.
            indicies = patient[1] == single_series
            patient_series_folder = series_folder + "/" + str(patient_folder) + "/" + str(single_series)
            
#             if(os.path.exists(patient_series_folder) == True):
#                 shutil.rmtree(patient_series_folder)
            if(os.path.exists(patient_series_folder) == False):
                os.makedirs(patient_series_folder, exist_ok=True)
                print("\t", single_series, ") len:", patient[0, indicies].shape[0])
                for file, idx in zip(patient[0, indicies], patient[2, indicies]):
#                     print(single_series, str(idx) + ")", file)
                    # The source path is rebuilt from the three components of the stored path value.
                    shutil.copyfile(str(file[0]) + "/" + str(file[1]) + "/" + str(file[2]), patient_series_folder + "/" + str(idx) + ".dcm")

        print()

In [71]:
def ConvertDCIMToPNG(src_dcim_file, dst_png_file):
    """Render the pixel data of a DICOM file and save it as an image.

    The image format follows the extension of ``dst_png_file`` (for example
    ``.png`` or ``.jpg``). Previously the file was always encoded with
    ``save_fig``'s JPEG default, so a ``.png`` destination received JPEG data.
    A destination without an extension still falls back to ``"jpg"``.

    Args:
        src_dcim_file: path of the DICOM file to read.
        dst_png_file: path of the image file to write.
    """
    extension = os.path.splitext(dst_png_file)[1].lstrip(".").lower()

    _dataset = ExtractDCIMMetadata(src_dcim_file)
    plt.axis('off')
    plt.imshow(_dataset.pixel_array)
    save_fig(dst_png_file, fig_extension=extension or "jpg")
    
# ConvertDCIMToPNG(
#     "/docs/src/kt/data_by_series_resized/Figina G.A. - Body 1.0-2/16/1.dcm",
#     "/docs/src/kt/data_by_series_resized/Figina G.A. - Body 1.0-2/16/1.png"
# )
# ConvertDCIMToPNG(
#     "/docs/src/kt/data_by_series_resized/Figina G.A. - Body 1.0-2/16/2.dcm",
#     "/docs/src/kt/data_by_series_resized/Figina G.A. - Body 1.0-2/16/2.png"
# )

In [60]:
def translate(value, leftMin, leftMax, rightMin, rightMax):
    """Linearly map ``value`` from [leftMin, leftMax] onto [rightMin, rightMax].

    Args:
        value: number to convert (values outside the source range extrapolate).
        leftMin, leftMax: bounds of the source range.
        rightMin, rightMax: bounds of the target range.

    Returns:
        The mapped value as a float. When the source range is degenerate
        (``leftMin == leftMax``), there is no slope to apply, so ``rightMin``
        is returned instead of raising ZeroDivisionError.
    """
    # Figure out how 'wide' each range is
    leftSpan = leftMax - leftMin
    rightSpan = rightMax - rightMin

    if leftSpan == 0:
        return float(rightMin)

    # Convert the left range into a 0-1 range (float)
    valueScaled = float(value - leftMin) / float(leftSpan)

    # Convert the 0-1 range into a value in the right range.
    return rightMin + (valueScaled * rightSpan)

def GetNumberOfDCMInFolder(folder):
    """Count the regular files in ``folder`` whose name contains ``".dcm"``.

    Paths are built with ``os.path.join``, so ``folder`` may be given with or
    without a trailing separator. The previous ``folder + name`` concatenation
    silently counted 0 unless the caller appended one.

    Args:
        folder: directory to inspect (sub-directories are not descended into).

    Returns:
        Number of matching files as an int.
    """
    return sum(
        1
        for name in os.listdir(folder)
        if ".dcm" in name and os.path.isfile(os.path.join(folder, name))
    )

def GetNumberOfImagesPerSeries(patients):
    """Return the DICOM file count of every ``series_folder/<patient>/<series>`` folder.

    Args:
        patients: unused; kept so existing callers keep working.

    Returns:
        list of ints, one per series folder, in the order ``glob`` yields them.
    """
    number_of_files = []
    for patient_folder in glob.glob(series_folder + "/*"):
        for patient_series_folder in glob.glob(patient_folder + "/*"):
            # os.path.join(x, "") appends the platform's separator; the literal
            # "\\" used before only formed a valid path on Windows.
            number_of_files.append(GetNumberOfDCMInFolder(os.path.join(patient_series_folder, "")))

    return number_of_files

def ResizeSeries(patients):
    """Re-index every sufficiently long series to a common number of slices.

    The target length is the smallest series size that is at least
    ``MIN_NUMBER_OF_PHOTO``. For each series under ``series_folder`` with at
    least that many files, every source index in ``1..n`` is mapped linearly
    onto ``1..target`` and the file is copied to
    ``resized_series_folder/<patient>/<series>/<mapped index>.dcm``. Several
    sources may map to the same index, in which case the last copy wins.

    Series that already have a destination folder are skipped, and shorter
    series are reported with "not enough photo in".

    Args:
        patients: only forwarded to ``GetNumberOfImagesPerSeries``.
    """
    number_of_series = GetNumberOfImagesPerSeries(patients)
    number_of_series_sorted = np.sort(number_of_series)
    idxs = np.where(number_of_series_sorted >= MIN_NUMBER_OF_PHOTO)
    if idxs[0].size == 0:
        # Previously this fell through to idxs[0][0] and died with IndexError.
        print("no series with at least", MIN_NUMBER_OF_PHOTO, "photos; nothing to resize")
        return
    min_photos = number_of_series_sorted[idxs[0][0]]
    print("min # of photo:", min_photos)

    for patient_folder in glob.glob(series_folder + "/*"):
        patient_name = patient_folder[len(series_folder) + 1:]
        print(patient_name)
        for found_series_folder in glob.glob(patient_folder + "/*"):
            series_name = found_series_folder[len(patient_folder) + 1:]
            patient_series_folder         = series_folder + "/" + str(patient_name) + "/" + str(series_name)
            patient_resized_series_folder = resized_series_folder + "/" + str(patient_name) + "/" + str(series_name)

            if os.path.exists(patient_resized_series_folder):
                continue

            # os.path.join(x, "") appends the platform separator (was a literal "\\").
            number_of_photos_in_current_folder = GetNumberOfDCMInFolder(os.path.join(patient_series_folder, ""))
            if number_of_photos_in_current_folder < min_photos:
                print("not enough photo in ", patient_series_folder)
                continue

            os.makedirs(patient_resized_series_folder, exist_ok=True)

            # Only *.dcm entries: the index is parsed by cutting the 4-character
            # extension, which would raise ValueError on any other file.
            for photo_name in glob.glob(patient_series_folder + "/*.dcm"):
                src_file_idx = int(photo_name[len(patient_series_folder) + 1:-4])
                dst_file_idx = int(round(translate(src_file_idx, 1, number_of_photos_in_current_folder, 1, min_photos)))
                shutil.copyfile(
                    photo_name,
                    patient_resized_series_folder + "/" + str(dst_file_idx) + ".dcm"
                )


In [67]:
def GenerateImagesFromSeries(dcim_folder, image_folder):
    """Render every ``<dcim_folder>/<patient>/<series>/<n>.dcm`` to a JPEG file.

    Images are written to ``<image_folder>/<patient>/<series>/<n>.jpg``, with
    ``n`` zero-padded to five digits. Each series folder path is printed as it
    is visited. A series whose output folder already exists is skipped
    entirely.

    Args:
        dcim_folder: root of the DICOM tree to read.
        image_folder: root of the image tree to create.
    """
    for patient_dir in glob.glob(dcim_folder + "/*"):
        patient = patient_dir[len(dcim_folder) + 1:]
        for series_dir in glob.glob(patient_dir + "/*"):
            series = series_dir[len(patient_dir) + 1:]
            print(series_dir)

            source_dir = "/".join((dcim_folder, patient, series))
            target_dir = "/".join((image_folder, patient, series))

            # Already rendered (or at least started) on an earlier run.
            if os.path.exists(target_dir):
                continue
            os.makedirs(target_dir, exist_ok=True)

            prefix_length = len(source_dir) + 1
            for dcm_path in glob.glob(source_dir + "/*.dcm"):
                # File name without the directory prefix and the ".dcm" suffix.
                stem = dcm_path[prefix_length:-4]
                ConvertDCIMToPNG(
                    "{}/{}.dcm".format(source_dir, stem),
                    "{}/{:05d}.jpg".format(target_dir, int(stem)),
                )


In [None]:
# Driver: runs the whole preprocessing pipeline, but only when every loaded file
# exposes all of the tags listed below (CheckFieldsPresence returns 1).
print("--- Check mandatory fields presence")
if(CheckFieldsPresence(["ReferringPhysicianName", "ImagePositionPatient","SliceLocation", "SliceThickness", "SeriesNumber", "InstanceNumber"])):
    print("--- Arrange patients data to series")
    for patient in patients:
        print("Patient:", patient[0]["PatientName"].value)
        # One patient at a time: rows are [file paths, SeriesNumber, InstanceNumber],
        # wrapped in an outer list so ArrangeBySeries can iterate over it.
        data = np.array(ExtractFields(["SeriesNumber", "InstanceNumber"], patients = [patient]))
        print("shape:", data.shape)
        ArrangeBySeries(data)
    
    # The next two steps work on the folder trees on disk, not on the in-memory datasets.
    print("--- Series resizing")
    ResizeSeries(patients)
    print("--- Image generating from series")
    GenerateImagesFromSeries(resized_series_folder, IMAGES_PATH)

--- Check mandatory fields presence
--- Arrange patients data to series
Patient: BUSLAEV S.N.
shape: (1, 3, 2496)

--- Series resizing
min # of photo: 237
Buslaev S.N. - Body 1.0
Davydov A.A. - Abdomen
not enough photo in  /docs/src/kt/data_by_series/Davydov A.A. - Abdomen/1
not enough photo in  /docs/src/kt/data_by_series/Davydov A.A. - Abdomen/200
not enough photo in  /docs/src/kt/data_by_series/Davydov A.A. - Abdomen/201
Demin A.S. - Body 1.0
Dimakova A.I. - Body 1.0
EFREMOV___SERGEY__ALEXEEVICH_VIPROMID370-85ML
Figina G.A. - Body 1.0
Figina G.A. - Body 1.0-2
Figina G.A. - Body 1.0-3
Golunov V.N. V.P. - Body 1.0 Ce
Grachev Yu.K. - Body 1.0 Ce
Kolotilin Yu.V. - Body 1.0
Kuznecova L.G. - Body 1.0
Levskaya A.A. - Body 1.0
Mironova  M.N. - Body 1.0
Polubar`ev A.Z. - Body 1.0
Romanov V.S. - Body 1.0
Suhanova A.V. - Body 1.0
Tarakanov E.A. - Body 1.0
Terent`eva E.M. - Body 1.0
Ushakov V.L. - Body 1.0
Ushakov V.L. - Body 1.0 Ce
Ushakov V.L. - Body 1.0 Ce-2
Validova E.S. - Body 1.0
Vinograd