In [1]:
pip install pydicom

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pydicom
import numpy 
import csv
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# problems :

# 1) how to window the scans for saving as PNGs?
# 2) actual files do not have dicom extension , 
#    there is a file named DICOMDIR which is dicom file 
#    but not has any data , but is present in patient tree

In [38]:
# tags to be removed for anonymization
tags = [(0x0009, 0x0010),(0x0010,0x0010),(0x0010,0x0020),(0x0010,0x0030),
        (0x0010,0x0040),(0x0008,0x0080),(0x0008,0x0081),(0x0008,0x0090),
        (0x0008, 0x3010),(0x0010, 0x1010),(0x0008,0x103e),(0x0008,0x1048),
        (0x0008, 0x1050),(0x0008,0x0016),(0x0008,0x0018),(0x0008,0x1110),
        (0x0008, 0x1032),(0x0018,0x9346),(0x0029,0x1140),(0x0032,0x1032),
        (0x0032, 0x1060),(0x0040, 0x0275),(0x0032,0x1064),(0x0002,0x0012),
        (0x0002,0x0013),(0x0008,0x1030),(0x0018,0x1030),(0x0020,0x0052)]

def are_dicoms(root,files):
    print()
    # TEMPORARY SOL TO PROBLEM NO. 2
    if len(files) == 0:
        return False
    if files[0] == "DICOMDIR":
            return False
    print("Files in function : ",files)
    for i in range(len(files)):
        file_path = os.path.join(root,files[i])
        try:
            dicom_file = pydicom.dcmread(file_path)
        except pydicom.errors.InvalidDicomError:
            # If any of the files is not dicom return false
            return False
    # If all the files get opened then return true
    return True

def anonimize_file(dcm_obj):
    for tag in tags:
        if tag in dcm_obj:
              del dcm_obj[tag]
    # else:
    #     print("The specified tag does not exist.")
    return dcm_obj

def create_dir(path):
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            print(f"Failed to create the directory : {path}")
            
def window_image(img, window_center,window_width,intercept,slope ,rescale=True):
    img = (img*slope +intercept) 

    window_center = 50
    window_width = 350

    img_min = window_center - window_width//2
    img_max = window_center + window_width//2 
    img[img<img_min] = img_min 
    img[img>img_max] = img_max 
    if rescale:
        img = (img - img_min) / (img_max - img_min)*255.0
    return img

def get_first_of_dicom_field_as_int(x):
    #get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x)
    if type(x) == pydicom.multival.MultiValue: return int(x[0])
    else: return int(x)

def get_windowing(data):
    dicom_fields = [data[('0028','1050')].value, #window center
                    data[('0028','1051')].value, #window width
                    data[('0028','1052')].value, #intercept
                    data[('0028','1053')].value] #slope
    return [get_first_of_dicom_field_as_int(x) for x in dicom_fields]

def sort_dict_by_key(dictionary):
    sorted_dict = dict(sorted(dictionary.items(), key=lambda item: item[0]))
    return sorted_dict

def find_largest_key_for_value(dictionary, value):
    largest_key = None
    for key in sorted(dictionary.keys(), reverse=True):
        if dictionary[key] == value:
            largest_key = key
            break

    return largest_key

In [41]:
data_path = input("Enter data folder path : ")

# let this be : D:\CA Lung
new_data_path = data_path.split("\\")[-1] + "_Anonymized"
main_folder_name = data_path.split("\\")[-1]

# png_folder_path = data
patient_ID_mapping = {}
patient_counter = 1.0

for root,dirs,files in os.walk(data_path,topdown = True):
    print("root : ",root)
    print("dirs : ",dirs)
    print("files : ",files)
    if are_dicoms(root,files):
        print("dicom file folder reached!")
        # here we get all the dicoms of a particular patient 
        # each dicom is a member of the files list 
        
        # root :- 
        # D:\CA Lung\101446474_vinleshwar_03.10.2020\DICOM\23041904\09080000
        # wanted root :-
        # D:\{new_data_path}\Patient_{patient_counter}\DICOM\23041904\09080000
        patient_UID = (pydicom.dcmread(os.path.join(root,files[0]))).PatientID
        
        # make a mapping 
        if(patient_UID in patient_ID_mapping.values()):
            # This means that one person has 2 scans in the data 
            # Find the last patient counter and update it by 0.1
            patient_ID_mapping = sort_dict_by_key(patient_ID_mapping)
            local_counter = find_largest_key_for_value(patient_ID_mapping,patient_UID)
            local_counter = round(local_counter,2)
            patient_ID_mapping[local_counter+0.1] = patient_UID
            
        else:
            patient_ID_mapping[patient_counter] = patient_UID
            patient_counter += 1
        
        new_root = root
        new_root_list = new_root.split("\\")
        
        # changing the main folder name to folder_anonymized
        for i in range(len(new_root_list)):
            if new_root_list[i] == main_folder_name:
                new_root_list[i] = new_data_path
                
                 #This is the patient folder name
                patient_ID_mapping = sort_dict_by_key(patient_ID_mapping)
                local_counter = find_largest_key_for_value(patient_ID_mapping,patient_UID)
                new_root_list[i+1] = f"Patient_{round(local_counter,2)}"
        
        new_root_path = "\\".join(new_root_list)
        print("new root path : ",new_root_list)
        
        slice_n = 1
        for dicom in files:
            
            # each dicom file
            dicom_path = os.path.join(root,dicom)
            dicom_obj = pydicom.dcmread(dicom_path)
            anon_dicom = anonimize_file(dicom_obj)
            array = anon_dicom.pixel_array
            window_center , window_width, intercept, slope = get_windowing(dicom_obj)
            array = window_image(array, window_center, window_width, intercept, slope, rescale = True)
            
            
            # make the new root directory and save the new dicom
            create_dir(new_root_path)
            print("Successfull in creation of directory")
            
            
            # saving new dicom files
            patient_ID_mapping = sort_dict_by_key(patient_ID_mapping)
            local_counter_patient = find_largest_key_for_value(patient_ID_mapping,patient_UID)
            local_counter_patient = round(local_counter_patient,2)
            output_path = os.path.join(new_root_path,f"Patient_{local_counter_patient}_{slice_n}")
            anon_dicom.save_as(output_path)
            
            
            
            # create a folder in this folder and save a png in that 
            png_folder_path = os.path.join(new_root_path,f"Images_Patient{local_counter_patient}") 
            create_dir(png_folder_path)
            plt.imsave(png_folder_path+f'/slice_{slice_n}.png',array, cmap='gray')
            
            slice_n += 1
        
        # FOR SAVING THE CSV FILE 
        
        for i in range(len(new_root_path.split("\\"))):
            if new_root_path.split("\\")[i] == new_data_path:
                csv_folder_path = "\\".join(new_root_path.split("\\")[:i+1])
                break
        print("csv folder path : ",csv_folder_path)
        output_file_path = csv_folder_path + '\mapping.csv'
        
        
            
        with open(output_file_path, 'w', newline='') as csvfile:
            fieldnames = ['PatientID', 'mapping']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()

            for key, value in patient_ID_mapping.items():
                writer.writerow({'PatientID': key, 'mapping': value})
        
        
    print("**********************")

# Now we need to store this mapping inside the data_path folder only


        

Enter data folder path : C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data
root :  C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data
dirs :  ['CRLM-CT-1001', 'CRLM-CT-1002', 'CRLM-CT-1003', 'CRLM-CT-1004', 'CRLM-CT-1005', 'Patient-1', 'Patient-2', 'Patient-3', 'Patient-4', 'Patient-5']
files :  []

**********************
root :  C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data\CRLM-CT-1001
dirs :  ['06-06-1992-NA-CT ANGIO ABD WITH PEL-75163']
files :  []

**********************
root :  C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data\CRLM-CT-1001\06-06-1992-NA-CT ANGIO ABD WITH PEL-75163
dirs :  ['101.000000-NA-71548']
files :  []

**********************
root :  C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data\CRLM-CT-1001\06-06-1992-NA-CT ANGIO ABD WITH PEL-75163\101.000000-NA-71548
dirs :  []
files :  ['1-001.dcm', '1-002.dcm', '1-003.dcm', '1-004.dcm', '1-005.dcm', '1-006.dcm', '1-007.dcm', '1-008.dcm', '1-009.dcm', '1-0

Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
S

dicom file folder reached!
new root path :  ['C:', 'Users', 'RITWIK KASHYAP', 'Desktop', 'aiims', 'Data-Anonymization', 'Data_Anonymized', 'Patient_2.0', '07-12-1992-NA-CT ANGIO ABD WITH CH AND PEL-46457', '101.000000-NA-36282']
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of dire

dicom file folder reached!
new root path :  ['C:', 'Users', 'RITWIK KASHYAP', 'Desktop', 'aiims', 'Data-Anonymization', 'Data_Anonymized', 'Patient_3.0', '09-24-1994-NA-CT ANGIO LIVER WITH CHPEL-87341', '105.000000-NA-56108']
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directo

dicom file folder reached!
new root path :  ['C:', 'Users', 'RITWIK KASHYAP', 'Desktop', 'aiims', 'Data-Anonymization', 'Data_Anonymized', 'Patient_5.0', '01-31-1994-NA-CT ANGIO LIVER WITH CHPEL-61769', '101.000000-NA-58706']
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directo

Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
csv folder path :  C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data_Anonymized
**********************
root :  C:\Users\RITWIK KASHYAP\Desktop\aiims\Data-Anonymization\Data\Patient-5
dirs :  []
files :  ['IMG-0001-00001.dcm', 'IMG-0002-00001.dcm', 'IMG-0002-00002.dcm', 'IMG-0002-00003.dcm', 'IMG-0002-00004.dcm']

Files in function :  ['IMG-0001-00001.dcm', 'IMG-0002-00001.dcm', 'IMG-0002-00002.dcm', 'IMG-0002-00003.dcm', 'IMG-0002-00004.dcm']
dicom file folder reached!
new root path :  ['C:', 'Users', 'RITWIK KASHYAP', 'Desktop', 'aiims', 'Data-Anonymization', 'Data_Anonymized', 'Patient_6.4']
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
Successfull in creation of directory
csv folder path :  C:\Users\RITWIK KASHYAP\Desktop\aii