## Tri des images DICOM

In [5]:
import os
import pydicom 
from pydicom import dcmread
from math import *
import numpy as np
import SimpleITK as sitk
import sys, time, os
import re

In [6]:
def clean_text(string):
    """Return ``string`` lower-cased, with punctuation and spaces as underscores.

    Standardised descriptions are easier to search for and to reuse as names.
    """
    # A single translation table swaps every forbidden character in one pass.
    to_underscore = str.maketrans({symbol: "_" for symbol in '*.,"\\/|[]:; '})
    return string.translate(to_underscore).lower()

def clean_folder_path(chemin_sortie):
    """Return the path with forward slashes and with ``<`` / ``>`` replaced by ``_``."""
    substitutions = (('\\', '/'), ('<', '_'), ('>', '_'))
    for unwanted, replacement in substitutions:
        chemin_sortie = chemin_sortie.replace(unwanted, replacement)
    return chemin_sortie

def clean_filename(string):
    """Return a lower-cased file name in which punctuation and spaces become ``_``."""
    forbidden = set('*.,"\\/|[]:; ')
    # Rebuild the name character by character, substituting the forbidden ones.
    sanitized = "".join("_" if char in forbidden else char for char in string)
    return sanitized.lower()

def list_file_adresse(src):
    """Return the path of every file found under ``src``, searched recursively.

    No DICOM check is done here: every file is listed, whatever its extension.

    Args:
        src: root folder to walk.

    Returns:
        A list of file paths (``root`` joined with the file name), in the
        order produced by ``os.walk``. Empty when ``src`` holds no file.
    """
    print('reading file list...')
    # Joining path components cannot fail in a way worth hiding, so the former
    # bare ``except: pass`` (which also swallowed KeyboardInterrupt) is gone.
    unsortedList = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(src)
        for file in files
    ]
    print('%s files found.' % len(unsortedList))
    return unsortedList

def _tag_text(ds, keyword, default):
    """Return the DICOM tag ``keyword`` of ``ds`` as text, or ``default`` if absent/None."""
    value = ds.get(keyword, default)
    return default if value is None else str(value)


def trie_fichiers_dicom(unsortedList,dst):
    """Move each file into ``dst/<PatientID>/<StudyDate>/<SeriesDescription>/``.

    The three folder names come from the DICOM header, normalised with
    ``clean_text`` ("NA" when the tag is missing). The file itself is renamed
    ``<modality>_<series_instance_uid>_<instance_number>.dcm`` after
    ``clean_filename``. Files are moved (``os.renames``), not copied.

    A file that cannot be read, or whose folder/move fails, is reported on
    stdout and skipped; the remaining files are still processed.

    Args:
        unsortedList: iterable of paths of the files to sort.
        dst: root folder of the sorted tree.
    """
    for dicom_loc in unsortedList:
        # Only header tags are needed, so pixel data is not loaded.
        # ``force=True`` lets pydicom try files without a DICOM preamble; any
        # parsing error must not abort the whole run, hence the broad catch at
        # this per-file boundary.
        try:
            ds = pydicom.dcmread(dicom_loc, force=True, stop_before_pixels=True)
        except Exception as err:
            print(str(dicom_loc))
            print('Erreur lors de la lecture du fichier: %s' % err)
            continue

        patientID = clean_text(_tag_text(ds, "PatientID", "NA"))
        studyDate = clean_text(_tag_text(ds, "StudyDate", "NA"))
        seriesDescription = clean_text(_tag_text(ds, "SeriesDescription", "NA"))
        modality = _tag_text(ds, "Modality", "NA")
        seriesInstanceUID = _tag_text(ds, "SeriesInstanceUID", "NA")
        instanceNumber = _tag_text(ds, "InstanceNumber", "0")
        fileName = clean_filename(modality + "." + seriesInstanceUID + "." + instanceNumber) + ".dcm"

        # Create the cleaned folder itself. The previous code tested the
        # existence of the *uncleaned* path, so a '<' or '>' in a tag made
        # makedirs fail on every file after the first one.
        dossier_sortie = clean_folder_path(os.path.join(dst, patientID, studyDate, seriesDescription))
        try:
            os.makedirs(dossier_sortie, exist_ok=True)
        except OSError as err:
            print('Erreur lors de la creation du fichiers')
            print(err)
            continue

        chemin_sortie = clean_folder_path(os.path.join(dst, patientID, studyDate, seriesDescription, fileName))
        try:
            print('Saving out file: %s - %s - %s.' % (patientID, studyDate, seriesDescription ))
            # NOTE(review): two files yielding the same fileName (e.g. missing
            # InstanceNumber) target the same path — confirm this cannot happen
            # in the input data.
            os.renames(dicom_loc, chemin_sortie)
        except OSError as err:
            print(str(dicom_loc))
            print('Erreur ?')
            print(err)

# user specified parameters

src = "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/00_Preprocessing"        # input folder
dst = "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/01_Preprocessing_trie"   # output folder

# List every file below the input folder...
unsortedList=list_file_adresse(src)
# ...then move each one into the sorted tree.
trie_fichiers_dicom(unsortedList,dst)

# Report completion only once the sort has actually run (it used to be
# printed before any file was processed).
print('done.')

done.
reading file list...
424 files found.
Saving out file: 202210140 - 20221110 - tomotherapy_structure_set.
Saving out file: 202210140 - 20221110 - tomotherapy_planned_dose.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 -

Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t1_gado.
Saving out file: 202210140 - 20221110 - fusion_t

## DICOM images to NIfTI

In [7]:
# Folder scanned for DICOM series (same path as `dst` in the sorting cell).
data_directory      = "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/01_Preprocessing_trie"
# Folder in which main() writes the converted .nii files.
adresse_save_result = "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii"

def main(data_directory, adresse_save_result):
    """Convert each DICOM series found below ``data_directory`` to a ``.nii`` file.

    Every sub-folder of ``data_directory`` is probed with GDCM (the root folder
    itself is not scanned). Each series found is loaded with SimpleITK and
    written into ``adresse_save_result`` as ``<a>_<b>_<c>.nii``, where a/b/c
    are the three folder names directly above the DICOM files (presumably
    patient / study date / series description when the input comes from the
    sorting step — confirm against the actual layout).

    When one folder holds several series, the first keeps that name and the
    following ones get a ``_<index>`` suffix so that they no longer overwrite
    each other.

    Args:
        data_directory: root of the folder tree holding the DICOM series.
        adresse_save_result: output folder; created if it does not exist.

    Returns:
        None. Progress, errors and the final counters are printed on stdout.
    """
    timeInit        = time.time()
    Nimageouverte   = 0  # series discovered
    Nimagetraitees  = 0  # series actually converted and written

    # Without the output folder every WriteImage call would fail.
    os.makedirs(adresse_save_result, exist_ok=True)

    # Collect every sub-folder, at any depth.
    directory_list = [
        os.path.join(root, subdirname)
        for root, dirs, _files in os.walk(data_directory)
        for subdirname in dirs
    ]

    for directory in directory_list:
        series_dir = directory.replace('\\','/')
        series_IDs = sitk.ImageSeriesReader.GetGDCMSeriesIDs(series_dir)
        if not series_IDs:
            print("ERROR: given directory \""+series_dir+"\" does not contain a DICOM series.")
            continue

        for series_index, series_ID in enumerate(series_IDs):
            Nimageouverte     = Nimageouverte+1
            series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(series_dir, series_ID,useSeriesDetails=False)

            # Header read kept as a sanity check of the first slice. pydicom
            # signals bad files with InvalidDicomError, which the former
            # ``except RuntimeError`` could not catch.
            try:
                pydicom.dcmread(series_file_names[0], stop_before_pixels=True)
            except (pydicom.errors.InvalidDicomError, OSError, RuntimeError):
                print ("--> Probleme avec la lecture des metadata")
                continue

            try:
                timeRMR1       = time.time()
                series_reader  = sitk.ImageSeriesReader()
                series_reader.SetFileNames(series_file_names)
                img            = series_reader.Execute()

                # Name the output after the three folders above the slices;
                # slicing (instead of fixed negative indices) tolerates
                # shallower paths.
                path_parts = series_file_names[0].split("/")
                name       = "_".join(path_parts[-4:-1])
                if series_index > 0:
                    name = name + "_" + str(series_index)
                save_path = os.path.join(adresse_save_result, name + ".nii")
                sitk.WriteImage(img,save_path)

                # Counted only once the file is written, so failures are not
                # reported as processed.
                Nimagetraitees = Nimagetraitees+1
                timeRMR2               = time.time()
                TimeForrunFunctionRMR2 = timeRMR2 - timeRMR1
                print(u"La fonction de traitement s'est executée en " + str(TimeForrunFunctionRMR2) +" secondes")
                print("\n")
            except RuntimeError:
                print ("--> Probleme avec l'importation et/ou le traitement d'image")

    print("\n")
    print("Nombre d'image total lue:"+str(Nimageouverte)+"\n")
    print("Nombre d'image total traité:"+str(Nimagetraitees)+"\n" )
    timefinal = time.time()
    TimeTotal = timefinal - timeInit
    print(u"Le traitement de l'ensemble des données c'est executée en " + str(TimeTotal) +" secondes")

# ---------------------------------------------------------------------------
# Run the conversion with the two folders defined at the top of this cell.
# ---------------------------------------------------------------------------
main(data_directory, adresse_save_result)

ERROR: given directory "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/01_Preprocessing_trie/202210140" does not contain a DICOM series.
ERROR: given directory "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/01_Preprocessing_trie/202210140/20221110" does not contain a DICOM series.


GDCMSeriesFileNames (0xc74bcb0): No Series were found

GDCMSeriesFileNames (0xc74bcb0): No Series were found

ImageSeriesReader (0xc74bcb0): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000714536



La fonction de traitement s'est executée en 0.27101874351501465 secondes


La fonction de traitement s'est executée en 0.16820216178894043 secondes


La fonction de traitement s'est executée en 0.03710818290710449 secondes


ERROR: given directory "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/01_Preprocessing_trie/202210140/20221110/tomotherapy_structure_set" does not contain a DICOM series.


Nombre d'image total lue:3

Nombre d'image total traité:3

Le traitement de l'ensemble des données c'est executée en 0.6413118839263916 secondes


GDCMSeriesFileNames (0xc74bcb0): No Series were found



## Renommer fichier

In [8]:
def replace_date(file_path):
    """Rename the files of ``file_path`` from ``<id>_<date>_<series>`` to ``<id>_<label>.nii``.

    For each entry, the first rule whose keyword appears in the name is
    selected. If the name then starts with ``<digits>_<digits>_<keyword>`` the
    file is renamed through ``perform_rename``; otherwise a "Skipped" line is
    printed. Entries matching no keyword are left untouched.
    """
    # (keyword looked for in the name, label of the new name, skip-message wording)
    rules = (
        ("fusion", "IRMpre", '"fusion"'),
        ("sag_t1_mprage_3d_gado_mpr_tra", "IRMpost_4mois", '"XXX"'),
        ("tomotherapy_planned_dose", "RTDOSE", "date replacement"),
        ("doses_eclipse", "RTDOSE", "date replacement"),
        ("kvct", "CT", "date replacement"),
        ("crane_2mm", "CT", "date replacement"),
    )

    for entry in os.listdir(file_path):
        # Only the first matching keyword counts, like an if/elif chain.
        rule = next((candidate for candidate in rules if candidate[0] in entry), None)
        if rule is None:
            continue

        keyword, label, skip_reason = rule
        found = re.match(r'(\d+)_(\d+)_' + keyword, entry)
        if found is None:
            print(f'Skipped (no match for {skip_reason}): {entry}')
            continue

        # Group 1 is the leading identifier; the date (group 2) is dropped.
        perform_rename(file_path, entry, f'{found.group(1)}_{label}.nii')
               
def perform_rename(file_path, old_file_name, new_file_name):
    """Rename ``old_file_name`` to ``new_file_name`` inside the folder ``file_path``.

    An existing destination is overwritten on every platform (``os.rename``
    did so silently on POSIX but raised on Windows); a warning is printed
    first so that the replacement is visible in the log.

    Args:
        file_path: folder containing the file.
        old_file_name: current name of the file.
        new_file_name: name to give to the file.

    Raises:
        OSError: if the source file is missing or the rename fails.
    """
    old_file_path = os.path.join(file_path, old_file_name)
    new_file_path = os.path.join(file_path, new_file_name)

    if os.path.exists(new_file_path):
        print(f'Warning: overwriting existing file {new_file_path}')
    os.replace(old_file_path, new_file_path)
    print(f'Renamed: {old_file_path} -> {new_file_path}')

# Folder whose files are renamed (same path as `adresse_save_result` above).
folder_path = "/home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii"


# Rename the files in place.
replace_date(folder_path)

Renamed: /home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii/202210140_20221110_fusion_t1_gado.nii -> /home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii/202210140_IRMpre.nii
Renamed: /home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii/202210140_20221110_kvct_image_set.nii -> /home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii/202210140_CT.nii
Renamed: /home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii/202210140_20221110_tomotherapy_planned_dose.nii -> /home/aurelien/Documents/IA-Med-Im/Partie1_GBM_RT/02_Preprocessing_nii/202210140_RTDOSE.nii
