## Preprocessing

### Imports libraries

In [5]:
import os
import sys
sys.path.append(r"/Users/LennartPhilipp/Desktop/Uni/Prowiss/Code/Brain_Mets_Classification")

from tqdm import tqdm
from datetime import datetime
import shutil
import dicom2nifti

import brain_mets_classification.config as config
import brain_mets_classification.custom_funcs as funcs

In [4]:
# path to the sample n=30 directory
sampleDirectoryPath = f"{config.path}/RgbBrainMetsSampleN30"

### Step 0: Copies files and creates new folders with the desired MRI sequences already renamed and converted into nifti files

Sequences that will be used for this project:
- axiale T2
- axiale Flair
- axiale T1 vor + nach KM
- sagittale T1 MPRage nach KM
- evtl. DWI (Diffsionswichtung) + ADC

In [6]:
# Creates a new folder within the directory for all the files of the desired MRI sequences
now = datetime.now()
timeFormatted = now.strftime("%Y-%m-%d %H:%M:%S")
pathToPreprocessing0 = f"{config.path}/Rgb_Brain_Mets_Preprocessing#0_{timeFormatted}"
os.mkdir(pathToPreprocessing0)

# list all the names of the folders within the directory
patientFolders = os.listdir(sampleDirectoryPath)

 # removes the .DS_Store file if it can be found within the directory
dsStore = ".DS_Store"
if dsStore in patientFolders:
    patientFolders.remove(dsStore)

patientsIDs = []
#print(patientFolders)


for folder in tqdm(patientFolders): # loops through the all folders for each patient

    mriSequences = []

    patientID = folder.split(" - ")[1]

    # adds the patientID to the list of patientIDs if is hasn't been added before
    if patientID not in patientsIDs:
        patientsIDs.append(patientID)
    
    folderPath = os.path.join(sampleDirectoryPath, folder) #path to the content of each patient's folder
    os.chdir(folderPath)
    mriImagesFolders = os.listdir(os.getcwd())

    for count in range(0, len(mriImagesFolders)):
        mriImagesFolder = mriImagesFolders[count]
        if mriImagesFolder != config.dsStore:
            os.chdir(mriImagesFolder)
        else:
            continue   
        

        mriSequences = os.listdir(os.getcwd())
        # print(mriSequences)

        for sequence in mriSequences:

            #print(sequence)

            sequenceLC = sequence.lower()
            if any(whiteListSeq in sequenceLC for whiteListSeq in config.seq_whitelist) and not any(blackListSeq in sequenceLC for blackListSeq in config.seq_blacklist):
                # sequence fits both the whiteList as well as the blackList

                # create folder for patient
                funcs.createFolderForPatient(path=pathToPreprocessing0, patientID=patientID)
                
                pathToSequence = os.path.join(folderPath, mriImagesFolder, sequence)

                #T1/T1CE
                if any(sequence in sequenceLC for sequence in config.T1list) and not any(subSequence in sequenceLC for subSequence in config.sub):
                    
                    # check if T1 is contrast enhanced (CE)
                    if any(ceSequence in sequenceLC for ceSequence in config.KMlist):
                        dicom2nifti.convert_directory(pathToSequence, f"{pathToPreprocessing0}/{patientID}")

                        unRenamedFile = funcs.getUnrenamedFile(path=f"{pathToPreprocessing0}/{patientID}")
                        os.rename(unRenamedFile, f"{pathToPreprocessing0}/{patientID}/{patientID}_T1CE.nii.gz")
                        
                        # convert 2 nift, call the file [PATIENTID]_T1CE and move to Patient Folder

                    else:
                        dicom2nifti.convert_directory(pathToSequence, f"{pathToPreprocessing0}/{patientID}")
                        unRenamedFile = funcs.getUnrenamedFile(path=f"{pathToPreprocessing0}/{patientID}")
                        os.rename(unRenamedFile, f"{pathToPreprocessing0}/{patientID}/{patientID}_T1.nii.gz")
                    # convert 2 nift and call the file [PATIENTID]_T1

                #T2
                if any(T2Sequence in sequenceLC for T2Sequence in config.T2list) and not any(T2SternSequence in sequenceLC for T2SternSequence in config.T2STERNlist) and not any(FlairSequence in sequenceLC for FlairSequence in config.FLAIRlist):
                    dicom2nifti.convert_directory(pathToSequence, f"{pathToPreprocessing0}/{patientID}")
                    unRenamedFile = funcs.getUnrenamedFile(path=f"{pathToPreprocessing0}/{patientID}")
                    os.rename(unRenamedFile, f"{pathToPreprocessing0}/{patientID}/{patientID}_T2.nii.gz")

                #FLAIR
                if any(FlairSequence in sequenceLC for FlairSequence in config.FLAIRlist):
                    dicom2nifti.convert_directory(pathToSequence, f"{pathToPreprocessing0}/{patientID}")
                    unRenamedFile = funcs.getUnrenamedFile(path=f"{pathToPreprocessing0}/{patientID}")
                    os.rename(unRenamedFile, f"{pathToPreprocessing0}/{patientID}/{patientID}_FLAIR.nii.gz")

        # move back out
        os.chdir(folderPath)

print("done")

  0%|          | 0/30 [00:00<?, ?it/s]

Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
Removing duplicate slice from series
R

done





### Step 1: Copies patients with the correct amount of sequences (4) into a new directory

In [7]:
#To-do:
# - loop through all the patients from Preprocessing#0
# - only include patients with exactly 4 files and only with FLAIR, T1, T1CE, T2
# - copy files to new directory (i.e. Preprocessing#1)

# Creates a new folder within the directory only for the patients with the correct amount of sequences
now = datetime.now()
timeFormatted = now.strftime("%Y-%m-%d %H:%M:%S")
pathToPreprocessing1 = f"{config.path}/Rgb_Brain_Mets_Preprocessing#1_{timeFormatted}"
os.mkdir(pathToPreprocessing1)

patientFoldersPreprocessing0 = os.listdir(pathToPreprocessing0)
print(patientFoldersPreprocessing0)

for patientID in patientFoldersPreprocessing0:

    if patientID == ".DS_Store": # ignores the .DS_Store directory
        continue
    
    # list of different files within the patient folder
    patientFiles = os.listdir(f"{pathToPreprocessing0}/{patientID}")

    shouldCopyPatientFiles = False
    
    # check amount of files and correct sequences
    if len(patientFiles) == 4:
        shouldCopyPatientFiles = True
    
    for file in patientFiles:
        sequence = file.split("_")[1] # for example: sequence = FLAIR.nii.gz
        sequenceName = sequence.split(".")[0] # for example: sequenceName = FLAIR
        
        if sequenceName not in config.desiredSequences:
            shouldCopyPatientFiles = False
        

    if shouldCopyPatientFiles:
        # create a new patient directory in pathToPreprocessing1
        pathToPreprocessing1Patient = f"{pathToPreprocessing1}/{patientID}"
        os.mkdir(pathToPreprocessing1Patient)

        # copies files to new directory
        for file in patientFiles:
            shutil.copy2(f"{pathToPreprocessing0}/{patientID}/{file}", pathToPreprocessing1Patient)

['01199093', '01220269', '01150136', '01134825', '01083248', '01189050', '01311383', '01100109', '01261127', '01243841', '01152379', '01001917', '01106844', '01241505', '01307298', '01269967', '01190738', '01111974', '01038520', '01321873', '01122863', '01108350', '01009590', '01022787', '01055292', '01314225', '01288896', '01319244', '01005097', '01109318']


### Step 2: Brain extraction
https://github.com/MIC-DKFZ/HD-BET