# Create list of patients with correct MRI sequences

### Imports

In [2]:
import os
import sys
sys.path.append(r"/Users/LennartPhilipp/Desktop/Uni/Prowiss/Code/Brain_Mets_Classification")

from tqdm import tqdm
from datetime import datetime
import shutil
import pandas as pd

import brain_mets_classification.config as config
import brain_mets_classification.custom_funcs as funcs

In [3]:
# Destination directory for the cleaned MRI data; stays empty until the
# copy step further down assigns the freshly created preprocessing folder.
pathToCleanMRIList = ""

# Root directory that holds the "Anonymized - <ID>" patient folders.
# Alternative root for the full data set: f"{config.path}/originalPatientFiles"
path_to_folder = "{}/RgbBrainMetsSampleN30".format(config.path)

### Copy files of patients with the correct sequences into a new directory and create a .csv file with the IDs of said patients

In [4]:
#Example file structure
#├── Anonymized - 01005097
#│   └── Mrt Body
#│       ├── Diffusion trace tra schnell_ADC - 8
#│       │   ├── IM-2330-0001-0001.dcm
#│       │   ├── IM-2330-0002-0001.dcm
#...
#│       │   ├── IM-2469-0026-0001.dcm
#│       │   └── IM-2469-0027-0001.dcm
#│       ├── Diffusion trace tra schnell_TRACEW - 7
#│       │   ├── IM-2329-0001-0001.dcm
#│       │   ├── IM-2329-0002-0001.dcm
#...
#│       │   ├── IM-2468-0026-0001.dcm
#│       │   └── IM-2468-0027-0001.dcm
#│       ├── T1 mp-rage3d we sag 1mm KM - 13
#│       │   ├── IM-2335-0001-0001.dcm
#│       │   ├── IM-2335-0002-0001.dcm
#...
#│       │   ├── IM-2474-0159-0001.dcm
#│       │   └── IM-2474-0160-0001.dcm
#├── Anonymized - 12345678
#...

# Creates a new directory for all the patient folders
pathToCleanMRIList = funcs.createNewPreprocessingStepFolder(step=0)


def _matches_any(name, keywords):
    """Return True if at least one entry of *keywords* is a substring of *name*."""
    return any(keyword in name for keyword in keywords)


def _copy_sequence(source_dir, sequence_name, patient_dir, patientID, sequence_type, sequence_list):
    """Copy all DICOM files of one source sequence into a new sequence folder.

    Args:
        source_dir: directory of the original sequence (contains the .dcm files).
        sequence_name: original folder name of the sequence; recorded in
            *sequence_list*.
        patient_dir: output directory of the patient.
        patientID: ID of the patient, forwarded to ``funcs.createSequenceFolder``.
        sequence_type: label of the sequence ("T1", "T1CE", "T2", "FLAIR",
            "DWI" or "ADC").
        sequence_list: names already collected for this label; mutated in place.
    """
    # The folder has to be created BEFORE the name is appended: the helper
    # receives the list as it was prior to this sequence.
    # NOTE(review): presumably createSequenceFolder uses len(sequence_list) as
    # the running index of the folder name - confirm in custom_funcs.
    path_to_sequence = funcs.createSequenceFolder(
        path=patient_dir,
        patientID=patientID,
        sequence=sequence_type,
        sequence_list=sequence_list,
    )
    sequence_list.append(sequence_name)

    # copy each file individually into the path_to_sequence folder
    for dicomFile in os.listdir(source_dir):
        # ignores the ds_folders
        if config.dsStore in dicomFile:
            continue
        shutil.copyfile(
            os.path.join(source_dir, dicomFile),
            os.path.join(path_to_sequence, dicomFile),
        )


# Goes through list of files/folders at path_to_folder and only adds the directories to the list
folderList = [
    folder
    for folder in os.listdir(path_to_folder)
    if os.path.isdir(os.path.join(path_to_folder, folder))
]

patientIDs = []

# Loops through all the "Anonymized - #######" folders
for patient_folder in tqdm(folderList):

    # original sequence names that were sorted into each category for this patient
    T1_sequences = []
    T1CE_sequences = []
    T2_sequences = []
    FLAIR_sequences = []
    DWI_sequences = []
    ADC_sequences = []

    # ignores the ds_folders
    if config.dsStore in patient_folder:
        continue

    # all folders are named like "Anonymized - 12345678"; anything else is
    # skipped with a warning instead of aborting the whole run with an IndexError
    folder_name_parts = patient_folder.split(" - ")
    if len(folder_name_parts) < 2:
        print(f"WARNING: skipping folder with unexpected name: {patient_folder}")
        continue
    patientID = folder_name_parts[1]

    # adds the patientID to the list patientIDs if it hasn't been added before
    if patientID not in patientIDs:
        patientIDs.append(patientID)

    path_to_MRI_session_folders = os.path.join(path_to_folder, patient_folder)
    path_to_patient = os.path.join(pathToCleanMRIList, patientID)

    # loops through the different MRI sessions
    for mri_session in os.listdir(path_to_MRI_session_folders):

        # ignores the ds_folders
        if config.dsStore in mri_session:
            continue

        path_to_mri_sequences = os.path.join(path_to_MRI_session_folders, mri_session)

        # loops through the different sequences created during each MRI session
        for sequence in os.listdir(path_to_mri_sequences):

            # ignores the ds_folders
            if config.dsStore in sequence:
                continue

            # all keyword lists are matched against the lower-cased name
            sequences_lower_cased = sequence.lower()

            # only keep sequences that match the whitelist and do NOT match the blacklist
            if not _matches_any(sequences_lower_cased, config.seq_whitelist):
                continue
            if _matches_any(sequences_lower_cased, config.seq_blacklist):
                continue

            # create folder for patient
            funcs.createFolderForPatient(
                path=pathToCleanMRIList,
                patientID=patientID,
            )

            # The category checks are independent of each other, so a single
            # sequence can be sorted into more than one category.
            targets = []

            # T1 / T1CE
            if _matches_any(sequences_lower_cased, config.T1list) and not _matches_any(sequences_lower_cased, config.sub):
                # a T1 sequence that also matches the contrast keywords is T1CE
                if _matches_any(sequences_lower_cased, config.KMlist):
                    targets.append(("T1CE", T1CE_sequences))
                else:
                    targets.append(("T1", T1_sequences))

            # T2 (neither T2* nor FLAIR)
            if (
                _matches_any(sequences_lower_cased, config.T2list)
                and not _matches_any(sequences_lower_cased, config.T2STERNlist)
                and not _matches_any(sequences_lower_cased, config.FLAIRlist)
            ):
                targets.append(("T2", T2_sequences))

            # FLAIR
            if _matches_any(sequences_lower_cased, config.FLAIRlist):
                targets.append(("FLAIR", FLAIR_sequences))

            # Diffusion: ADC map if the ADC keywords match, plain DWI otherwise
            if _matches_any(sequences_lower_cased, config.DWIlist):
                if _matches_any(sequences_lower_cased, config.ADClist):
                    targets.append(("ADC", ADC_sequences))
                else:
                    targets.append(("DWI", DWI_sequences))

            # create pathToCleanMRIList/patientID/<type folder> and copy the files
            for sequence_type, sequence_list in targets:
                _copy_sequence(
                    source_dir=os.path.join(path_to_mri_sequences, sequence),
                    sequence_name=sequence,
                    patient_dir=path_to_patient,
                    patientID=patientID,
                    sequence_type=sequence_type,
                    sequence_list=sequence_list,
                )

    sequences_by_type = {
        "T1_sequences": T1_sequences,
        "T1CE_sequences": T1CE_sequences,
        "T2_sequences": T2_sequences,
        "FLAIR_sequences": FLAIR_sequences,
        "DWI_sequences": DWI_sequences,
        "ADC_sequences": ADC_sequences,
    }

    # add different Sequence Names as .txt file
    # NOTE(review): DWI/ADC are not part of this condition, so a patient with
    # only diffusion sequences gets no sequences.txt - confirm this is intended.
    if (T1_sequences or T1CE_sequences or T2_sequences or FLAIR_sequences):
        # mode "x" raises FileExistsError if sequences.txt already exists
        with open(f"{pathToCleanMRIList}/{patientID}/sequences.txt", "x", encoding="utf-8") as f:
            f.write("\n".join(f"{label}: {names}" for label, names in sequences_by_type.items()))

    # warn if more than one source sequence ended up in the same category
    for sequence_array in sequences_by_type.values():
        if len(sequence_array) > 1:
            print(f"WARNING {patientID}: {len(sequence_array)} sequences of same type, {sequence_array}")


print("done")

  0%|          | 0/30 [00:00<?, ?it/s]

 47%|████▋     | 14/30 [00:02<00:03,  4.35it/s]



 50%|█████     | 15/30 [00:03<00:03,  3.75it/s]



100%|██████████| 30/30 [00:06<00:00,  4.98it/s]

done





### Create pandas Dataframe of all Patients and Sequences
patient:

ID: int

t1: bool

t1ce: bool

t2: bool

flair: bool

dwi: bool

dwi adc: bool


In [None]:
# loop through files of each patient in preprocessing0
# check if t1, t1ce, t2, flair, dwi, adc exist
# create list of boolean values for the existence of each sequence
# add list of [patientID, bool, bool, bool, bool, bool, bool] to list of patients
# create dataframe based on step above
# ?
# profit

# run on all patients
# check how many actually have all sequences

Example File Structure below

In [None]:
# ├── 01001917
# │   ├── 01001917_T2_0
# │   │   ├── IM-1281-0001-0001.dcm
# │   │   ├── IM-1281-0002-0001.dcm
# ...
# │   │   ├── IM-1420-0162-0001.dcm
# │   │   └── IM-1420-0163-0001.dcm
# │   ├── 01001917_T2_1
# │   │   ├── IM-1288-0001-0001.dcm
# │   │   ├── IM-1288-0002-0001.dcm
# ...
# │   │   ├── IM-1427-0030-0001.dcm
# │   │   └── IM-1427-0031-0001.dcm
# │   └── sequences.txt
# ├── 01005097
# │   ├── 01005097_ADC_0
# │   │   ├── IM-2330-0001-0001.dcm
# │   │   ├── IM-2330-0002-0001.dcm
# ...
# │   │   ├── IM-2469-0026-0001.dcm
# │   │   └── IM-2469-0027-0001.dcm
# │   ├── 01005097_DWI_0
# │   │   ├── IM-2329-0001-0001.dcm
# │   │   ├── IM-2329-0002-0001.dcm
# ...
# │   │   ├── IM-2468-0026-0001.dcm
# │   │   └── IM-2468-0027-0001.dcm
# │   ├── 01005097_FLAIR_0
# │   │   ├── IM-2327-0001-0001.dcm
# │   │   ├── IM-2327-0002-0001.dcm
# ...
# │   │   ├── IM-2466-0026-0001.dcm
# │   │   └── IM-2466-0027-0001.dcm
# │   ├── 01005097_T1CE_0
# │   │   ├── IM-2333-0001-0001.dcm
# │   │   ├── IM-2333-0002-0001.dcm
# ...
# │   │   ├── IM-2472-0026-0001.dcm
# │   │   └── IM-2472-0027-0001.dcm
# │   ├── 01005097_T1_0
# │   │   ├── IM-2332-0001-0001.dcm
# │   │   ├── IM-2332-0002-0001.dcm
# ...
# │   │   ├── IM-2467-0026-0001.dcm
# │   │   └── IM-2467-0027-0001.dcm
# │   └── sequences.txt
# ├── 01009590
# │   ├── 01009590_ADC_0
# │   │   ├── IM-0866-0001-0001.dcm
# │   │   ├── IM-0866-0002-0001.dcm
# ...
# │   │   ├── IM-0866-0020-0001.dcm
# │   │   ├── IM-0866-0021-0001.dcm

In [9]:
# Walks through every patient folder of the cleaned data set and derives, from
# the names of its sequence folders ("<patientID>_<sequence>_<counter>"), which
# of the desired sequences exist for that patient.
patient_folders = os.listdir(pathToCleanMRIList)

for patient in patient_folders:

    path_to_patient = os.path.join(pathToCleanMRIList, patient)

    # skip plain files (e.g. .DS_Store); os.listdir below would raise on them
    if not os.path.isdir(path_to_patient):
        continue

    patientID = patient
    has_t1 = False
    has_t1ce = False
    has_t2 = False
    has_flair = False
    has_dwi = False
    has_ace = False
    duplicates = False # True if one sequence exists multiple times for a patient

    # get list of sequences (only directories, so sequences.txt is ignored)
    sequences = [
        item for item in os.listdir(path_to_patient) if os.path.isdir(os.path.join(path_to_patient, item))
    ]

    print(sequences)

    for sequence in sequences:
        # split from the right into patientID, sequence ID (T1, T1CE, T2, ...) and counter
        name_parts = sequence.rsplit("_", 2)
        if len(name_parts) != 3 or not name_parts[2].isdigit():
            print(f"WARNING {patientID}: unexpected sequence folder name {sequence}")
            continue

        sequenceID = name_parts[1]
        # the counter is parsed from text; without int() the comparison below
        # would compare str with int and raise a TypeError
        sequenceCounter = int(name_parts[2])

        # duplicates is True if more than one sequence exists
        if sequenceCounter > 0:
            duplicates = True

        print(f"sequence: {sequenceID}")

        if sequenceID == config.desiredSequences.T1.value:
            has_t1 = True
        elif sequenceID == config.desiredSequences.T1CE.value:
            has_t1ce = True
        elif sequenceID == config.desiredSequences.T2.value:
            has_t2 = True
        elif sequenceID == config.desiredSequences.FLAIR.value:
            has_flair = True
        elif sequenceID == config.desiredSequences.DWI.value:
            has_dwi = True
        # NOTE(review): the copy step labels these folders "ADC" - confirm that
        # config.desiredSequences.ACE.value matches that label.
        elif sequenceID == config.desiredSequences.ACE.value:
            has_ace = True

    patientInfos = [patientID, has_t1, has_t1ce, has_t2, has_flair, has_dwi, has_ace, duplicates]
    print(patientInfos)


[]
['01199093', False, False, False, False, False, False, False]
[]
['01220269', False, False, False, False, False, False, False]
[]
['01150136', False, False, False, False, False, False, False]
[]
['01134825', False, False, False, False, False, False, False]
[]
['01083248', False, False, False, False, False, False, False]
[]
['01189050', False, False, False, False, False, False, False]
[]
['01311383', False, False, False, False, False, False, False]
[]
['01100109', False, False, False, False, False, False, False]
[]
['01261127', False, False, False, False, False, False, False]
[]
['01243841', False, False, False, False, False, False, False]
[]
['01152379', False, False, False, False, False, False, False]
[]
['01001917', False, False, False, False, False, False, False]
[]
['01106844', False, False, False, False, False, False, False]
[]
['01241505', False, False, False, False, False, False, False]
[]
['01307298', False, False, False, False, False, False, False]
[]
['01269967', False, Fa