# Create list of patients with correct MRI sequences

### Imports

In [10]:
import os
import sys
sys.path.append(r"/Users/LennartPhilipp/Desktop/Uni/Prowiss/Code/Brain_Mets_Classification")

from tqdm import tqdm
from datetime import datetime
import shutil
import pandas as pd

import brain_mets_classification.config as config
import brain_mets_classification.custom_funcs as funcs

In [11]:
# Folder that holds the raw "Anonymized - <ID>" patient directories.
# Alternative input folder (currently disabled):
#path_to_folder = "{}/originalPatientFiles".format(config.path)
path_to_folder = "{}/RgbBrainMetsSampleN30".format(config.path)

# Path of the cleaned output folder; starts empty and is assigned once the
# preprocessing step folder has been created further down in the notebook.
pathToCleanMRIList = ""

### Copy files of patients with the correct sequences into a new directory and create a .txt file with the IDs of said patients

In [18]:
def sequenceNameIsInLists(rejectedLists, sequenceName):
    '''a helper function that checks if the sequence has already been added to one of the rejected sequences lists
    arguments:
    rejectedLists: [[string]] = list of all the different rejected sequences lists
    sequenceName: string = name of the current sequence
    returns:
    bool = True if sequenceName is contained in at least one of the lists, otherwise False
    '''
    # any() stops at the first list that contains the name and avoids
    # shadowing the builtin `list` with a loop variable
    return any(sequenceName in rejectedList for rejectedList in rejectedLists)

In [20]:
#Example file structure
#├── Anonymized - 01005097
#│   └── Mrt Body
#│       ├── Diffusion trace tra schnell_ADC - 8
#│       │   ├── IM-2330-0001-0001.dcm
#│       │   ├── IM-2330-0002-0001.dcm
#...
#│       │   ├── IM-2469-0026-0001.dcm
#│       │   └── IM-2469-0027-0001.dcm
#│       ├── Diffusion trace tra schnell_TRACEW - 7
#│       │   ├── IM-2329-0001-0001.dcm
#│       │   ├── IM-2329-0002-0001.dcm
#...
#│       │   ├── IM-2468-0026-0001.dcm
#│       │   └── IM-2468-0027-0001.dcm
#│       ├── T1 mp-rage3d we sag 1mm KM - 13
#│       │   ├── IM-2335-0001-0001.dcm
#│       │   ├── IM-2335-0002-0001.dcm
#...
#│       │   ├── IM-2474-0159-0001.dcm
#│       │   └── IM-2474-0160-0001.dcm
#├── Anonymized - 12345678
#...

def _containsAny(name, keywords):
    '''returns True if at least one of the keywords is a substring of name'''
    return any(keyword in name for keyword in keywords)


def _copySequenceToCleanFolder(cleanRoot, patientID, sequenceType, sequenceList, sessionPath, sequenceName, originalSequenceName):
    '''creates the clean folder for one accepted sequence and copies its files into it
    arguments:
    cleanRoot: string = path to the folder of the current preprocessing step
    patientID: string = ID of the current patient
    sequenceType: string = "T1", "T1CE", "T2", "FLAIR", "DWI" or "ADC"
    sequenceList: [string] = names of the sequences of this type found so far for the patient (sequenceName gets appended)
    sessionPath: string = path to the MRI session folder that contains the sequence folder
    sequenceName: string = name of the sequence folder inside sessionPath
    originalSequenceName: string = value passed on to funcs.createSequenceFolder as original_sequence_name
    '''
    # the list is handed over BEFORE the current sequence is appended
    path_to_sequence = funcs.createSequenceFolder(
        path = os.path.join(cleanRoot, patientID),
        patientID = patientID,
        sequence = sequenceType,
        sequence_list = sequenceList,
        original_sequence_name = originalSequenceName
    )

    # remember the name of the sequence for sequences.txt and the duplicate check
    sequenceList.append(sequenceName)

    # copy each file individually into the path_to_sequence folder
    path_to_source = os.path.join(sessionPath, sequenceName)
    for dicomFile in os.listdir(path_to_source):
        # ignores the ds_folders
        if config.dsStore in dicomFile:
            continue
        shutil.copyfile(os.path.join(path_to_source, dicomFile), os.path.join(path_to_sequence, dicomFile))


# Creates a new directory for all the patient folders
pathToCleanMRIList = funcs.createNewPreprocessingStepFolder(step=0)

# Goes through list of files/folders at path_to_folder and only adds the directories to the list
folderList = [
    folder for folder in os.listdir(path_to_folder) if os.path.isdir(os.path.join(path_to_folder, folder))
]

patientIDs = []

# create (empty) .txt file for sequence duplicates
path_to_duplicates_file = f"{pathToCleanMRIList}/sequence_duplicates.txt"
with open(path_to_duplicates_file, "w") as sequence_duplicates_file:
    pass

# Loops through all the "Anonymized - #######" folders
for patient_folder in tqdm(folderList):

    # ignores the ds_folders
    if config.dsStore in patient_folder:
        continue

    # names of the accepted sequences of the current patient, grouped by type
    T1_sequences = []
    T1CE_sequences = []
    T2_sequences = []
    FLAIR_sequences = []
    DWI_sequences = []
    ADC_sequences = []

    # rejected sequence names
    # NOTE(review): nothing is appended to these lists at the moment (the calls
    # that used to fill them are disabled), so rejected_sequences.txt is never written
    first_rejection_sequences = []
    T1_rejected_sequences = []
    T2_rejected_sequences = []
    FLAIR_rejected_sequences = []
    diffusion_rejected_sequences = []

    # all folders are named like "Anonymized - 12345678"
    patientID = patient_folder.split(" - ")[1]

    # adds the patientID to the list patientIDs if it hasn't been added before
    if patientID not in patientIDs:
        patientIDs.append(patientID)

    path_to_MRI_session_folders = os.path.join(path_to_folder, patient_folder)

    # loops through the different MRI sessions
    for mri_session in os.listdir(path_to_MRI_session_folders):

        # ignores the ds_folders
        if config.dsStore in mri_session:
            continue

        path_to_mri_sequences = os.path.join(path_to_MRI_session_folders, mri_session)

        # loops through the different sequences created during each MRI session
        for sequence in os.listdir(path_to_mri_sequences):

            # ignores the ds_folders
            if config.dsStore in sequence:
                continue

            # all keyword comparisons are done on the lower cased sequence name
            sequences_lower_cased = sequence.lower()

            # the sequence has to match the whitelist and must not match the blacklist;
            # everything else is skipped
            if not _containsAny(sequences_lower_cased, config.seq_whitelist):
                continue
            if _containsAny(sequences_lower_cased, config.seq_blacklist):
                continue

            # create folder for patient
            funcs.createFolderForPatient(
                path = pathToCleanMRIList,
                patientID = patientID
            )

            # The checks below are independent of each other on purpose:
            # a sequence name may fit (and get copied into) more than one type.

            # T1 / T1CE
            if _containsAny(sequences_lower_cased, config.T1list) and not _containsAny(sequences_lower_cased, config.sub):
                # contrast enhanced (CE) T1 sequences go to T1CE, all others to T1
                if _containsAny(sequences_lower_cased, config.KMlist):
                    _copySequenceToCleanFolder(pathToCleanMRIList, patientID, "T1CE", T1CE_sequences,
                                               path_to_mri_sequences, sequence, sequence)
                else:
                    _copySequenceToCleanFolder(pathToCleanMRIList, patientID, "T1", T1_sequences,
                                               path_to_mri_sequences, sequence, sequence)

            # T2 (but neither T2* nor FLAIR)
            if (_containsAny(sequences_lower_cased, config.T2list)
                    and not _containsAny(sequences_lower_cased, config.T2STERNlist)
                    and not _containsAny(sequences_lower_cased, config.FLAIRlist)):
                _copySequenceToCleanFolder(pathToCleanMRIList, patientID, "T2", T2_sequences,
                                           path_to_mri_sequences, sequence, sequence)

            # FLAIR
            if _containsAny(sequences_lower_cased, config.FLAIRlist):
                _copySequenceToCleanFolder(pathToCleanMRIList, patientID, "FLAIR", FLAIR_sequences,
                                           path_to_mri_sequences, sequence, sequence)

            # Diffusion
            # NOTE(review): ADC and DWI pass the lower cased name as original_sequence_name
            # while all other types pass the unmodified name - confirm this is intended
            if _containsAny(sequences_lower_cased, config.DWIlist):
                if _containsAny(sequences_lower_cased, config.ADClist):
                    _copySequenceToCleanFolder(pathToCleanMRIList, patientID, "ADC", ADC_sequences,
                                               path_to_mri_sequences, sequence, sequences_lower_cased)
                else:
                    _copySequenceToCleanFolder(pathToCleanMRIList, patientID, "DWI", DWI_sequences,
                                               path_to_mri_sequences, sequence, sequences_lower_cased)

    # add different Sequence Names as .txt file
    # NOTE(review): DWI / ADC are not part of this condition, so a patient with
    # only diffusion sequences gets no sequences.txt - confirm this is intended
    if (T1_sequences or T1CE_sequences or T2_sequences or FLAIR_sequences):
        # mode "x" fails on purpose if the file already exists
        with open(f"{pathToCleanMRIList}/{patientID}/sequences.txt", "x") as f:
            f.write("""T1_sequences: {t1}
T1CE_sequences: {t1ce}
T2_sequences: {t2}
FLAIR_sequences: {flair}
DWI_sequences: {dwi}
ADC_sequences: {adc}""".format(t1=T1_sequences, t1ce=T1CE_sequences, t2=T2_sequences, flair=FLAIR_sequences, dwi=DWI_sequences, adc=ADC_sequences)
            )

    if (first_rejection_sequences or T1_rejected_sequences or T2_rejected_sequences or FLAIR_rejected_sequences or diffusion_rejected_sequences):
        with open(f"{pathToCleanMRIList}/{patientID}/rejected_sequences.txt", "x") as f:
            # the first entry uses {firstr}; it used to print the T1 rejections twice
            f.write('''first_rejection: {firstr}
                    
T1 rejected sequences: {t1r}
                    
T2 rejected sequences: {t2r}
                    
FLAIR rejected sequences: {flairr}
                    
Diffusion rejected sequences: {diffr}'''.format(firstr = first_rejection_sequences,
                                                t1r = T1_rejected_sequences,
                                                t2r = T2_rejected_sequences,
                                                flairr = FLAIR_rejected_sequences,
                                                diffr = diffusion_rejected_sequences))

    # warn about (and log) every sequence type that exists more than once for the patient
    sequences_list = [T1_sequences, T1CE_sequences, T2_sequences, FLAIR_sequences, DWI_sequences, ADC_sequences]
    for sequence_array in sequences_list:
        if len(sequence_array) > 1:
            print(f"WARNING {patientID}: {len(sequence_array)} sequences of same type, {sequence_array}")

            # append sequences to sequence_duplicates.txt
            with open(path_to_duplicates_file, "a") as duplicates_file:
                duplicates_file.write(f"{patientID}: {len(sequence_array)}#, {sequence_array}\n")


print("done")

  7%|▋         | 2/30 [00:00<00:02, 13.60it/s]

T1 mp-rage3d we sag 1mm KM_MPR_Tra - 5
T1 mp-rage3d we sag 1mm KM - 3
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 4
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


 13%|█▎        | 4/30 [00:01<00:09,  2.85it/s]

PosDisp- [5] T2 flair tra - 5001
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
PosDisp- [10] T1 se tra schnell on [2] - 1034


 17%|█▋        | 5/30 [00:01<00:07,  3.32it/s]

PosDisp- [9] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM - 17
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 18
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


 27%|██▋       | 8/30 [00:01<00:04,  5.49it/s]

T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
PhoenixZIPReport - 99
T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


 33%|███▎      | 10/30 [00:01<00:03,  6.45it/s]

T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
PosDisp- [3] t2_flair_tra - 5002
PosDisp- [9] t1_se_r_tra_KM - 5008
PosDisp- [4] t2_tse_tra - 5003
MPR sag - 5009
Localizers - 1
PosDisp- [2] t2_tse_sag_5mm - 5001
PosDisp- [5] t1_se_r_tra - 5004
PosDisp- [7] ep2d_diff_tra_ADC - 5005
PosDisp- [8] t1_mprage_sag_p2_iso_KM - 5006
t2_tse_sag_5mm - 2
MPR cor - 5007
t1_mprage_sag_p2_iso_KM - 8


 40%|████      | 12/30 [00:02<00:02,  7.78it/s]

PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
Localizers - 4
T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
TOF RL - 802
TOF FH - 803
T1 sag - 601
T1 sag KM SPIR - 1201
3D_T1 cor KM - 1102
3D T1 TFE KM sag - 1101


 50%|█████     | 15/30 [00:02<00:01,  7.53it/s]

isoDWI1000 - 202
Localizers - 101
sSTIR sag Dixon - 702
sT2 sag Dixon - 703
Mean_&_t-Maps - 21
Mean_&_t-Maps - 27
Mean_&_t-Maps - 16
EvaSeries_GLM - 15
Field mapping - 10
fMRI Antonymgenerierung - 17
EvaSeries_GLM - 26
T2 space3d cor - 4
EvaSeries_GLM - 20
T1 mp-rage3d we sag 1mm KM_MPR_Tra - 30
T1 mp-rage3d we sag 1mm KM - 28
fMRI Satzgenerierung - 22
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 29
T2 space3d sag - 2
fMRI Verbgenerierung - 12
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


 60%|██████    | 18/30 [00:02<00:01,  9.26it/s]

PosDisp- [5] T2 flair tra - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 12
T1 mp-rage3d we sag 1mm KM - 11
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


 67%|██████▋   | 20/30 [00:02<00:01,  9.63it/s]

T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
mpr cor - 7
T1 mp-rage3d we sag 1mm KM - 6


 73%|███████▎  | 22/30 [00:03<00:00, 11.23it/s]

T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
PosDisp- [5] T2 flair tra on [2] - 1008
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


 80%|████████  | 24/30 [00:03<00:00, 11.27it/s]

keine Daten vorhanden - 1
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
cor - 602
cs_T1W_3D_TFE - 601


 93%|█████████▎| 28/30 [00:03<00:00, 10.30it/s]

Localizers - 101
dDWI_0_500_1000i SENSE - 302
T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
PosDisp- [13] T1 mp-rage3d we sag 1mm KM - 5002
PosDisp- [5] T2 flair tra schnell on [2] - 1008
PosDisp- [13] T1 mp-rage3d we sag 1mm KM on [2] - 1022
Evidence Documents MR Basic Reading - 1001
T1 mp-rage3d we sag 1mm KM - 13
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14


100%|██████████| 30/30 [00:03<00:00,  7.76it/s]

mpr cor - 5003
T1 mp-rage3d we sag 1mm KM - 10
T1 mp-rage3d we sag 1mm KM - 13
PosDisp- [5] T2 flair tra schnell - 5001
T1 mp-rage3d we sag 1mm KM_MPR_Cor - 14
done





### Create pandas Dataframe of all Patients and Sequences
patient:

ID: int

t1: bool

t1ce: bool

t2: bool

flair: bool

dwi: bool

dwi adc: bool


In [None]:
# loop through files of each patient in preprocessing0
# check if t1, t1ce, t2, flair, dwi, adc exist
# create list of boolean values for the existence of each sequence
# add list of [patientID, bool, bool, bool, bool, bool, bool] list of patients
# create dataframe based on step above
# ?
# profit

# run on all patients
# check how many actually have all sequences

Example File Structure below

In [None]:
# ├── 01001917
# │   ├── 01001917_T2_0
# │   │   ├── IM-1281-0001-0001.dcm
# │   │   ├── IM-1281-0002-0001.dcm
# ...
# │   │   ├── IM-1420-0162-0001.dcm
# │   │   └── IM-1420-0163-0001.dcm
# │   ├── 01001917_T2_1
# │   │   ├── IM-1288-0001-0001.dcm
# │   │   ├── IM-1288-0002-0001.dcm
# ...
# │   │   ├── IM-1427-0030-0001.dcm
# │   │   └── IM-1427-0031-0001.dcm
# │   └── sequences.txt
# ├── 01005097
# │   ├── 01005097_ADC_0
# │   │   ├── IM-2330-0001-0001.dcm
# │   │   ├── IM-2330-0002-0001.dcm
# ...
# │   │   ├── IM-2469-0026-0001.dcm
# │   │   └── IM-2469-0027-0001.dcm
# │   ├── 01005097_DWI_0
# │   │   ├── IM-2329-0001-0001.dcm
# │   │   ├── IM-2329-0002-0001.dcm
# ...
# │   │   ├── IM-2468-0026-0001.dcm
# │   │   └── IM-2468-0027-0001.dcm
# │   ├── 01005097_FLAIR_0
# │   │   ├── IM-2327-0001-0001.dcm
# │   │   ├── IM-2327-0002-0001.dcm
# ...
# │   │   ├── IM-2466-0026-0001.dcm
# │   │   └── IM-2466-0027-0001.dcm
# │   ├── 01005097_T1CE_0
# │   │   ├── IM-2333-0001-0001.dcm
# │   │   ├── IM-2333-0002-0001.dcm
# ...
# │   │   ├── IM-2472-0026-0001.dcm
# │   │   └── IM-2472-0027-0001.dcm
# │   ├── 01005097_T1_0
# │   │   ├── IM-2332-0001-0001.dcm
# │   │   ├── IM-2332-0002-0001.dcm
# ...
# │   │   ├── IM-2467-0026-0001.dcm
# │   │   └── IM-2467-0027-0001.dcm
# │   └── sequences.txt
# ├── 01009590
# │   ├── 01009590_ADC_0
# │   │   ├── IM-0866-0001-0001.dcm
# │   │   ├── IM-0866-0002-0001.dcm
# ...
# │   │   ├── IM-0866-0020-0001.dcm
# │   │   ├── IM-0866-0021-0001.dcm

In [13]:
# Column names of the sequence types, in the order used for the dataframe
sequence_columns = [
    config.desiredSequences.T1.value,
    config.desiredSequences.T1CE.value,
    config.desiredSequences.T2.value,
    config.desiredSequences.FLAIR.value,
    config.desiredSequences.DWI.value,
    config.desiredSequences.ADC.value,
]

# Only directories are patient folders. This leaves out sequence_duplicates.txt
# as well as any other stray file (e.g. .DS_Store) without failing when the
# duplicates file does not exist.
patient_folders = [
    entry for entry in os.listdir(pathToCleanMRIList)
    if os.path.isdir(os.path.join(pathToCleanMRIList, entry))
]

list_of_patientSeqenceInfos = []

for patient in patient_folders:

    # one flag per sequence type, all False until a matching folder is found
    found_sequences = dict.fromkeys(sequence_columns, False)
    duplicates = False # True if one sequence exists multiple times for a patient

    # get list of sequence folders of the patient
    sequences = [
        item for item in os.listdir(f"{pathToCleanMRIList}/{patient}") if os.path.isdir(f"{pathToCleanMRIList}/{patient}/{item}")
    ]

    for sequence in sequences:
        # sequence folders are named like "01005097_T1CE_0" (patientID_sequenceID_counter)
        name_parts = sequence.split("_")
        if len(name_parts) < 3 or not name_parts[2].isdigit():
            # not a sequence folder created by the copy step
            continue

        # sequence ID = T1 or T1CE or T2 ...
        sequenceID = name_parts[1]
        sequenceCounter = name_parts[2]

        # duplicates is True if more than one sequence of a type exists
        if int(sequenceCounter) > 0:
            duplicates = True

        if sequenceID in found_sequences:
            found_sequences[sequenceID] = True

    patientSequenceInfos = [found_sequences[column] for column in sequence_columns] + [duplicates]
    list_of_patientSeqenceInfos.append(patientSequenceInfos)

patient_sequences_df = pd.DataFrame(
    list_of_patientSeqenceInfos,
    columns=sequence_columns + ["duplicates"],
    index=patient_folders
)

# Print results
patient_total_amount = len(patient_sequences_df)
print(f"Total Amount of Patients: {patient_total_amount}")

# The selections use the configured column names instead of hard coded
# query strings, so they keep working if the values in config change.

# has t1, t1ce, t2, flair
has_four_sequences = patient_sequences_df[sequence_columns[:4]].all(axis=1)
list_four_sequences = patient_sequences_df.loc[has_four_sequences].index.values
print(f"Amount of patients with t1, t1ce, t2, flair: {len(list_four_sequences)}")
# also has dwi
has_dwi_sequences = patient_sequences_df[sequence_columns[:5]].all(axis=1)
list_with_dwi_sequences = patient_sequences_df.loc[has_dwi_sequences].index.values
print(f"Amount of patients also with DWI: {len(list_with_dwi_sequences)}")
# also has adc
has_all_sequences = patient_sequences_df[sequence_columns].all(axis=1)
list_with_all_sequences = patient_sequences_df.loc[has_all_sequences].index.values
print(f"Amount of patients with all sequences: {len(list_with_all_sequences)}")

has_duplicates = patient_sequences_df["duplicates"].astype(bool)
list_of_duplicates_IDs = patient_sequences_df.loc[has_duplicates].index.values
print(f"Duplicates IDs: {list_of_duplicates_IDs}")


Total Amount of Patients: 364
Amount of patients with t1, t1ce, t2, flair: 283
Amount of patients also with DWI: 276
Amount of patients with all sequences: 257
Duplicates IDs: ['02148010' '01800184' '02063373' '01410317' '02034046' '02116290'
 '01391984' '02117549' '02046093' '01878062' '02062434' '01882989'
 '02090584' '02105939' '01190670' '01013277' '01021714' '02173772'
 '02184584' '01755816' '01943022' '01658206' '02194539' '01699532'
 '01563052' '01798755' '01760947' '01018613' '01360726' '01578955'
 '01764802' '01781732' '01072344' '02082498' '02010452' '01995695'
 '02009523' '02100970' '01548397' '02066445' '02066814' '02051037'
 '02108926' '01792771' '01670714' '01257796' '01955868' '01400779'
 '01484016' '01516702' '02140670' '01321873' '01443624' '02092074'
 '01819252' '01946271' '01001917' '01983233' '01641960' '01800439'
 '02145870' '01921604' '02161647' '01979317' '01482000' '02091120'
 '02210001' '02126982' '01659187' '01609293' '02140942' '02173158'
 '02119712' '0102199