# Notebook Synopsis:

- This notebook shows that there are 259 HGG folders
- Each HGG folder contains exactly 5 files, all with the extension .nii.gz
- Each folder contains exactly 1 of each of the following modalities in order:
- flair, seg, t1, t1ce, t2

In [1]:
import pathlib

### Path to HGG folders

In [2]:
# Path to the HGG directory, built relative to the current working directory.
# Note that the dataset folder name appears twice in a row in the path.
hgg_path = (
    pathlib.Path.cwd()
    / "MICCAI_BraTS_2019_Data_Training"
    / "MICCAI_BraTS_2019_Data_Training"
    / "HGG"
)

In [3]:
# Display the full path so the reader can confirm where the HGG folders are looked up.
hgg_path

WindowsPath('C:/Users/lucas/School/4850/leabra/MICCAI_BraTS_2019_Data_Training/MICCAI_BraTS_2019_Data_Training/HGG')

#### List of all HGG folder paths

In [4]:
# Materialize every entry found directly under the HGG directory into a list.
hgg_brain_volume_folder_paths = list(hgg_path.iterdir())

# 259 HGG folders


In [5]:
# Number of entries collected from the HGG directory.
len(hgg_brain_volume_folder_paths)

259

### Each HGG folder should contain 5 files: t1, t2, t1ce, flair, seg... let's confirm

#### Look at all paths to HGG folders

In [6]:
# Print each HGG folder path followed by a blank line for readability.
for folder_path in hgg_brain_volume_folder_paths:
    print(folder_path, end="\n\n")

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_10_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_11_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_12_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_13_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_14_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_17_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_18_1

C:\Users\lucas\School\4850\leabra\MICCAI_BraTS_2019_Data_Training\MICCAI_BraTS_2019_Data_Training\HGG\BraTS19_2013_19_1

C:\Users\lucas\School\4850\leabr

#### Find number of files in each HGG folder

In [7]:
# Distinct number of entries found across the HGG folders.
# A single value in this set means every folder holds the same number of files.
unique_num_files_in_folders = {
    len(list(folder_path.iterdir()))
    for folder_path in hgg_brain_volume_folder_paths
}


#### Each HGG folder contains exactly 5 files

In [8]:
# Distinct per-folder file counts; a single value means all folders have the same count.
unique_num_files_in_folders

{5}

# Do these 5 files in each HGG folder have the expected extensions?
# ie. every file should end with .nii.gz

#### If no errors are printed from this cell, then every HGG file has extension .nii.gz

In [9]:
# Suffix list that pathlib's .suffixes returns for a file name ending in ".nii.gz"
expected_extensions = ['.nii', '.gz']

# Flatten the files of every HGG folder into one list, keeping folder order
all_hgg_file_paths = [
    file_path
    for folder_path in hgg_brain_volume_folder_paths
    for file_path in folder_path.iterdir()
]

# Report every file whose suffixes differ from the expected ones.
# No printed output means every HGG file ends with .nii.gz
for file_path in all_hgg_file_paths:
    actual_extensions = file_path.suffixes
    if actual_extensions != expected_extensions:
        print("Error, encountered:", actual_extensions, "at", file_path)

#### Checking HGG file extensions one more time with list filtering

In [10]:
# Keep only the paths whose suffix list is not exactly ['.nii', '.gz']
wrong_extensions = [
    file_path
    for file_path in all_hgg_file_paths
    if file_path.suffixes != ['.nii', '.gz']
]

#### If the list is empty, then every HGG file has the extension .nii.gz

In [11]:
# Files whose suffixes are not ['.nii', '.gz']; an empty list means none were found.
wrong_extensions

[]

# Do these 5 files in each HGG folder contain the expected modalities?
# ie. t1, t2, t1ce, flair, seg

#### Check that every HGG folder contains 1 of each modality in this order:
#### flair.nii.gz,  seg.nii.gz, t1.nii.gz, t1ce.nii.gz, t2.nii.gz

In [12]:
# File-name patterns for the five modalities, listed in the order the files
# are expected to appear once a folder's contents are sorted by name.
expected_modality_patterns = [
    "*flair.nii.gz",
    "*seg.nii.gz",
    "*t1.nii.gz",
    "*t1ce.nii.gz",
    "*t2.nii.gz",
]

# Collects the outcome of every check; it ends up as {True} only when every
# file in every folder matched its expected modality pattern.
has_correct_modalities_and_order = set()

for folder_path in hgg_brain_volume_folder_paths:

    # iterdir() yields entries in arbitrary order, so sort to make the
    # positional comparison below deterministic across platforms.
    file_paths_in_folder = sorted(folder_path.iterdir())

    # A folder with a different number of files cannot contain exactly one of
    # each modality; record the failure instead of silently skipping files.
    if len(file_paths_in_folder) != len(expected_modality_patterns):
        has_correct_modalities_and_order.add(False)
        continue

    # Compare THIS folder's files (not the global file list) position by
    # position against the expected modality patterns.
    for file_path, modality_pattern in zip(file_paths_in_folder, expected_modality_patterns):
        has_correct_modalities_and_order.add(file_path.match(modality_pattern))

#### This set contains only True if every HGG folder holds exactly 1 file of each modality, in the expected order

In [13]:
# Set of the match results collected in the previous cell; {True} means no check failed.
has_correct_modalities_and_order

{True}