# Notebook Synopsis:

- This notebook shows that there are 76 LGG folders
- Each LGG folder contains exactly 5 files, all with the extension .nii.gz
- Each folder contains exactly 1 of each of the following modalities in order:
- flair, seg, t1, t1ce, t2

In [2]:
import pathlib

### Path to LGG folders

In [3]:
# Build the LGG directory path relative to the current working directory.
# The dataset folder name intentionally appears twice in the path.
lgg_path = (
    pathlib.Path.cwd()
    / "MICCAI_BraTS_2019_Data_Training"
    / "MICCAI_BraTS_2019_Data_Training"
    / "LGG"
)

In [4]:
lgg_path

WindowsPath('C:/Users/lucas/School/4850/leabra/MICCAI_BraTS_2019_Data_Training/MICCAI_BraTS_2019_Data_Training/LGG')

#### List of all LGG folder paths

In [7]:
# Keep only directories: each entry is later iterated as a folder, so a stray
# file directly under LGG (e.g. an OS metadata file) must be neither counted
# nor visited. Sorting gives a reproducible order, since iterdir() does not.
lgg_brain_volume_folder_paths = sorted(
    entry for entry in lgg_path.iterdir() if entry.is_dir()
)

# 76 LGG folders


In [8]:
len(lgg_brain_volume_folder_paths)

76

### Each LGG folder should contain 5 files: t1, t2, t1ce, flair, seg... let's confirm

#### Look at all paths to LGG folders

In [18]:
# Optional: uncomment to print the path of every LGG folder (one per folder).
#for folder_path in lgg_brain_volume_folder_paths:
#    print(folder_path)
#    print()

#### Find number of files in each LGG folder

In [11]:
# Gather the distinct entry counts seen across all LGG folders. If the set
# ends up holding a single value, every folder has that same number of entries.
unique_num_files_in_folders = set()

for folder_path in lgg_brain_volume_folder_paths:
    file_paths_in_folder = list(folder_path.iterdir())
    num_files_in_folder = len(file_paths_in_folder)
    unique_num_files_in_folders.add(num_files_in_folder)


#### Each LGG folder contains exactly 5 files

In [12]:
unique_num_files_in_folders

{5}

# Do these 5 files in each LGG folder have the expected extensions?
# i.e., every file should end with .nii.gz

#### If no errors are printed from this cell, then every LGG file has extension .nii.gz

In [13]:
# This matches the format that .suffixes will return
expected_extensions = ['.nii', '.gz']

# Flat list of every file path found under the LGG folders, in a reproducible
# order (folders are visited in list order, files sorted within each folder).
all_lgg_file_paths = []

for folder_path in lgg_brain_volume_folder_paths:

    # iterdir() yields entries in arbitrary order, so sort them to keep the
    # contents of all_lgg_file_paths deterministic between runs and platforms.
    file_paths_in_folder = sorted(folder_path.iterdir())

    for file_path in file_paths_in_folder:

        all_lgg_file_paths.append(file_path)

        actual_extensions = file_path.suffixes

        # Report any file whose extensions are not exactly ['.nii', '.gz']
        if actual_extensions != expected_extensions:
            print("Error, encountered:", actual_extensions, "at", file_path)

#### Checking LGG file extensions one more time with list filtering

In [14]:
# Independent re-check: keep only the paths whose suffix list is not exactly
# ['.nii', '.gz']; an empty result means every file passed.
wrong_extensions = [
    lgg_file_path
    for lgg_file_path in all_lgg_file_paths
    if lgg_file_path.suffixes != ['.nii', '.gz']
]

#### If the list is empty, then every LGG file has the extension .nii.gz

In [15]:
wrong_extensions

[]

# Do these 5 files in each LGG folder contain the expected modalities?
# i.e., t1, t2, t1ce, flair, seg

#### Check that every LGG folder contains 1 of each modality in this order:
#### flair.nii.gz,  seg.nii.gz, t1.nii.gz, t1ce.nii.gz, t2.nii.gz

In [16]:
# Filename patterns for the five expected files, listed in the order they
# appear when a folder's entries are sorted by name.
expected_modality_patterns = [
    "*flair.nii.gz",
    "*seg.nii.gz",
    "*t1.nii.gz",
    "*t1ce.nii.gz",
    "*t2.nii.gz",
]

# Collects one boolean per LGG folder; the set is {True} only if every folder
# passed the check.
has_correct_modalities_and_order = set()

for folder_path in lgg_brain_volume_folder_paths:

    # Inspect THIS folder's own files. Indexing a global flat list by 0..4
    # would only ever re-check the first five files overall. Sort because
    # iterdir() yields entries in arbitrary order.
    file_paths_in_folder = sorted(folder_path.iterdir())

    # A folder is correct when it has exactly one entry per expected pattern
    # and each entry matches the pattern at the same position.
    folder_is_correct = (
        len(file_paths_in_folder) == len(expected_modality_patterns)
        and all(
            file_path.match(pattern)
            for file_path, pattern in zip(file_paths_in_folder,
                                          expected_modality_patterns)
        )
    )

    has_correct_modalities_and_order.add(folder_is_correct)

#### This set will only contain True, if in each LGG folder, 1 of each modality is present in the expected order

In [17]:
has_correct_modalities_and_order

{True}