Notebook to evaluate BraTS18 image data based on segmentation label ratios

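The goal is to keep only those samples in which every segmentation label covers more than a certain threshold fraction of the volume; all other samples are filtered out.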

In [1]:
import numpy as np
import os

In [2]:
# Folder containing the .npy volumes to evaluate (one file per sample)
folder_path = 'npy/'

# A sample is kept only if every foreground label covers more than this
# fraction of all voxels in the segmentation mask.
MIN_LABEL_RATIO = 0.0005
# Index along the first axis at which the segmentation mask is read.
# NOTE(review): assumes channel 4 is the segmentation map -- confirm against
# the preprocessing that produced the .npy files.
SEG_CHANNEL = 4
# Number of foreground labels (distinct values after the lowest one) required.
NUM_FOREGROUND_LABELS = 3


def foreground_label_ratios(seg, num_labels=NUM_FOREGROUND_LABELS):
    """Return the voxel fraction of the first ``num_labels`` foreground labels.

    The distinct values of ``seg`` are sorted ascending; the lowest one is
    treated as background and the next ``num_labels`` values as foreground
    labels (presumably background is the smallest value -- verify for your
    data).

    Args:
        seg: Array-like segmentation mask of any shape.
        num_labels: How many foreground labels must be present.

    Returns:
        A list of ``num_labels`` floats, each ``label_voxels / total_voxels``,
        or ``None`` when ``seg`` has fewer than ``num_labels + 1`` distinct
        values (including the empty-mask case).
    """
    _, voxel_counts = np.unique(seg, return_counts=True)
    if voxel_counts.size < num_labels + 1:
        return None

    # Denominator is the number of voxels only. Summing the whole
    # (values, counts) tuple returned by np.unique would also add the label
    # values themselves and skew every ratio.
    total_voxels = voxel_counts.sum()
    return (voxel_counts[1:num_labels + 1] / total_voxels).tolist()


def passes_label_threshold(seg, min_ratio=MIN_LABEL_RATIO):
    """Return True when every required foreground label exceeds ``min_ratio``."""
    ratios = foreground_label_ratios(seg)
    return ratios is not None and all(ratio > min_ratio for ratio in ratios)


# valid files list
valid_image_list = []

# List all files in the folder. A missing folder is reported instead of
# raising, so the result lists below are still defined (and empty).
if os.path.isdir(folder_path):
    files = os.listdir(folder_path)
else:
    print(f'Folder not found: {folder_path}')
    files = []

# Filter for .npy files; sorted because os.listdir returns arbitrary order
npy_files = sorted(f for f in files if f.lower().endswith('.npy'))

# Names (without extension) of samples that pass / fail the filter, plus
# the names of all samples that were inspected
valid_sample_brats = []
not_valid_sample_brats = []
file_names = []

# Loop through each .npy file
for npy_file in npy_files:
    # Load the .npy file
    file_path = os.path.join(folder_path, npy_file)
    image_array = np.load(file_path)

    # Extract the name without the extension and remember it
    file_name = os.path.splitext(npy_file)[0]
    file_names.append(file_name)

    print(f'Loaded image: {file_name}')
    print(f'Image shape: {image_array.shape}')

    # Per-label voxel ratios of the segmentation mask; None means at least
    # one of the required labels is absent from this sample.
    ratio_list = foreground_label_ratios(image_array[SEG_CHANNEL])

    # filter for threshold and append to list accordingly
    if ratio_list is not None and all(ratio > MIN_LABEL_RATIO for ratio in ratio_list):
        valid_sample_brats.append(file_name)
    else:
        not_valid_sample_brats.append(file_name)


Loaded image: Brats18_2013_0_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_10_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_11_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_12_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_14_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_17_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_19_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_20_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_21_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_22_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_23_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_27_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_28_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_2_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_3_1
Image shape: (5, 155, 240, 240)
Loaded image: Brats18_2013_4_1
Image shape:

In [5]:
# Report how many samples passed and failed the filter (one count per line),
# then preview the first five passing sample names.
print(len(valid_sample_brats), len(not_valid_sample_brats), sep='\n')
print(valid_sample_brats[:5])

154
0
['Brats18_2013_0_1', 'Brats18_2013_10_1', 'Brats18_2013_11_1', 'Brats18_2013_12_1', 'Brats18_2013_14_1']


In [4]:
# Remove the on-disk .npy file of every sample listed in not_valid_sample_brats
for file_name in not_valid_sample_brats:
    # The list holds names without extension, so re-attach '.npy' here
    file_path = os.path.join(folder_path, f'{file_name}.npy')

    # Nothing to delete if the file is already gone; report and move on
    if not os.path.exists(file_path):
        print(f'File not found: {file_path}')
        continue

    os.remove(file_path)
    print(f'Deleted file: {file_path}')

In [6]:
import os

# Directory in which the split list files are looked up
folder_path = 'npy/'

# Full paths of the train / validation list files inside that directory
train_list_path, valid_list_path = (
    os.path.join(folder_path, list_name)
    for list_name in ('train_list.txt', 'valid_list.txt')
)

def update_list_file(file_path, entries_to_keep):
    """Rewrite a list file in place so it contains only the allowed entries.

    The file is read as one entry per line with surrounding whitespace
    stripped. Entries that are not in ``entries_to_keep`` are dropped; the
    order (and any duplicates) of the surviving entries is preserved. The
    result is written back to the same path, one entry per line.

    Args:
        file_path: Path of the list file to filter. If it does not exist, a
            message is printed and nothing is written.
        entries_to_keep: Iterable of hashable entries (e.g. sample names)
            that are allowed to remain in the file.

    Returns:
        None. The function works through its side effects: it overwrites
        ``file_path`` and prints the entries before and after filtering.
    """
    if not os.path.exists(file_path):
        print(f'{file_path} does not exist.')
        return

    # Read the existing entries from the file
    with open(file_path, 'r') as file:
        entries = [line.strip() for line in file]

    # Print existing entries for debugging
    print(f'Original entries in {file_path}:')
    print(entries)

    # Build a set once so each membership test is O(1) instead of scanning
    # the whole keep-list for every line of the file.
    allowed_entries = set(entries_to_keep)
    updated_entries = [entry for entry in entries if entry in allowed_entries]

    # Print updated entries for debugging
    print(f'Updated entries for {file_path}:')
    print(updated_entries)

    # Write the updated entries back to the file (this truncates the original)
    with open(file_path, 'w') as file:
        file.writelines(f'{entry}\n' for entry in updated_entries)

    print(f'Updated {file_path}')

# Filter both split lists (train first, then validation) down to the samples
# that passed the label-ratio check.
for list_path in (train_list_path, valid_list_path):
    update_list_file(list_path, valid_sample_brats)

Original entries in npy/train_list.txt:
['Brats18_CBICA_AOP_1', 'Brats18_TCIA02_168_1', 'Brats18_TCIA05_277_1', 'Brats18_2013_2_1', 'Brats18_TCIA08_242_1', 'Brats18_CBICA_ARZ_1', 'Brats18_CBICA_AQD_1', 'Brats18_TCIA04_437_1', 'Brats18_TCIA04_192_1', 'Brats18_CBICA_AME_1', 'Brats18_CBICA_ARF_1', 'Brats18_CBICA_AXM_1', 'Brats18_CBICA_AQZ_1', 'Brats18_CBICA_AQJ_1', 'Brats18_TCIA03_265_1', 'Brats18_CBICA_ABN_1', 'Brats18_TCIA02_322_1', 'Brats18_CBICA_ANZ_1', 'Brats18_CBICA_ABO_1', 'Brats18_CBICA_AYU_1', 'Brats18_TCIA03_375_1', 'Brats18_TCIA06_372_1', 'Brats18_TCIA08_162_1', 'Brats18_CBICA_AQN_1', 'Brats18_2013_12_1', 'Brats18_CBICA_ASO_1', 'Brats18_TCIA02_605_1', 'Brats18_CBICA_AVJ_1', 'Brats18_TCIA02_471_1', 'Brats18_CBICA_AMH_1', 'Brats18_TCIA02_606_1', 'Brats18_TCIA01_448_1', 'Brats18_TCIA01_150_1', 'Brats18_CBICA_AQP_1', 'Brats18_CBICA_AQV_1', 'Brats18_TCIA02_430_1', 'Brats18_2013_7_1', 'Brats18_CBICA_AUN_1', 'Brats18_CBICA_AVV_1', 'Brats18_2013_20_1', 'Brats18_TCIA02_374_1', 'Brats18_

NameError: name 'updated_entries' is not defined