In [3]:
import os
from pathlib import Path
import random
from monai.transforms import (
    Compose,
    LoadImaged,
    EnsureChannelFirstd,
    ScaleIntensityd
)
from monai.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches


# Define the path to the data directory.
# The project root is taken to be the parent of the current working directory.
root_directory = Path().resolve().parent                            # Main project directory
data_directory = root_directory / 'data' / 'raw' / 'training_data'  # Data directory

# List all subfolders in the data directory.
# iterdir() yields entries in arbitrary order, so sort them to make the
# "first" folder the same one on every run and on every machine.
folders = sorted(f for f in data_directory.iterdir() if f.is_dir())
if not folders:
    raise FileNotFoundError(f"No case folders found in {data_directory}")

# Select the folder to visualise: a random one when bool_random is True,
# otherwise the first one.
bool_random = False
first_folder = random.choice(folders) if bool_random else folders[0]

# Define the paths to the image and mask files
# (each file is named "<folder name>-<suffix>.nii.gz").
image_files = {
    suffix: str(first_folder / f'{first_folder.name}-{suffix}.nii.gz')
    for suffix in ('t1c', 't1n', 't2f', 't2w', 'seg')
}

# Create a list of files to process; the segmentation is exposed under the 'label' key
modality_keys = ['t1c', 't1n', 't2f', 't2w']
files = [{**{key: image_files[key] for key in modality_keys}, 'label': image_files['seg']}]

# Define the transformations.
# Intensity scaling is applied to the image modalities only, never to the label.
transforms = Compose([
    LoadImaged(keys=modality_keys + ['label']),
    EnsureChannelFirstd(keys=modality_keys + ['label']),
    ScaleIntensityd(keys=modality_keys)
])

# Create a dataset and dataloader
dataset = Dataset(data=files, transform=transforms)
loader = DataLoader(dataset, batch_size=1)

# Load a sample batch
batch = next(iter(loader))

# Extract image and label data.
# [0][0] drops the batch dimension and the next leading dimension
# (presumably the channel added by EnsureChannelFirstd).
images = {key: batch[key][0][0] for key in modality_keys}
label = batch['label'][0][0]

# Single table describing every segmentation class:
# (label value, RGB colour used in the overlay, human-readable description)
_label_table = (
    (0, (0, 0, 0),     'Background'),                                             # Black
    (1, (255, 0, 0),   'Non-enhancing tumor core (NETC)'),                        # Red
    (2, (0, 255, 0),   'Surrounding non-enhancing FLAIR hyperintensity (SNFH)'),  # Green
    (3, (0, 0, 255),   'Enhancing tissue (ET)'),                                  # Blue
    (4, (255, 255, 0), 'Resection cavity (RC)'),                                  # Yellow
)

# Color map for labels: label value -> [R, G, B] with components in 0..255
label_colors = {value: list(rgb) for value, rgb, _text in _label_table}

# Labels description: label value -> display name
label_descriptions = {value: text for value, _rgb, text in _label_table}

In [None]:


# Function to color the label
def color_label(label, colors):
    """Turn an array of label values into an RGB image.

    Args:
        label: Array of label values (any shape).
        colors: Mapping from label value to a 3-element RGB colour (0..255).

    Returns:
        A uint8 array of shape ``label.shape + (3,)``. Positions whose value
        has no entry in ``colors`` stay black (all zeros).
    """
    rgb_image = np.zeros(tuple(label.shape) + (3,), dtype=np.uint8)
    for class_value in colors:
        # Paint every position carrying this label value with its colour
        rgb_image[label == class_value] = colors[class_value]
    return rgb_image

# Function to plot images with a colored label mask
def plot_slices(slice_idx, images, label):
    """Show one slice of every modality plus the label mask overlaid on T1c.

    Args:
        slice_idx: Index along the third axis of the volumes.
        images: Mapping with keys 't1c', 't1n', 't2f', 't2w'; each value is
            indexed as ``volume[:, :, slice_idx]``.
        label: Label volume, indexed the same way as the images.

    Reads the module-level ``label_colors`` and ``label_descriptions``.
    """
    panel_titles = {'t1c': 'T1c', 't1n': 'T1n', 't2f': 'T2f', 't2w': 'T2w'}
    fig, axs = plt.subplots(1, 5, figsize=(20, 5))

    # First four panels: one grayscale slice per modality
    # (zip stops after the four modalities, leaving the fifth axis for the mask)
    for ax, (modality, title) in zip(axs, panel_titles.items()):
        ax.imshow(images[modality][:, :, slice_idx], cmap='gray', origin='lower')
        ax.set_title(title)
        ax.axis('off')

    # Fifth panel: T1c slice with the semi-transparent coloured mask on top
    mask_ax = axs[4]
    mask_rgb = color_label(label[:, :, slice_idx], label_colors)
    mask_ax.imshow(images['t1c'][:, :, slice_idx], cmap='gray', origin='lower')
    mask_ax.imshow(mask_rgb, alpha=0.5, origin='lower')
    mask_ax.set_title('Mask')
    mask_ax.axis('off')

    # Add a legend with one patch per non-background label
    handles = []
    for lbl, color in label_colors.items():
        if lbl == 0:
            continue
        patch = mpatches.Patch(color=np.array(color)/255, label=f"{lbl} - {label_descriptions[lbl]}")
        handles.append(patch)
    fig.legend(handles=handles, loc='center left', bbox_to_anchor=(1, 0.5), title="Labels")

    fig.suptitle(f"Slice Index: {slice_idx}")
    plt.show()

# Plot every 10th slice along the third axis, skipping the first 50 and
# the last 40 slices of the volume.
volume_depth = images['t1c'].shape[2]
for i in range(50, volume_depth - 40, 10):
    plot_slices(i, images, label)


In [10]:
from monai.transforms import LoadImage
# NOTE: this cell takes about 10 minutes to run, because it loads the segmentation file of every folder.

# Function to count the number of pixels for each label in a segmentation file
def count_labels(segmentation):
    """Count how often each distinct value occurs in ``segmentation``.

    Args:
        segmentation: Array-like of label values.

    Returns:
        Dict mapping each distinct value (in ascending order) to its number
        of occurrences.
    """
    values, occurrences = np.unique(segmentation, return_counts=True)
    return {value: occurrence for value, occurrence in zip(values, occurrences)}

# Aggregate label counts from all patients
total_counts = {label_id: 0 for label_id in label_descriptions.keys()}

# Iterate through each folder and aggregate the counts for each label.
# The loop variables are deliberately not called `label`: that name holds the
# segmentation volume used by the plotting cell, and rebinding it here would
# break re-running that cell.
for folder in folders:
    label_file = str(folder / f'{folder.name}-seg.nii.gz')
    if not os.path.exists(label_file):
        continue  # folders without a segmentation file are skipped

    # Load the label image
    label_data = LoadImage(image_only=True)(label_file)
    label_counts = count_labels(label_data)
    for label_id, voxel_count in label_counts.items():
        if label_id not in total_counts:
            # Fail with the offending file instead of a bare KeyError
            raise ValueError(f"Unexpected label value {label_id!r} in {label_file}")
        total_counts[label_id] += int(voxel_count)

# Calculate the total number of pixels across all labels
total_pixels = sum(total_counts.values())

# Print the counts and percentages for each label
print("Label - Counts - % of All")
for label_id, count in total_counts.items():
    # Guard against an empty dataset (no segmentation file found at all)
    percentage = (count / total_pixels) * 100 if total_pixels else 0.0
    print(f"{label_descriptions[label_id]} - {count} - {percentage:.2f}%")


Label - Counts - % of All
Background - 9649363112 - 98.98%
Non-enhancing tumor core (NETC) - 2285704 - 0.02%
Surrounding non-enhancing FLAIR hyperintensity (SNFH) - 66476419 - 0.68%
Enhancing tissue (ET) - 11442955 - 0.12%
Resection cavity (RC) - 18825010 - 0.19%


In [13]:
import torch

# Calculate inverse-frequency weights, excluding the background (label 0)
total_non_background_pixels = total_pixels - total_counts[0]

# A class that never occurs has no defined inverse frequency; fail with a
# clear message instead of a ZeroDivisionError or inf/nan weights.
missing_classes = [
    label_descriptions[label_id]
    for label_id, count in total_counts.items()
    if label_id != 0 and count == 0
]
if missing_classes:
    raise ValueError(f"Cannot compute class weights; no voxels found for: {', '.join(missing_classes)}")

class_weights = {label_id: total_non_background_pixels / count for label_id, count in total_counts.items() if label_id != 0}

# Normalize weights so that they sum to 1
total_weight = sum(class_weights.values())
normalized_class_weights = {label_id: weight / total_weight for label_id, weight in class_weights.items()}

# The loop variable is not called `label`, so the segmentation volume bound to
# that name by the plotting cells is left untouched.
print("\nClass Weights:")
for label_id, weight in normalized_class_weights.items():
    print(f"{label_descriptions[label_id]} - {weight:.4f}")

# Convert class weights to a PyTorch tensor, ordered by ascending label value.
# Background is excluded, so the tensor has one entry per non-background label.
# NOTE(review): a loss that expects one weight per class including background
# would need an extra entry - confirm against the training code.
class_weights_tensor = torch.tensor([normalized_class_weights[label_id] for label_id in sorted(normalized_class_weights.keys())], dtype=torch.float32)

print("\nClass Weights as PyTorch Tensor:")
print(class_weights_tensor)



Class Weights:
Non-enhancing tumor core (NETC) - 0.7377
Surrounding non-enhancing FLAIR hyperintensity (SNFH) - 0.0254
Enhancing tissue (ET) - 0.1474
Resection cavity (RC) - 0.0896

Class Weights as PyTorch Tensor:
tensor([0.7377, 0.0254, 0.1474, 0.0896])


In [14]:
import os
from pathlib import Path
import shutil
import random

# Define the path to the data directory
root_directory = Path().resolve().parent                            # Main project directory
data_directory = root_directory / 'data' / 'raw' / 'training_data'  # Data directory
sample_data_directory = root_directory / 'data_sample'              # Directory for sample data

# Ensure the sample data directory and subdirectories exist
sample_subdirectories = (
    sample_data_directory / 'training_data',
    sample_data_directory / 'validation_data',
)
for sample_subdirectory in sample_subdirectories:
    sample_subdirectory.mkdir(parents=True, exist_ok=True)

# List all subfolders in the data directory
# NOTE(review): both lists are read from the same source directory, so the
# "validation" candidates are the same cases as the "training" candidates.
training_data = [f for f in data_directory.iterdir() if f.is_dir()]
validation_data = [f for f in data_directory.iterdir() if f.is_dir()]

# Check if the sample data directory is empty, i.e. holds nothing besides the
# two subdirectories created above. Testing glob('*') on the parent would
# always fail, because those subdirectories are themselves entries of it.
if any(entry not in sample_subdirectories for entry in sample_data_directory.rglob('*')):
    raise ValueError("Sample data directory is not empty. Please clear the directory before running this code.")

# Function to create a sample of data
def create_sample(folders, sample_dir, sample_size=0.01):
    """Copy a random subset of ``folders`` into ``sample_dir``.

    Args:
        folders: Collection of directory paths to sample from.
        sample_dir: Destination directory; it must be empty or not exist yet.
        sample_size: Fraction of ``folders`` to copy. At least one folder is
            copied whenever ``folders`` is non-empty, and never more than
            ``len(folders)``.

    Returns:
        None. When ``sample_dir`` already has content or ``folders`` is empty,
        a message is printed and nothing is copied.
    """
    # Check the destination first so nothing is sampled when nothing will be copied
    if any(sample_dir.glob('*')):
        print("Sample data directory is not empty. Please clear the directory before running this code.")
        return

    folders = list(folders)
    if not folders:
        print("No folders to sample from.")
        return

    # Clamp to 1..len(folders): random.sample raises ValueError when asked for
    # more items than the population holds.
    sample_count = min(len(folders), max(1, int(len(folders) * sample_size)))
    sample_folders = random.sample(folders, sample_count)

    for folder in sample_folders:
        # Copy the folder to the sample data directory
        sample_folder_path = sample_dir / folder.name
        if not sample_folder_path.exists():
            shutil.copytree(folder, sample_folder_path)

# Create samples of each dataset
sample_training_dir = sample_data_directory / 'training_data'
sample_validation_dir = sample_data_directory / 'validation_data'

create_sample(training_data, sample_training_dir)

# training_data and validation_data are listed from the same source directory,
# so drop every case already present in the training sample; otherwise the
# same case could end up in both samples.
training_sample_names = {p.name for p in sample_training_dir.iterdir() if p.is_dir()}
validation_candidates = [f for f in validation_data if f.name not in training_sample_names]
if validation_candidates:
    create_sample(validation_candidates, sample_validation_dir)
else:
    print("No folders left for the validation sample after excluding the training sample.")

print(f"Sample data created in {sample_data_directory}")


Sample data created in C:\Users\mnem1\.programowanie\ES-Segmentation-Adult-Glioma\data_sample
