In [None]:
import numpy as np
from numpy.ma.core import argmax
from tqdm import tqdm
from PIL import Image

In [None]:
def compute_class_frequencies(label_image_paths, num_classes):
    """
    Compute class frequencies for each pixel location based on label images.

    Parameters:
        label_image_paths (list): List of file paths to the label images.
        num_classes (int): Total number of classes.

    Returns:
        np.ndarray: Class probabilities of shape (height, width, num_classes).
    """
    # Load the first label image to determine the dimensions
    sample_image = Image.open(label_image_paths[0])
    height, width = sample_image.size[::-1]  # PIL gives size as (width, height)

    # Initialize an array to store class counts
    class_counts = np.zeros((height, width, num_classes))

    # Iterate over all label image paths
    for path in tqdm(label_image_paths, desc="Computing class frequencies"):
        # Load the label image as a NumPy array
        label = np.array(Image.open(path))

        # Update class counts for each class
        for c in range(num_classes):
            class_counts[:, :, c] += (label == c)

    # Normalize counts to get probabilities
    class_probabilities = class_counts / len(label_image_paths)
    return class_probabilities

In [41]:
# Example usage
from glob import glob

# Assume label images are stored in a directory
label_image_paths = glob("../amos22/Train/label/*.png")  # Adjust file extension as needed
num_classes = 16  # Example number of classes

class_probabilities = compute_class_frequencies(label_image_paths, num_classes)
class_probabilities

Computing class frequencies: 100%|██████████| 26283/26283 [11:21<00:00, 38.57it/s]


array([[[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0.

In [42]:
def evaluate_baseline(test_label_paths, class_probabilities, num_classes):
    """
    Evaluate the baseline performance using computed class probabilities.

    Parameters:
        test_label_paths (list): List of file paths to the test label images.
        class_probabilities (np.ndarray): Array of shape (height, width, num_classes).
        num_classes (int): Total number of classes.

    Returns:
        dict: Dictionary containing evaluation metrics (pixel accuracy, mIoU, F1-score).
    """
    total_pixels = 0
    correct_pixels = 0
    iou_sum = np.zeros(num_classes)
    true_positives = np.zeros(num_classes)
    false_positives = np.zeros(num_classes)
    false_negatives = np.zeros(num_classes)

    for path in tqdm(test_label_paths, desc="Evaluating test data"):
        # Load the test label image
        ground_truth = np.array(Image.open(path))

        # Get the predicted segmentation map
        predicted_map = np.argmax(class_probabilities, axis=-1)

        # Flatten ground truth and prediction maps for metrics
        gt_flat = ground_truth.flatten()
        pred_flat = predicted_map.flatten()

        # Compute metrics
        total_pixels += len(gt_flat)
        correct_pixels += np.sum(gt_flat == pred_flat)

        for c in range(num_classes):
            true_positives[c] += np.sum((gt_flat == c) & (pred_flat == c))
            false_positives[c] += np.sum((gt_flat != c) & (pred_flat == c))
            false_negatives[c] += np.sum((gt_flat == c) & (pred_flat != c))

    # Pixel accuracy
    pixel_accuracy = correct_pixels / total_pixels

    # Compute IoU for each class and mean IoU
    iou_sum = true_positives / (true_positives + false_positives + false_negatives + 1e-10)
    mean_iou = np.mean(iou_sum)

    # Compute F1-score for each class
    f1_scores = 2 * true_positives / (2 * true_positives + false_positives + false_negatives + 1e-10)

    return {
        "Pixel Accuracy": pixel_accuracy,
        "Mean IoU": mean_iou,
        "Class-wise IoU": iou_sum,
        "Class-wise F1-scores": f1_scores
    }

In [43]:
# Example usage
test_label_paths = glob("../amos22/Validation/label/*.png")  # Adjust path
test_label_paths = test_label_paths[:200]
metrics = evaluate_baseline(test_label_paths, class_probabilities, num_classes)

# Print metrics
print("Baseline Metrics:")
print(f"Pixel Accuracy: {metrics['Pixel Accuracy']:.4f}")
print(f"Mean IoU: {metrics['Mean IoU']:.4f}")
print("Class-wise IoU:\n", metrics["Class-wise IoU"])
print("Class-wise F1-scores:\n", metrics["Class-wise F1-scores"])

Evaluating test data: 100%|██████████| 6481/6481 [01:57<00:00, 55.33it/s]

Baseline Metrics:
Pixel Accuracy: 0.9542
Mean IoU: 0.0596
Class-wise IoU:
 [0.95416275 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.        ]
Class-wise F1-scores:
 [0.97654379 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.        ]





In [44]:
def evaluate_baseline_top_k_metrics(test_label_paths, class_probabilities, num_classes, top_k=3):
    """
    Evaluate the baseline performance using top-k metrics for all relevant metrics.

    Parameters:
        test_label_paths (list): List of file paths to the test label images.
        class_probabilities (np.ndarray): Array of shape (height, width, num_classes).
        num_classes (int): Total number of classes.
        top_k (int): The k value for top-k metrics.

    Returns:
        dict: Dictionary containing top-k evaluation metrics (pixel accuracy, mIoU, F1-score).
    """
    total_pixels = 0
    correct_top_k_pixels = 0
    true_positives = np.zeros(num_classes)
    false_positives = np.zeros(num_classes)
    false_negatives = np.zeros(num_classes)

    for path in tqdm(test_label_paths, desc="Evaluating test data"):
        # Load the test label image
        ground_truth = np.array(Image.open(path))

        # Get top-k predictions
        top_k_predictions = np.argsort(-class_probabilities, axis=-1)[..., :top_k]

        # Flatten ground truth and top-k predictions for metrics
        gt_flat = ground_truth.flatten()
        top_k_flat = top_k_predictions.reshape(-1, top_k)

        total_pixels += len(gt_flat)

        # Top-k accuracy
        correct_top_k_pixels += np.sum([gt in top_k for gt, top_k in zip(gt_flat, top_k_flat)])

        # Compute true positives, false positives, and false negatives
        for c in range(num_classes):
            # Check if the true class (c) is in the top-k predictions
            is_true_class_in_top_k = [(gt == c) and (c in top_k) for gt, top_k in zip(gt_flat, top_k_flat)]
            true_positives[c] += sum(is_true_class_in_top_k)

            # False positives: Class c predicted in top-k but not the true class
            is_false_positive = [(c in top_k) and (gt != c) for gt, top_k in zip(gt_flat, top_k_flat)]
            false_positives[c] += sum(is_false_positive)

            # False negatives: True class c not in top-k predictions
            is_false_negative = [(gt == c) and (c not in top_k) for gt, top_k in zip(gt_flat, top_k_flat)]
            false_negatives[c] += sum(is_false_negative)

    # Compute Top-k Accuracy
    top_k_accuracy = correct_top_k_pixels / total_pixels

    # Compute IoU for each class and mean IoU
    iou_per_class = true_positives / (true_positives + false_positives + false_negatives + 1e-10)
    mean_iou = np.mean(iou_per_class)

    # Compute F1-score for each class
    f1_per_class = 2 * true_positives / (2 * true_positives + false_positives + false_negatives + 1e-10)

    return {
        "Top-k Accuracy": top_k_accuracy,
        "Mean IoU (Top-k)": mean_iou,
        "Class-wise IoU (Top-k)": iou_per_class,
        "Class-wise F1-scores (Top-k)": f1_per_class
    }

In [None]:
# Example usage
test_label_paths = glob("../amos22/Test/label/*.png")  # Adjust path
test_label_paths = test_label_paths[:200]
top_k = 3  # Check top-3 predictions
metrics = evaluate_baseline_top_k_metrics(test_label_paths, class_probabilities, num_classes, top_k=top_k)

# Print metrics
print("Baseline Metrics (Top-k):")
print(f"Top-{top_k} Accuracy: {metrics['Top-k Accuracy']:.4f}")
print(f"Mean IoU (Top-{top_k}): {metrics['Mean IoU (Top-k)']:.4f}")
print("Class-wise IoU (Top-k):", metrics["Class-wise IoU (Top-k)"])
print("Class-wise F1-scores (Top-k):", metrics["Class-wise F1-scores (Top-k)"])

Evaluating test data:   5%|▌         | 10/200 [03:18<1:03:05, 19.92s/it]