In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
from pathlib import Path

In [2]:
# Path of a single prediction dump; the analysis loop further down assigns its own `fp`.
fp = "/home/Nele/code/ct_classifier_zoops/val_predictions_epoch_.json"
# Directory that the image names stored in the prediction files are joined onto.
image_root = "/mnt/class_data/Nele/ct_train_data"
# Mapping class name -> class index. Opened in a `with` block so the file handle
# is closed deterministically instead of being left to the garbage collector.
with open("/home/Nele/code/scripts/DataPrep_Classifier/category_dict.json", 'r') as category_file:
    category_dict = json.load(category_file)
# Reverse mapping class index -> class name, used to label plots and print-outs.
inverse_category_dict = {v: k for k, v in category_dict.items()}

In [None]:
results_dir = Path("/home/Nele/code/ct_classifier_zoops/runs/Jan23_03-24-49_cv4e-2026-student4/val_predictions")

# Only every EPOCH_STRIDE-th prediction file (newest epoch first) is analysed.
# With fewer than EPOCH_STRIDE files this means: just the last epoch.
EPOCH_STRIDE = 1000


def epoch_number(path):
    """Return the epoch encoded in a ``val_predictions_epoch_<N>.json`` file name.

    Args:
        path: File name or path (``str`` or ``Path``).

    Returns:
        The epoch as ``int``, or ``None`` when ``<N>`` is not an integer.
    """
    token = Path(path).name.split('_')[-1].split('.')[0]
    try:
        return int(token)
    except ValueError:
        return None


def newest_first(paths):
    """Return the paths that carry a parsable epoch number, highest epoch first.

    Sorting is numeric, so ``epoch_10`` correctly comes before ``epoch_9``
    (a plain string sort would order them the other way round).
    """
    with_epoch = [Path(p) for p in paths if epoch_number(p) is not None]
    return sorted(with_epoch, key=epoch_number, reverse=True)


def plot_confidence_hist(correct, incorrect, title):
    """Overlay histograms of the confidences of correct and incorrect predictions."""
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.hist(correct, bins=20, alpha=0.7, label='Correct Predictions', color='g')
    ax.hist(incorrect, bins=20, alpha=0.7, label='Incorrect Predictions', color='r')
    ax.set_title(title)
    ax.set_xlabel('Confidence')
    ax.set_ylabel('Number of Predictions')
    ax.legend()
    fig.tight_layout()
    plt.show()
    plt.close(fig)


def show_image_row(image_paths, captions, suptitle, n_slots=5):
    """Show up to ``n_slots`` images side by side, each with its caption as title."""
    fig, axes = plt.subplots(1, n_slots, figsize=(15, 3), squeeze=False)
    axes = axes[0]
    for ax in axes:
        ax.axis('off')  # applied to every slot so unused ones do not show an empty frame
    for ax, image_path, caption in zip(axes, image_paths, captions):
        ax.imshow(plt.imread(image_path), cmap='gray')
        ax.set_title(caption)
    fig.suptitle(suptitle)
    plt.show()
    plt.close(fig)


files = newest_first(results_dir.glob("val_predictions_epoch_*.json"))
print(f"Found {len(files)} files to process.")

for f in files[::EPOCH_STRIDE]:
    fp = f"{results_dir}/{f.name}"
    ep_num_str = epoch_number(f)  # an int despite the name; later cells read it for plot titles
    print(f"Processing file: {fp} {f}")
    with open(fp, 'r') as fh:  # separate name so the loop variable `f` is not clobbered
        data = json.load(fh)
    # convert label lists to numpy arrays for easier manipulation
    predictions = np.array(data['predictions'])
    ground_truths = np.array(data['labels'])
    confidences = data['confidences']
    image_names = data['image_names']
    # the four fields are indexed in parallel below, so they must have the same length
    if not (len(predictions) == len(ground_truths) == len(confidences) == len(image_names)):
        raise ValueError(f"Inconsistent number of entries per field in {fp}")
    conf_values = np.asarray(confidences, dtype=float)
    is_correct = predictions == ground_truths

    # One sorted list of class ids drives the matrix rows/columns AND the tick
    # labels, so they stay aligned even if a class is missing from this file.
    class_ids = sorted(inverse_category_dict)
    class_names = [inverse_category_dict[c] for c in class_ids]

    # plot confusion matrix normalised over true labels
    cm = confusion_matrix(ground_truths, predictions, normalize='true', labels=class_ids)
    fig, ax = plt.subplots(figsize=(8, 6))
    image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.set_title(f'Confusion Matrix ep:{ep_num_str}')
    fig.colorbar(image, ax=ax)
    tick_marks = np.arange(len(class_ids))
    ax.set_xticks(tick_marks)
    # rotate x tick labels for better readability
    ax.set_xticklabels(class_names, rotation=45, ha='right')
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(class_names)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    fig.tight_layout()
    plt.show()
    plt.close(fig)

    # plot histogram of confidences for correct and incorrect predictions
    plot_confidence_hist(
        conf_values[is_correct],
        conf_values[~is_correct],
        f'Prediction Confidences ep:{ep_num_str}',
    )

    # plot for each class the distribution of confidences for correct and incorrect predictions
    for class_idx in class_ids:
        class_name = inverse_category_dict[class_idx]
        print(f"Processing class: {class_name}")
        is_class = ground_truths == class_idx
        # samples of this true class that were predicted correctly / as another class
        correct_indices = np.flatnonzero(is_class & is_correct).tolist()
        incorrect_indices = np.flatnonzero(is_class & ~is_correct).tolist()
        plot_confidence_hist(
            conf_values[correct_indices],
            conf_values[incorrect_indices],
            f'Prediction Confidences for class "{class_name}" ep:{ep_num_str}',
        )

        # show the 5 most confident correct and incorrect predictions for this class
        correct_indices_sorted = sorted(correct_indices, key=lambda i: confidences[i], reverse=True)[:5]
        incorrect_indices_sorted = sorted(incorrect_indices, key=lambda i: confidences[i], reverse=True)[:5]

        print(f"Class '{class_name}' - Top 5 Correct Predictions:")
        for i in correct_indices_sorted:
            print(f"  Image: {image_names[i]}, Confidence: {confidences[i]:.4f}")
        show_image_row(
            [f"{image_root}/{image_names[i]}" for i in correct_indices_sorted],
            [f"Conf: {confidences[i]:.4f}" for i in correct_indices_sorted],
            f"Class '{class_name}' - Top 5 Correct Predictions",
        )

        print(f"Class '{class_name}' - Top 5 Incorrect Predictions:")
        for i in incorrect_indices_sorted:
            print(f"  Image: {image_names[i]}, Confidence: {confidences[i]:.4f} (GT: {inverse_category_dict[ground_truths[i]]}) {inverse_category_dict[predictions[i]]}")
        show_image_row(
            [f"{image_root}/{image_names[i]}" for i in incorrect_indices_sorted],
            [
                f"Conf: {confidences[i]:.4f} \ngt:{inverse_category_dict[ground_truths[i]]} pr:{inverse_category_dict[predictions[i]]}"
                for i in incorrect_indices_sorted
            ],
            f"Class '{class_name}' - Top 5 Incorrect Predictions\n",
        )

In [None]:
# Number of samples in the most recently loaded prediction file.
# Reads `confidences` (set by the analysis loop above) instead of `conf_arr`:
# `conf_arr` is only created in a later cell, so referencing it here raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
len(confidences)

In [None]:
## Model confidence, grouped by the true (human-assigned) class
import numpy as np
import matplotlib.pyplot as plt

conf_arr = np.array(confidences)
gt = np.array(ground_truths)

class_ids = sorted(inverse_category_dict.keys())
class_names = [inverse_category_dict[c] for c in class_ids]

# Mean and standard deviation of the confidences per true class;
# classes without any sample in `gt` get NaN.
mean_conf_true = []
std_conf_true = []
for c in class_ids:
    in_class = gt == c
    if in_class.any():
        mean_conf_true.append(conf_arr[in_class].mean())
        std_conf_true.append(conf_arr[in_class].std())
    else:
        mean_conf_true.append(np.nan)
        std_conf_true.append(np.nan)

fig, ax = plt.subplots(figsize=(10, 4))
ax.scatter(class_names, mean_conf_true, color="steelblue")
ax.errorbar(class_names, mean_conf_true, yerr=std_conf_true, fmt="o", color="red", alpha=0.5)
ax.set_title(f"Mean confidence by class ep:{ep_num_str}")
ax.set_ylabel("Mean confidence")
plt.xticks(rotation=45, ha="right")
ax.set_ylim(0, 1)
for grid_axis in ("y", "x"):
    ax.grid(axis=grid_axis)
fig.tight_layout()
plt.show()

# Each point is the mean confidence over all samples whose human label is that
# class, regardless of which class the model predicted for them.
# It does not show the confidences grouped by predicted class.



In [None]:
## Confidence score of the model per predicted class

# Samples are grouped by the label the model assigned to them (`pred`),
# regardless of their true label.
conf_arr = np.array(confidences)
pred = np.array(predictions)

class_ids = sorted(inverse_category_dict.keys())
class_names = [inverse_category_dict[c] for c in class_ids]
# Mean / standard deviation of the confidences per predicted class;
# NaN marks classes that were never predicted in this file.
mean_conf_pred = [conf_arr[pred == c].mean() if (pred == c).any() else np.nan for c in class_ids]
std_conf_pred = [conf_arr[pred == c].std() if (pred == c).any() else np.nan for c in class_ids]

plt.figure(figsize=(10, 4))
plt.scatter(class_names, mean_conf_pred, color="steelblue")
plt.errorbar(class_names, mean_conf_pred, yerr=std_conf_pred, fmt="o", color="red", alpha=0.5)
# The title names the grouping so this figure cannot be confused with a plot
# of the same quantity grouped by true class.
plt.title(f"Mean confidence by predicted class ep:{ep_num_str}")
plt.ylabel("Mean confidence")
plt.xticks(rotation=45, ha="right")
plt.ylim(0, 1)
plt.grid(axis="y")
plt.grid(axis="x")
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd

# Confusion matrix (rows = true class, columns = predicted class).
# `labels=class_ids` pins one row/column per known class in the same order as
# `class_names`; without it, a class absent from both `gt` and `pred` would be
# dropped and the table below would fail with a length mismatch.
cm = confusion_matrix(gt, pred, labels=class_ids)   # shape [C, C]

# Per-class accuracy: correct predictions divided by the number of true samples.
# Classes without any true sample stay NaN instead of triggering a 0/0 warning.
support = cm.sum(axis=1)
per_class_acc = np.full(len(class_ids), np.nan)
np.divide(cm.diagonal(), support, out=per_class_acc, where=support > 0)
# Unweighted mean over the classes that have at least one true sample.
class_avg_acc = np.nanmean(per_class_acc)

# Table
df = pd.DataFrame({
    "Class": class_names,                 # list of class labels
    "Accuracy": per_class_acc
})

print(df)
print("\nClass-average accuracy:", class_avg_acc)
