**Andere manier om naar de evaluatie te kijken: i.p.v. opnieuw te splitsen en te voorspellen, gebruiken we nu de opgeslagen predictions**

In [1]:
import ast
import json  # Needed for loading the mappings
import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

# === Location of the saved run (test predictions + label mappings) ===
# NOTE(review): machine-specific absolute path; point this at your own run folder
# when executing the notebook elsewhere.
save_path = r"C:\Users\corne\OneDrive - KU Leuven\Thesis\Working Code\SAVED-Models\GroNLP\Run_2025-04-11_18-10"

# Load the test-set predictions that were stored with the run.
df = pd.read_csv(os.path.join(save_path, "test_predictions.csv"))

# Rebuild the logits tensor from the CSV. Each cell of the "logits" column is a
# string (presumably the repr of a list of floats, one value per class — confirm
# against the code that wrote the file). ast.literal_eval only parses Python
# literals, so unlike the built-in eval() it cannot execute code embedded in a
# corrupt or tampered CSV.
logits = torch.tensor(df["logits"].apply(ast.literal_eval).tolist())

# Softmax over dim=1 turns each row of logits into a probability distribution.
probabilities = F.softmax(logits, dim=1)

# Plain Python lists of the raw columns, used by the analysis cells below.
texts = df["text"].tolist()
true_labels_ids = df["true_label"].tolist()
predicted_label_ids = df["predicted_label"].tolist()

# Load the label <-> theme mappings saved alongside the predictions.
with open(os.path.join(save_path, "label_mappings.json"), "r", encoding="utf-8") as f:
    mappings = json.load(f)

theme_to_id = mappings["theme_to_id"]
# JSON object keys are always strings; convert them back to int so the mapping
# can be indexed with the integer label ids read from the CSV.
id_to_theme = {int(k): v for k, v in mappings["id_to_theme"].items()}


**Unknowns**

In [2]:
# Find the most confidently wrong predictions.
TOP_K_ERRORS = 5  # how many misclassifications to print

probs_np = probabilities.numpy()
# Confidence of a prediction = the highest class probability in its row.
confidences = probs_np.max(axis=1)

# One (confidence, text, true theme, predicted theme) tuple per misclassified sample.
errors = [
    (confidence, text, id_to_theme[true_id], id_to_theme[predicted_id])
    for confidence, text, true_id, predicted_id in zip(
        confidences, texts, true_labels_ids, predicted_label_ids
    )
    if true_id != predicted_id
]

# Sort by confidence only, highest first. Sorting the bare tuples would break
# ties (common when softmax saturates at 1.0) by comparing the texts, and would
# raise TypeError if a text is NaN (pandas reads empty CSV cells as float).
# The keyed sort is stable, so tied rows keep their original dataset order.
errors.sort(key=lambda error: error[0], reverse=True)

# Show the top misclassifications.
for confidence, text, true_theme, predicted_theme in errors[:TOP_K_ERRORS]:
    print(f"🧠 Confidence: {confidence:.2f}")
    print(f"❌ True: {true_theme} | Predicted: {predicted_theme}")
    print(f"💬 Text: {text}")
    print("-" * 50)


🧠 Confidence: 1.00
❌ True: Milieu en Landbouw | Predicted: Mobiliteit en Infrastructuur
💬 Text: Welke timing stelt de minister voorop voor een beslissing over de vergunningsaanvraag voor de nieuwe stelplaats?
--------------------------------------------------
🧠 Confidence: 1.00
❌ True: Milieu en Landbouw | Predicted: Mobiliteit en Infrastructuur
💬 Text: Hoeveel van het aantal ongevallen gebeurden op het bedrijf, op het veld, op de weg, … in 2020, 2021, 2022 en 2023 tot op heden?
--------------------------------------------------
🧠 Confidence: 1.00
❌ True: Brussel en de Vlaamse Rand | Predicted: Mobiliteit en Infrastructuur
💬 Text: Welke projecten zijn al opgenomen in het doorkijkscenario naar het GIP 2023 en GIP 2024?
--------------------------------------------------
🧠 Confidence: 1.00
❌ True: Onderwijs en Samenleving | Predicted: Mobiliteit en Infrastructuur
💬 Text: Op welke manier worden chauffeurs opgeleid om ondersteuning en bijstand te geven Op welke manier worden chauffeurs tijd

In [3]:
# Translate the numeric label ids into human-readable theme names.
true_labels = [id_to_theme[label_id] for label_id in true_labels_ids]
predicted_labels = [id_to_theme[label_id] for label_id in predicted_label_ids]

# Per-sample flag: did the predicted theme match the true theme?
correct = [expected == predicted for expected, predicted in zip(true_labels, predicted_labels)]

# Build the report and order it in one chain: misclassified rows first
# (is_correct=False sorts before True), and within each group the most
# confident predictions on top.
output_df = pd.DataFrame(
    {
        "text": texts,
        "true_label": true_labels,
        "predicted_label": predicted_labels,
        "is_correct": correct,
        "confidence": confidences,
    }
).sort_values(by=["is_correct", "confidence"], ascending=[True, False])

# 1-based rank that follows the sorted order.
row_count = len(output_df)
output_df["rank"] = range(1, row_count + 1)

# Write the report as an Excel file in the current working directory.
excel_path = os.path.join(os.getcwd(), "prediction_confidence_report.xlsx")
output_df.to_excel(excel_path, index=False)
print(f"✅ Sorted report saved to: {excel_path}")


✅ Sorted report saved to: c:\Users\corne\Documents\thesis-question-classification\ConfidenceHandling\prediction_confidence_report.xlsx
