<a href="https://colab.research.google.com/github/Sibusisongwenya/WIP-Project/blob/main/reporting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# utils/reporting.py

import os
import time
import logging
import numpy as np

# Configure the root logger when this module is imported: INFO level, with
# timestamp, level name and message in every record.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def calculate_and_write_metrics(f_report, method_name, preds, labels, metrics, uncerts=None, conf_metrics=None, calibration_curve_data=None):
    """
    Calculate performance metrics, store them in ``metrics`` and write them to a report.

    Metrics are produced in this order: accuracy; binary AUC / accuracy / F1
    (only when both binary classes occur in the labels); average uncertainty;
    average confidence for each supplied confidence array; UCE.

    Args:
        f_report: Writable text stream; receives one line per metric.
        method_name: Prefix for the report lines and for the ``metrics`` keys.
        preds: Predicted class indices (array-like).
        labels: Ground-truth class indices (array-like); must have the same
            shape as ``preds`` once singleton dimensions are squeezed.
        metrics: Dict updated in place with ``"{method_name}_<metric>"`` keys.
        uncerts: Optional per-sample uncertainties.
        conf_metrics: Optional iterable of ``(confidence_values, name)`` pairs.
        calibration_curve_data: When not ``None``, UCE is computed from
            ``labels``, ``preds`` and ``uncerts``. The value itself is only
            used as a switch here.

    Returns:
        None. Any exception is logged and swallowed (best effort); metrics
        written before the failure stay in ``metrics`` and in the report.
    """
    try:
        # Convert to arrays first: comparing two plain lists with ``==`` gives
        # a single bool, which would silently yield an accuracy of 0.0 or 1.0.
        # ``atleast_1d`` keeps single-sample inputs iterable after squeezing.
        preds = np.atleast_1d(np.squeeze(np.asarray(preds)))
        labels = np.atleast_1d(np.squeeze(np.asarray(labels)))
        if preds.shape != labels.shape:
            raise ValueError(
                f"preds shape {preds.shape} does not match labels shape {labels.shape}"
            )
        if preds.size == 0:
            raise ValueError("no predictions to evaluate")

        # Accuracy
        accuracy = np.mean(preds == labels)
        metrics[f'{method_name}_accuracy'] = accuracy
        f_report.write(f"{method_name} Accuracy: {accuracy:.4f}\n")

        # Binary metrics: classes <= 1 are mapped to 1, everything else to 0.
        # NOTE(review): other reports in this file describe the binary label
        # as "0 = Remission, 1 = Non-Remission"; confirm that the polarity
        # used here (which affects F1) is the intended one.
        binary_labels = (labels <= 1).astype(int)
        binary_preds = (preds <= 1).astype(int)
        if np.unique(binary_labels).size >= 2:
            # AUC is undefined with a single class, hence the guard above.
            from sklearn.metrics import roc_auc_score, f1_score
            auc = roc_auc_score(binary_labels, binary_preds)
            metrics[f'{method_name}_auc'] = auc
            f_report.write(f"{method_name} AUC: {auc:.4f}\n")
            binary_acc = np.mean(binary_labels == binary_preds)
            metrics[f'{method_name}_binary_accuracy'] = binary_acc
            f_report.write(f"{method_name} Binary Accuracy: {binary_acc:.4f}\n")
            f1 = f1_score(binary_labels, binary_preds)
            metrics[f'{method_name}_f1'] = f1
            f_report.write(f"{method_name} F1 Score: {f1:.4f}\n")

        # Uncertainty Metrics
        if uncerts is not None:
            avg_uncertainty = np.mean(uncerts)
            metrics[f'{method_name}_avg_uncertainty'] = avg_uncertainty
            f_report.write(f"{method_name} Avg Uncertainty: {avg_uncertainty:.4f}\n")

        # Confidence Metrics (if provided)
        if conf_metrics:
            for conf, name in conf_metrics:
                avg_conf = np.mean(conf)
                metrics[f'{method_name}_avg_confidence_{name.lower()}'] = avg_conf
                f_report.write(f"{method_name} Avg Confidence ({name}): {avg_conf:.4f}\n")

        # Calibration Metrics (UCE)
        if calibration_curve_data is not None:
            from utils.evaluation import calculate_uce
            uce_val, _ = calculate_uce(labels, preds, uncerts)
            metrics[f'{method_name}_uce'] = uce_val
            f_report.write(f"{method_name} UCE: {uce_val:.2f}%\n")
    except Exception as e:
        # Best effort: reporting must not abort the caller, but keep the
        # traceback so the failure can be diagnosed from the log.
        logging.error(f"Error calculating metrics for {method_name}: {e}", exc_info=True)


def write_patient_predictions(f, model_name, predictions, idx, true_label, class_labels):
    """
    Append one deterministic-model prediction entry to an open report.

    Writes the predicted class (with its name from ``class_labels``) and
    whether it equals ``true_label``. Nothing is written when ``predictions``
    is None or ``idx`` lies beyond its end.
    """
    if predictions is None or idx >= len(predictions):
        return

    predicted = predictions[idx]
    f.write(f"\n{model_name} Model Prediction:\n")
    predicted_name = class_labels[predicted]
    f.write(f"  - Predicted Class: {predicted} ({predicted_name})\n")
    verdict = 'Yes' if predicted == true_label else 'No'
    f.write(f"  - Correct: {verdict}\n")


def write_bayesian_predictions(f, model_name, predictions, idx, true_label, class_labels):
    """
    Append one Bayesian-model entry (predicted class and uncertainty) to an open report.

    ``predictions[0][idx]`` is read as the predicted class and
    ``predictions[1][idx]`` as its uncertainty. Nothing is written when
    ``predictions`` is None or ``idx`` lies beyond ``predictions[0]``.
    A failure while reading or writing the entry is recorded in the report
    itself instead of being raised. ``true_label`` is accepted but not used.
    """
    if predictions is None or idx >= len(predictions[0]):
        return

    try:
        predicted, sigma = predictions[0][idx], predictions[1][idx]
        f.write(f"\n{model_name} Bayesian Model:\n")
        predicted_name = class_labels[predicted]
        f.write(f"  - Predicted Class: {predicted} ({predicted_name})\n")
        f.write(f"  - Uncertainty: {sigma:.2f}\n")
    except Exception as err:
        f.write(f"\nError writing {model_name} predictions: {err}\n")


class _DefaultClassNames(dict):
    """Class-name lookup that yields ``"Class <index>"`` for any index not stored."""

    def __missing__(self, index):
        return f"Class {index}"


def generate_patient_case_studies(test_loader, predictions_deterministic=None,
                                  predictions_bayesian_llsvi=None,
                                  predictions_bayesian_lldropout=None,
                                  labels=None, output_dir='output', max_patients=5, class_labels=None):
    """
    Generates patient-level case studies and saves them to a file.

    The report is written to ``<output_dir>/patient_case_studies.txt``. Errors
    raised while iterating the loader or writing entries are logged and stop
    the report early; they are not propagated.

    Args:
        test_loader: Iterable yielding ``(images, true_labels)`` batches (or
            ``None`` for a batch to skip). Each element of ``true_labels``
            must provide ``.item()``.
        predictions_deterministic: Deterministic model predictions, indexed by
            sample position.
        predictions_bayesian_llsvi: Bayesian LL-SVI ``(classes, uncertainties)`` tuple.
        predictions_bayesian_lldropout: Bayesian LL-Dropout ``(classes, uncertainties)`` tuple.
        labels: Ground truth labels. Kept for backward compatibility; default
            class names are generated on demand, so it may be omitted.
        output_dir: Directory to save the case studies.
        max_patients (int): Maximum number of patient cases to include.
        class_labels: Optional list of class names. When omitted, names of the
            form ``"Class <index>"`` are used.
    """
    os.makedirs(output_dir, exist_ok=True)
    patient_report_file = os.path.join(output_dir, "patient_case_studies.txt")
    logging.info(f"Generating patient case studies: {patient_report_file}")
    if class_labels is None:
        # Generated lazily so the defaults work without ``labels`` and for
        # label sets with gaps (e.g. only classes 0, 2 and 3 present).
        class_labels = _DefaultClassNames()
    with open(patient_report_file, 'w', encoding='utf-8') as f:
        f.write("Patient Case Studies\n" + "=" * 50 + "\n")
        try:
            data_iter = iter(test_loader)
            patient_count = 0
            while patient_count < max_patients:
                try:
                    batch = next(data_iter)
                except StopIteration:
                    break
                if batch is None:
                    continue
                _images, true_labels = batch
                for label_item in true_labels:
                    if patient_count >= max_patients:
                        break
                    true_label = label_item.item()
                    # Predictions span the whole test set, so they must be
                    # indexed by the running sample position, not by the
                    # position inside the current batch.
                    # NOTE(review): assumes predictions were produced in
                    # loader order with the same ``None`` batches skipped.
                    sample_idx = patient_count
                    patient_id = f"Patient_Test_{patient_count+1:03d}"
                    f.write(f"\n--- Patient Case Study {patient_count+1} ---\n")
                    f.write(f"Patient ID: {patient_id}\n")
                    f.write(f"Ground Truth: {true_label} ({class_labels[true_label]})\n")
                    write_patient_predictions(f, "Deterministic", predictions_deterministic, sample_idx, true_label, class_labels)
                    write_bayesian_predictions(f, "LL-SVI", predictions_bayesian_llsvi, sample_idx, true_label, class_labels)
                    write_bayesian_predictions(f, "LL-Dropout", predictions_bayesian_lldropout, sample_idx, true_label, class_labels)
                    patient_count += 1
        except Exception as e:
            # Best effort: keep whatever was written and record the traceback.
            logging.error(f"Error generating patient case studies: {e}", exc_info=True)
    logging.info(f"Patient case studies saved to: {patient_report_file}")


def _format_optional(value, spec):
    """Format ``value`` with ``spec``; return ``"N/A"`` if missing, or ``str(value)`` if not numeric."""
    if value is None:
        return "N/A"
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return str(value)


def generate_clinical_report(overall_metrics: dict, patient_summary: dict, output_dir: str = 'output', filename: str = 'clinical_report.txt'):
    """
    Generates a combined clinical report that includes overall model metrics and a detailed patient-level clinical summary.

    overall_metrics: Dictionary with keys for each model variant (e.g., "Deterministic", "Bayesian LL-SVI", "Bayesian LL-Dropout").
                     Each key maps to a dictionary of metrics, including:
                      - accuracy, auc, binary_accuracy, f1, mae, rmse,
                      - prob_conf (Probability-based Confidence),
                      - entropy_conf (Entropy-based Confidence),
                      - ci_conf (CI-based Confidence),
                      - uc (Uncertainty Degree),
                      - uce (Expected Uncertainty Calibration Error)
                     Missing metrics are reported as 0, except ``mae`` and
                     ``rmse`` which are omitted when absent.

    patient_summary: Dictionary with keys:
                     - "patient_id": str, e.g., "Patient 042"
                     - "predicted_score": float (continuous prediction)
                     - "std": float (uncertainty)
                     - "ground_truth": float (continuous ground truth)
                     - "reg_error": float (|prediction - ground_truth|)
                     - "cdf_probs": list of 4 floats (for Mayo 0, 1, 2, 3)
                     - "four_class_label": int (0-3)
                     - "binary_label": int (0 for remission, 1 for non-remission)
                     - "prob_conf": float, probability-based confidence (fraction)
                     - "entropy_conf": float, entropy-based confidence (fraction)
                     - "ci_conf": float, CI-based confidence (fraction)
                     - "uc": float, uncertainty degree (percentage)
                     - "uce": float, Expected Uncertainty Calibration Error (percentage)
                     Every key is optional; missing "ground_truth" and
                     "reg_error" are reported as "N/A".

    The report is written as UTF-8 text to ``<output_dir>/<filename>``.
    """
    os.makedirs(output_dir, exist_ok=True)
    report_path = os.path.join(output_dir, filename)

    # Compute 95% confidence interval from predicted_score and std
    mean = patient_summary.get("predicted_score", 0)
    std = patient_summary.get("std", 0)
    ci_lower = mean - 1.96 * std
    ci_upper = mean + 1.96 * std

    # The report contains "±", so the encoding is pinned rather than left to
    # the platform default.
    with open(report_path, 'w', encoding='utf-8') as f:
        # Header
        f.write("Clinical Report for DenseNet121\n")
        f.write("=" * 50 + "\n\n")

        # Overall Metrics Section
        f.write("Overall Model Metrics:\n\n")
        for model_name, metrics in overall_metrics.items():
            f.write(f"--- {model_name} Model Metrics ---\n")
            f.write(f"Accuracy: {metrics.get('accuracy', 0)*100:.1f}%\n")
            f.write(f"AUC: {metrics.get('auc', 0):.4f}\n")
            f.write(f"Binary Accuracy: {metrics.get('binary_accuracy', 0)*100:.1f}%\n")
            f.write(f"F1 Score: {metrics.get('f1', 0):.4f}\n")
            if 'mae' in metrics:
                f.write(f"MAE: {metrics['mae']:.4f}\n")
            if 'rmse' in metrics:
                f.write(f"RMSE: {metrics['rmse']:.4f}\n")
            f.write(f"Probability-based Confidence (Max Probability): {metrics.get('prob_conf', 0)*100:.1f}%\n")
            f.write(f"Entropy-based Confidence: {metrics.get('entropy_conf', 0)*100:.1f}%\n")
            f.write(f"CI-based Confidence: {metrics.get('ci_conf', 0)*100:.1f}%\n")
            f.write(f"Uncertainty Degree (UC): {metrics.get('uc', 0):.1f}%\n")
            f.write(f"UCE (Expected Uncertainty Calibration Error): {metrics.get('uce', 0):.1f}%\n")
            f.write("\n")

        # Patient-Level Summary Section
        f.write("Patient-Level Case Study\n\n")
        patient_id = patient_summary.get("patient_id", "Patient")
        f.write(f"{patient_id} Clinical Summary:\n")
        f.write(f"Predicted Mayo Score: {mean:.1f} ± {std:.1f}\n")
        # A numeric format spec cannot be applied to an "N/A" placeholder, so
        # optional values go through a helper that tolerates missing entries.
        ground_truth = _format_optional(patient_summary.get('ground_truth'), '.1f')
        reg_error = _format_optional(patient_summary.get('reg_error'), '.4f')
        f.write(f"Ground Truth: {ground_truth}\n")
        f.write(f"Regression Error: {reg_error}\n")
        f.write("Class Probability Distribution (4-Class):\n")
        cdf_probs = patient_summary.get("cdf_probs", [0, 0, 0, 0])
        # Iterate instead of indexing 0..3 so a shorter list cannot raise.
        for mayo_level, prob in enumerate(cdf_probs):
            f.write(f"Mayo {mayo_level}: {prob*100:.1f}%\n")
        f.write(f"95% Confidence Range: [{ci_lower:.1f}, {ci_upper:.1f}]\n")
        f.write("\nDiscretized Predictions:\n")
        f.write(f"Four-Class Label: {patient_summary.get('four_class_label', 'N/A')}\n")
        f.write(f"Binary Label: {patient_summary.get('binary_label', 'N/A')} (0 = Remission, 1 = Non-Remission)\n")
        f.write("\nConfidence Metrics:\n")
        f.write(f"Probability-based Confidence (Max Probability): {patient_summary.get('prob_conf', 0)*100:.1f}%\n")
        f.write(f"Entropy-based Confidence: {patient_summary.get('entropy_conf', 0)*100:.1f}%\n")
        f.write(f"CI-based Confidence: {patient_summary.get('ci_conf', 0)*100:.1f}%\n")
        f.write(f"Uncertainty Degree (UC): {patient_summary.get('uc', 0):.1f}%\n")
        f.write(f"UCE (Expected Uncertainty Calibration Error): {patient_summary.get('uce', 0):.1f}%\n")

    logging.info(f"Clinical report generated and saved to {report_path}")


def _write_mayo_case_study(f, idx, class_name, ground_truth, llsvi_results):
    """Write the case-study section for sample ``idx`` using the LL-SVI results dict."""
    # Project-local helpers are imported only when a case is actually written.
    from utils.helpers import entropy_confidence, ci_confidence, uncertainty_degree
    from utils.evaluation import calculate_uce

    cont_pred = llsvi_results['continuous_predictions'][idx]
    std = llsvi_results['uncertainties'][idx]
    reg_error = abs(cont_pred - ground_truth)
    cdf_probs = llsvi_results['cdf_probs'][idx]
    four_class_label = llsvi_results['four_class_predictions'][idx]
    binary_label = llsvi_results['binary_predictions'][idx]

    # The helpers are called on one-element batches and their outputs averaged.
    probs_batch = np.expand_dims(cdf_probs, axis=0)
    prob_conf = np.max(cdf_probs)
    entropy_conf_val = np.mean(entropy_confidence(probs_batch))
    ci_conf_val = np.mean(ci_confidence(np.array([std])))
    uc = np.mean(uncertainty_degree(probs_batch))
    uce, _ = calculate_uce(np.array([ground_truth]), np.array([cont_pred]), np.array([std]))

    # 95% interval under a normal approximation: mean ± 1.96 * std.
    ci_lower = cont_pred - 1.96 * std
    ci_upper = cont_pred + 1.96 * std

    f.write(f"\n--- Patient Case Study for {class_name} ---\n")
    f.write(f"Patient Index: {idx}\n")
    f.write(f"Ground Truth: {ground_truth} ({class_name})\n")
    f.write(f"Predicted Mayo Score: {cont_pred:.1f} ± {std:.1f}\n")
    f.write(f"Regression Error: {reg_error:.4f}\n")
    f.write("Class Probability Distribution (4-Class):\n")
    for mayo_level in range(4):
        f.write(f"  Mayo {mayo_level}: {cdf_probs[mayo_level]*100:.1f}%\n")
    f.write(f"95% Confidence Range: [{ci_lower:.1f}, {ci_upper:.1f}]\n")
    f.write("\nDiscretized Predictions:\n")
    f.write(f"  Four-Class Label: {four_class_label}\n")
    f.write(f"  Binary Label: {binary_label} (0 = Remission, 1 = Non-Remission)\n")
    f.write("\nConfidence Metrics:\n")
    f.write(f"  Probability-based Confidence (Max Probability): {prob_conf*100:.1f}%\n")
    f.write(f"  Entropy-based Confidence: {entropy_conf_val*100:.1f}%\n")
    f.write(f"  CI-based Confidence: {ci_conf_val*100:.1f}%\n")
    f.write(f"  Uncertainty Degree (UC): {uc:.1f}%\n")
    f.write(f"  UCE (Expected Uncertainty Calibration Error): {uce:.1f}%\n")


def generate_patient_case_studies_per_class(test_loader, predictions_deterministic, predictions_bayesian_llsvi, predictions_bayesian_lldropout, output_dir='output', max_patients=4, class_labels=None):
    """
    Generates patient case studies ensuring one sample is included for each of the 4 Mayo classes.
    Saves the results to ``<output_dir>/patient_case_studies_per_class.txt`` (UTF-8).

    For every class in 0..3 the first sample whose label (converted with
    ``int``) equals that class is selected; classes that never occur are
    skipped and reported with a warning.

    Args:
        test_loader: Not used by this function.
        predictions_deterministic: Dict whose ``'labels'`` entry holds the
            ground-truth labels. If it is missing, an error is logged and
            nothing is written.
        predictions_bayesian_llsvi: Dict with ``'continuous_predictions'``,
            ``'uncertainties'``, ``'cdf_probs'``, ``'four_class_predictions'``
            and ``'binary_predictions'``, each indexable by sample position.
        predictions_bayesian_lldropout: Not used by this function.
        output_dir: Directory to save the report.
        max_patients: Not used by this function.
        class_labels: Optional class names indexed by class; defaults to
            ``"Mayo 0"`` .. ``"Mayo 3"``.
    """
    mayo_classes = (0, 1, 2, 3)

    os.makedirs(output_dir, exist_ok=True)
    patient_report_file = os.path.join(output_dir, "patient_case_studies_per_class.txt")
    logging.info(f"Generating patient case studies per class: {patient_report_file}")

    all_labels = predictions_deterministic.get('labels')
    if all_labels is None:
        logging.error("No labels found in deterministic results.")
        return

    # First occurrence of each Mayo class; stop scanning once all are found.
    selected_indices = {}
    for idx, label in enumerate(all_labels):
        label_int = int(label)
        if label_int in mayo_classes and label_int not in selected_indices:
            selected_indices[label_int] = idx
        if len(selected_indices) >= len(mayo_classes):
            break

    missing = [cls for cls in mayo_classes if cls not in selected_indices]
    if missing:
        logging.warning(f"No sample found for Mayo class(es): {missing}")

    if class_labels is None:
        class_labels = [f"Mayo {i}" for i in mayo_classes]

    # The report contains "±", so the encoding is pinned rather than left to
    # the platform default.
    with open(patient_report_file, 'w', encoding='utf-8') as f:
        f.write("Patient Case Studies (One Sample Per Mayo Class)\n")
        f.write("=" * 50 + "\n")
        for cls in sorted(selected_indices):
            idx = selected_indices[cls]
            _write_mayo_case_study(f, idx, class_labels[cls], all_labels[idx], predictions_bayesian_llsvi)

    logging.info(f"Patient case studies generated and saved to: {patient_report_file}")
