<a href="https://colab.research.google.com/github/Sibusisongwenya/WIP-Project/blob/main/plotting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# utils/plotting.py

import os
from pathlib import Path
import itertools
import logging

import numpy as np
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from scipy.stats import linregress

# Configure the root logger when this module is imported: INFO level and above,
# each record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def ensure_directory_exists(directory: str) -> None:
    """Create ``directory``, including missing parents, if it is not there yet.

    A directory that already exists is not treated as an error.

    Args:
        directory: Path of the directory to create.

    Raises:
        Exception: Whatever ``os.makedirs`` raised; the failure is logged at
            ERROR level and then re-raised unchanged.
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except Exception as exc:
        failure = f"Failed to create directory '{directory}': {exc}"
        logging.error(failure)
        raise

def save_figure(output_dir: str, filename: str) -> None:
    """Save the current pyplot figure to ``output_dir/filename`` and close it.

    The output directory is created first if needed. A failure while writing
    the image is logged and swallowed (best effort), whereas a failure to
    create the directory propagates to the caller. In both cases the current
    figure is closed, so a failed call does not leave an open figure behind.

    Args:
        output_dir: Directory the image is written into.
        filename: File name (including extension) of the image.

    Raises:
        Exception: Whatever ``ensure_directory_exists`` raised when the output
            directory could not be created.
    """
    try:
        # Directory creation sits inside the try/finally on purpose: if it
        # raises, the figure built by the caller must still be closed.
        ensure_directory_exists(output_dir)
        filepath = Path(output_dir) / filename
        try:
            plt.savefig(filepath)
            logging.info(f"Saved plot to {filepath}")
        except Exception as e:
            # Best effort: a plot that cannot be written must not abort the
            # caller's run, so the error is only logged.
            logging.error(f"Failed to save plot {filepath}: {e}")
    finally:
        plt.close()

def plot_regression_uncertainty(means: np.ndarray, stds: np.ndarray, ground_truth: np.ndarray,
                                output_dir: str = 'output', filename: str = 'regression_uncertainty.png') -> None:
    """Plot predictions with error bars next to the ground truth and save the figure.

    Samples are ordered by ascending predicted mean. Each prediction is drawn
    with an error bar of ``stds`` and the matching ground-truth value is drawn
    as a red point at the same x position.

    Args:
        means: Predicted values, one per sample. Any array-like is accepted
            and flattened to 1D.
        stds: Error-bar half-widths, one per sample (array-like, flattened).
        ground_truth: Reference values, one per sample (array-like, flattened).
        output_dir: Directory the image is written into.
        filename: File name of the saved image.

    Raises:
        ValueError: If the three inputs do not contain the same number of
            samples.
    """
    # Coerce and validate before any figure exists, so bad input cannot leave
    # an open figure behind and list / (N, 1) inputs behave like 1D arrays.
    means = np.asarray(means).ravel()
    stds = np.asarray(stds).ravel()
    ground_truth = np.asarray(ground_truth).ravel()
    if not (means.size == stds.size == ground_truth.size):
        raise ValueError(
            "`means`, `stds` and `ground_truth` must contain the same number of samples "
            f"(got {means.size}, {stds.size} and {ground_truth.size})."
        )

    sorted_idx = np.argsort(means)
    sorted_means = means[sorted_idx]
    sorted_stds = stds[sorted_idx]
    sorted_truth = ground_truth[sorted_idx]
    positions = np.arange(sorted_means.size)

    plt.figure(figsize=(10, 6))
    plt.errorbar(positions, sorted_means, yerr=sorted_stds,
                 fmt='o', alpha=0.7, label='Prediction ± STD')
    plt.scatter(positions, sorted_truth, color='red', label='Ground Truth')
    plt.title("Regression Predictions with Uncertainty")
    plt.xlabel("Samples (sorted by prediction)")
    plt.ylabel("Mayo Score")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    save_figure(output_dir, filename)

def plot_calibration_histograms(probs: np.ndarray, labels: np.ndarray, probability_method: str, n_bins: int = 10, output_dir: str = 'output') -> None:
    """Save one predicted-probability histogram per class.

    For every column of ``probs`` two overlaid histograms are drawn on shared
    bin edges: the probabilities of samples whose label equals the column
    index (blue) and those of all other samples (red). Each figure is saved as
    ``calibration_histogram_class_<index>_<probability_method>.png``.

    Args:
        probs: 2D array of probabilities, one row per sample and one column
            per class.
        labels: 1D array of labels, compared against the column indices.
        probability_method: Name used (upper-cased) in the title and verbatim
            in the file name.
        n_bins: Bin specification passed to ``np.histogram_bin_edges``.
        output_dir: Directory the images are written into.

    Raises:
        ValueError: If ``probs`` is not 2D, ``labels`` is not 1D, or their
            lengths differ.
    """
    if probs.ndim != 2:
        raise ValueError("`probs` must be a 2D array.")
    if labels.ndim != 1:
        raise ValueError("`labels` must be a 1D array.")
    if len(probs) != len(labels):
        raise ValueError("Number of samples in `probs` and `labels` must match.")

    ensure_directory_exists(output_dir)

    # Iterating over the transpose yields one class column at a time.
    for class_index, class_probs in enumerate(probs.T):
        in_class = labels == class_index
        out_of_class = labels != class_index

        _, ax = plt.subplots(figsize=(10, 6))
        # Both histograms share edges computed from the full column.
        edges = np.histogram_bin_edges(class_probs, bins=n_bins)
        ax.hist(class_probs[in_class], bins=edges, alpha=0.7, label='Correct Predictions', color='blue')
        ax.hist(class_probs[out_of_class], bins=edges, alpha=0.7, label='Incorrect Predictions', color='red')
        ax.set_xlabel(f'Predicted Probability (Class {class_index})', fontsize=12)
        ax.set_ylabel('Frequency', fontsize=12)
        ax.set_title(f'Calibration Histogram for Class {class_index} ({probability_method.upper()})', fontsize=14)
        ax.legend(loc='upper center')
        ax.grid(True, linestyle='--', alpha=0.5)
        plt.tight_layout()
        save_figure(output_dir, f'calibration_histogram_class_{class_index}_{probability_method}.png')

def plot_uncertainty_histogram(uncertainties: np.ndarray, model_name: str, probability_method: str, output_dir: str = 'output') -> None:
    """Save a 30-bin histogram of the given uncertainty values.

    The figure is written to
    ``uncertainty_histogram_<model_name>_<probability_method>.png`` inside
    ``output_dir``.

    Args:
        uncertainties: Values to histogram.
        model_name: Name shown in the title and used in the file name.
        probability_method: Name shown upper-cased in the title and used
            verbatim in the file name.
        output_dir: Directory the image is written into.
    """
    ensure_directory_exists(output_dir)

    heading = f'Uncertainty Histogram - {model_name} ({probability_method.upper()})'
    target_name = f'uncertainty_histogram_{model_name}_{probability_method}.png'

    _, ax = plt.subplots(figsize=(8, 6))
    ax.hist(uncertainties, bins=30, alpha=0.7, color='purple')
    ax.set_xlabel('Uncertainty (Std. Dev.)', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    ax.set_title(heading, fontsize=14)
    ax.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    save_figure(output_dir, target_name)

def plot_uncertainty_vs_error(uncertainties: np.ndarray, correct_predictions: np.ndarray, model_name: str, probability_method: str, output_dir: str = 'output') -> None:
    """Scatter per-sample error against uncertainty, with a linear trend line.

    Each sample is plotted at ``(uncertainty, error)``, where ``error`` is 0.0
    for a correct prediction and 1.0 for an incorrect one. A least-squares
    regression line is overlaid when it is defined. The figure is written to
    ``uncertainty_vs_error_<model_name>_<probability_method>.png``.

    Args:
        uncertainties: Per-sample uncertainty values (array-like, flattened).
        correct_predictions: Per-sample truthy/falsy correctness flags
            (array-like, flattened).
        model_name: Name shown in the title and used in the file name.
        probability_method: Name shown upper-cased in the title and used
            verbatim in the file name.
        output_dir: Directory the image is written into.

    Raises:
        ValueError: If the two inputs do not contain the same number of
            samples.
    """
    # Coerce both inputs the same way and validate before touching the
    # filesystem or creating a figure.
    uncertainties = np.asarray(uncertainties, dtype=float).ravel()
    correct_predictions = np.asarray(correct_predictions, dtype=bool).ravel()
    if uncertainties.size != correct_predictions.size:
        raise ValueError(
            "`uncertainties` and `correct_predictions` must contain the same number of samples "
            f"(got {uncertainties.size} and {correct_predictions.size})."
        )
    error_rate = 1.0 - correct_predictions.astype(float)

    ensure_directory_exists(output_dir)
    plt.figure(figsize=(8, 6))
    plt.scatter(uncertainties, error_rate, alpha=0.5, color='navy')

    # A regression needs at least two samples with distinct x values;
    # linregress raises otherwise, so the line is skipped in that case.
    if uncertainties.size >= 2 and uncertainties.min() != uncertainties.max():
        fit = linregress(uncertainties, error_rate)
        # A straight line is fully described by its two end points.
        line_x = np.array([uncertainties.min(), uncertainties.max()])
        plt.plot(line_x, fit.slope * line_x + fit.intercept, color="red", label="Regression Line")
        plt.legend()
    else:
        logging.warning(
            "Skipping regression line for %s (%s): need at least two samples with distinct uncertainties.",
            model_name, probability_method,
        )

    plt.xlabel('Uncertainty (Std. Dev.)', fontsize=12)
    plt.ylabel('Error Rate (1 - Accuracy)', fontsize=12)
    plt.title(f'Uncertainty vs. Error Rate - {model_name} ({probability_method.upper()})', fontsize=14)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    save_figure(output_dir, f'uncertainty_vs_error_{model_name}_{probability_method}.png')

def plot_confusion_matrix(cm: np.ndarray, classes: list, normalize: bool = False, title: str = 'Confusion Matrix', cmap=plt.cm.Blues, output_dir: str = 'output', filename: str = 'conf_matrix.png') -> None:
    """Render a confusion matrix as an annotated heat map and save it.

    Rows are labelled "True Label" and columns "Predicted Label". Every cell
    is annotated with its value, in white on cells above half of the maximum
    and in black otherwise.

    Args:
        cm: Square 2D numpy array holding the confusion matrix.
        classes: Tick labels used on both axes.
        normalize: If true, each row is divided by its sum and
            " (Normalized)" is appended to the title. Rows that sum to zero
            are shown as all zeros.
        title: Figure title.
        cmap: Matplotlib colormap used for the heat map.
        output_dir: Directory the image is written into.
        filename: File name of the saved image.

    Raises:
        ValueError: If ``cm`` is not a square 2D numpy array.
    """
    # The ndim check keeps 0-D/1-D arrays on the documented ValueError path
    # instead of failing with an IndexError on ``cm.shape[1]``.
    if not isinstance(cm, np.ndarray) or cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError("Confusion matrix must be a square numpy array.")
    ensure_directory_exists(output_dir)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; untouched cells keep
        # the 0.0 from ``out`` rather than becoming NaN.
        cm = np.divide(cm.astype(float), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        title += " (Normalized)"
    plt.figure(figsize=(8, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    # 'd' is only valid for integer values; float matrices (normalised or
    # not) are printed with two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    thresh = cm.max() / 2.0 if cm.size else 0.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    save_figure(output_dir, filename)

def plot_reliability_diagram(probs: np.ndarray, labels: np.ndarray, class_labels: list, n_bins: int = 10, output_dir: str = 'output', model_name: str = '') -> None:
    """Save a reliability diagram with one calibration curve per class.

    For each column of ``probs`` a one-vs-rest calibration curve is computed
    with ``calibration_curve`` and plotted together with the diagonal that
    marks perfect calibration. The figure is saved as
    ``reliability_diagram_<model_name>.png``, or
    ``reliability_diagram_deterministic.png`` when ``model_name`` is empty.

    Args:
        probs: 2D array of probabilities, one column per class.
        labels: 1D array of labels, compared against the column indices.
        class_labels: Display names, indexed by column, used in the legend.
        n_bins: Number of bins passed to ``calibration_curve``.
        output_dir: Directory the image is written into.
        model_name: Optional name appended to the title and used in the file
            name.

    Raises:
        ValueError: If ``probs`` is not 2D or ``labels`` is not 1D.
    """
    shapes_ok = probs.ndim == 2 and labels.ndim == 1
    if not shapes_ok:
        raise ValueError("Invalid shapes for probs or labels.")

    ensure_directory_exists(output_dir)
    _, ax = plt.subplots(figsize=(8, 6))

    # One-vs-rest: samples of the current class are the positives.
    for class_index, class_probs in enumerate(probs.T):
        is_positive = labels == class_index
        prob_true, prob_pred = calibration_curve(is_positive, class_probs, n_bins=n_bins)
        ax.plot(prob_pred, prob_true, marker='.', label=f'Class {class_labels[class_index]}')

    ax.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfect Calibration')
    ax.set_xlabel('Mean Predicted Probability', fontsize=12)
    ax.set_ylabel('Fraction of Positives', fontsize=12)
    ax.set_title(f'Reliability Diagram{" - " + model_name if model_name else ""}', fontsize=14)
    ax.legend()
    ax.grid(True)
    plt.tight_layout()

    filename = f'reliability_diagram_{model_name if model_name else "deterministic"}.png'
    save_figure(output_dir, filename)
