<a href="https://colab.research.google.com/github/Sibusisongwenya/WIP-Project/blob/main/discretize.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# utils/helpers.py

import numpy as np
from scipy.stats import norm

def discretize_to_four_class(prediction, boundaries=(0.5, 1.5, 2.5)):
    """
    Discretizes a continuous Mayo score prediction into one of four classes: 0, 1, 2, or 3.

    Args:
        prediction: Continuous score to discretize (a single value; the result
            is converted with ``int``).
        boundaries: Increasing cut points separating the classes. With the
            default three cut points the result is 0, 1, 2, or 3. A prediction
            exactly equal to a cut point falls into the higher class
            (``right=False``).

    Returns:
        int: Index of the bin that contains ``prediction``.
    """
    # The default is an immutable tuple rather than a list, so the shared
    # default object can never be modified between calls.
    bin_index = np.digitize(prediction, boundaries, right=False)
    return int(bin_index)

def discretize_binary(prediction, threshold=1.5):
    """
    Maps a continuous Mayo score prediction onto a binary remission label.

    Returns 0 (remission) when the prediction lies strictly below the threshold;
    any other value, including one equal to the threshold, yields 1 (non-remission).
    """
    if prediction < threshold:
        return 0
    return 1

def entropy_confidence(probs):
    """
    Computes entropy-based confidence for each sample.

    The Shannon entropy of every row is normalised by the maximum possible
    entropy (``log(num_classes)``) and subtracted from 1, so 1 means highest
    confidence and 0 means a uniform distribution.

    Args:
        probs: Array-like of predicted probabilities; entropy is taken along
            axis 1 and the number of classes is read from ``shape[1]``.

    Returns:
        numpy.ndarray: One confidence value in [0, 1] per sample.
    """
    # Accept plain nested lists as well as arrays.
    probs = np.asarray(probs)

    # epsilon keeps log() finite for zero probabilities.
    epsilon = 1e-10
    entropy = -np.sum(probs * np.log(probs + epsilon), axis=1)

    num_classes = probs.shape[1]
    if num_classes < 2:
        # log(1) == 0 would make the normalisation divide by zero; with a
        # single class there is no uncertainty to express.
        return np.ones_like(entropy)

    conf = 1 - (entropy / np.log(num_classes))

    # The epsilon inside the log can push a one-hot row marginally above 1;
    # clip so the documented [0, 1] range always holds.
    return np.clip(conf, 0.0, 1.0)


def ci_confidence(stds: np.ndarray, confidence_level: float = 0.95) -> np.ndarray:
    """
    Derives two-sided confidence-interval widths from standard deviations.

    Each width is ``2 * z * std``, where ``z`` is the standard-normal quantile
    at ``(1 + confidence_level) / 2``. Entries whose standard deviation equals
    zero get a width of 0.

    Args:
        stds (np.ndarray): Standard deviations of the predictions.
        confidence_level (float): The desired confidence level (default: 0.95).

    Returns:
        np.ndarray: Widths of the confidence intervals.
    """
    # Upper-tail quantile of the standard normal for a two-sided interval.
    upper_quantile = (1 + confidence_level) / 2
    critical_value = norm.ppf(upper_quantile)

    # Full interval width (both sides of the mean) for every entry.
    full_widths = 2 * critical_value * stds

    # Zero spread maps to an explicit zero width.
    return np.where(stds != 0, full_widths, 0)

def uncertainty_degree(probs):
    """
    Expresses per-sample uncertainty as a percentage.

    The largest probability along axis 1 is taken for each sample; the
    uncertainty is the remaining mass, ``100 * (1 - max_prob)``, so a smaller
    top probability gives a larger uncertainty.
    """
    top_prob = np.max(probs, axis=1)
    remaining_mass = 1 - top_prob
    return remaining_mass * 100

def probability_based_confidence(probs):
    """
    Averages the top predicted probability over all samples.

    For every sample the largest value along axis 1 of ``probs`` is selected,
    and the mean of those per-sample maxima is returned.

    Args:
        probs (numpy.ndarray): Array of predicted probabilities with shape (N, num_classes).

    Returns:
        float: The average maximum probability (value between 0 and 1).
    """
    per_sample_peak = np.max(probs, axis=1)
    return np.mean(per_sample_peak)
