In [None]:
# default_exp metrics

# Metrics

> Expected calibration error (ECE) metrics and their class-wise variant.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
# export

from reliability import utils

import numpy as np
import scipy.stats
import scipy.integrate

import mclearn.performance
import sklearn.metrics

In [None]:
# export

def ece(y_probs, y_preds, y_true, balanced=False):
    """Compute the expected calibration error (ECE).

    Each bin contributes the absolute gap between its accuracy and its mean
    confidence; the per-bin values are combined by ``utils.binning``.

    Parameters:
    y_probs (np.array): predicted class probabilities
    y_preds (np.array): predicted class labels
    y_true (np.array): true class labels
    balanced (bool): if True, use the balanced accuracy of each bin instead of the plain accuracy

    Returns:
    exp_ce (float): expected calibration error

    """

    # gap between plain accuracy and mean confidence of one bin
    def accuracy_gap(y_probs_bin, y_preds_bin, y_true_bin):
        hit_rate = (y_preds_bin == y_true_bin).mean()
        mean_confidence = y_probs_bin.mean()
        return abs(hit_rate - mean_confidence)

    # gap between balanced accuracy and mean confidence of one bin
    def balanced_accuracy_gap(y_probs_bin, y_preds_bin, y_true_bin):
        balanced_hit_rate = sklearn.metrics.balanced_accuracy_score(y_true_bin, y_preds_bin)
        mean_confidence = y_probs_bin.mean()
        return abs(balanced_hit_rate - mean_confidence)

    # bin the samples, then evaluate the selected gap function on the bins
    bins = utils.get_bin_indices(y_probs)
    if balanced:
        gap_func = balanced_accuracy_gap
    else:
        gap_func = accuracy_gap
    return utils.binning(y_probs, y_preds, y_true, bins, gap_func)

In [None]:
# export

def ece_v2(y_probs, y_preds, y_true):
    """Compute the expected calibration error based on the expected posterior balanced accuracy (ECEv2).

    Each bin contributes the absolute gap between the value returned by
    ``mclearn.performance.balanced_accuracy_expected`` for the bin's confusion
    matrix and the bin's mean confidence.

    Parameters:
    y_probs (np.array): predicted class probabilities
    y_preds (np.array): predicted class labels
    y_true (np.array): true class labels

    Returns:
    exp_ce (float): expected calibration error

    """

    # gap between expected balanced accuracy and mean confidence of one bin
    def expected_balanced_gap(y_probs_bin, y_preds_bin, y_true_bin):
        cm = sklearn.metrics.confusion_matrix(y_true_bin, y_preds_bin)
        expected_acc = mclearn.performance.balanced_accuracy_expected(cm, fft=True)
        return abs(expected_acc - y_probs_bin.mean())

    # bin the samples and evaluate the gap function on the bins
    bins = utils.get_bin_indices(y_probs)
    return utils.binning(y_probs, y_preds, y_true, bins, expected_balanced_gap)

In [None]:
# export

def ece_v3(y_probs, y_preds, y_true, samples=1000):
    """Compute the ECE based on the posterior balanced accuracy distribution (ECEv3).

    For every bin, ``|x - conf|`` is integrated over ``x`` in [0, 1] against the
    density returned by ``mclearn.performance.beta_avg_pdf``, where ``conf`` is
    the mean confidence of the bin.

    Parameters:
    y_probs (np.array): predicted class probabilities
    y_preds (np.array): predicted class labels
    y_true (np.array): true class labels
    samples (int): number of samples for numerical integration

    Returns:
    exp_ce (float): expected calibration error

    """

    # scipy.integrate.simps was removed in SciPy 1.14; prefer its replacement
    # `simpson` and only fall back to the old name on releases that lack it
    simpson = getattr(scipy.integrate, "simpson", None) or scipy.integrate.simps

    # the integration grid is identical for every bin, so build it only once
    xs = np.linspace(0, 1, samples)

    # define the bin function
    def bin_func(y_probs_bin, y_preds_bin, y_true_bin):
        # estimate beta parameters
        confusion = sklearn.metrics.confusion_matrix(y_true_bin, y_preds_bin)
        params = mclearn.performance.get_beta_parameters(confusion)

        # approximate the integral using Simpson's rule
        conf = y_probs_bin.mean()
        ys = abs(xs - conf) * mclearn.performance.beta_avg_pdf(xs, params, fft=True)
        # `x` is keyword-only in recent SciPy; the keyword form works on old releases too
        return simpson(ys, x=xs)

    # compute the full result
    bin_indices = utils.get_bin_indices(y_probs)
    return utils.binning(y_probs, y_preds, y_true, bin_indices, bin_func)

In [None]:
# export

def ece_v4(y_probs, y_preds, y_true, samples=1000):
    """Compute the ECE by averaging per-class posterior integrals (ECEv4).

    For every bin, ``|x - conf|`` is integrated over ``x`` in [0, 1] against one
    beta density per row of the bin's confusion matrix (parameters taken from
    ``mclearn.performance.get_beta_parameters``), and the resulting integrals
    are averaged. ``conf`` is the mean confidence of the bin.

    Parameters:
    y_probs (np.array): predicted class probabilities
    y_preds (np.array): predicted class labels
    y_true (np.array): true class labels
    samples (int): number of samples for numerical integration

    Returns:
    exp_ce (float): expected calibration error

    """

    # scipy.integrate.simps was removed in SciPy 1.14; prefer its replacement
    # `simpson` and only fall back to the old name on releases that lack it
    simpson = getattr(scipy.integrate, "simpson", None) or scipy.integrate.simps

    # the integration grid is identical for every bin, so build it only once
    xs = np.linspace(0, 1, samples)

    # define the bin function
    def bin_func(y_probs_bin, y_preds_bin, y_true_bin):
        # estimate beta parameters
        confusion = sklearn.metrics.confusion_matrix(y_true_bin, y_preds_bin)
        params = mclearn.performance.get_beta_parameters(confusion)

        # distance to the bin confidence is shared by all per-class integrands
        conf = y_probs_bin.mean()
        distance = abs(xs - conf)

        # approximate one integral per confusion-matrix row using Simpson's rule;
        # the values stay in float64 (a float32 buffer would truncate them)
        integrals = [
            simpson(distance * scipy.stats.beta.pdf(xs, params[i][0], params[i][1]), x=xs)
            for i in range(len(confusion))
        ]

        return np.mean(integrals)

    # compute the full result
    bin_indices = utils.get_bin_indices(y_probs)
    return utils.binning(y_probs, y_preds, y_true, bin_indices, bin_func)

In [None]:
# export

def class_wise_ece(y_probs, y_preds, y_true, base_ece, *base_ece_args, **base_ece_kwargs):
    """Compute classwise-ECE as proposed in "Beyond temperature scaling: Obtaining well-calibrated
    multiclass probabilities with Dirichlet calibration" (Kull, 2019).

    The samples are grouped by their predicted label, ``base_ece`` is evaluated
    on every group, and the unweighted mean of the group results is returned.

    Parameters:
    y_probs (np.array): predicted class probabilities
    y_preds (np.array): predicted class labels
    y_true (np.array): true class labels
    base_ece (callable): function that returns ECE for given probabilities, label predictions and true labels
    base_ece_[kw]args ([kw]args): [Keyword ]arguments that should be passed to the base_ece callable.

    Returns:
    exp_ce (float): class-wise expected calibration error

    """

    predicted_labels = np.unique(y_preds)

    # boolean masks selecting the samples predicted as each label
    masks = (y_preds == label for label in predicted_labels)

    # accumulate the per-label ECE values, starting from a float zero
    total = sum(
        (
            base_ece(y_probs[mask], y_preds[mask], y_true[mask], *base_ece_args, **base_ece_kwargs)
            for mask in masks
        ),
        0.,
    )

    return total / len(predicted_labels)