# Mind your prevalence!

When this notebook is executed, an interactive plot is displayed. The plot shows
the relationship between five performance metrics and prevalence at given values
of sensitivity and specificity. The performance metrics are accuracy,
Matthews' correlation coefficient, positive predictivity,
negative predictivity and Cohen's kappa coefficient.
Check boxes allow the user to choose which performance metric to display.
Sliders allow the user to vary the values of sensitivity and specificity.


## Imports

In [1]:
from ipywidgets import Checkbox, interact, FloatSlider
from IPython.display import display
import matplotlib.pyplot as plt, random
import numpy as np
import math

## Interactive plot of performance metrics

In [2]:
def accuracy(sensitivity, specificity, prevalence):
    """Compute the accuracy implied by sensitivity, specificity and prevalence.

    Accuracy is the prevalence-weighted mean of the two rates: the positive
    share of the test set contributes ``sensitivity`` and the negative share
    contributes ``specificity``.

    Parameters
    ----------
    sensitivity : float
        The sensitivity of the model
    specificity : float
        The specificity of the model
    prevalence : float
        The prevalence of the test set

    Returns
    -------
    float
        Accuracy as a float
    """
    # Fraction of all samples that are correctly classified positives.
    from_positives = prevalence * sensitivity
    # Fraction of all samples that are correctly classified negatives.
    from_negatives = (1 - prevalence) * specificity
    return from_positives + from_negatives

def mcc(sensitivity, specificity, prevalence):
    """Compute Matthews' correlation coefficient from sensitivity,
    specificity and prevalence.

    The value is ``(sensitivity + specificity - 1)`` divided by the square
    root of the product of two prevalence-dependent terms. When one class is
    never predicted (sensitivity 1 with specificity 0, or the reverse) that
    product is zero for a prevalence strictly between 0 and 1, so the
    divisor is replaced by 1 and the result is the (zero) numerator.

    Parameters
    ----------
    sensitivity : float
        The sensitivity of the model
    specificity : float
        The specificity of the model
    prevalence : float
        The prevalence of the test set; a value of exactly 0 or 1 divides
        by zero

    Returns
    -------
    float
        Matthews' correlation coefficient as a float
    """
    informedness = sensitivity + specificity - 1

    positive_term = sensitivity + (1 - specificity) * (1 - prevalence) / prevalence
    negative_term = specificity + (1 - sensitivity) * prevalence / (1 - prevalence)
    scale = math.sqrt(positive_term * negative_term)

    # Degenerate models that always answer the same class: avoid 0 / 0.
    always_positive = sensitivity == 1 and specificity == 0
    always_negative = sensitivity == 0 and specificity == 1
    if always_positive or always_negative:
        scale = 1

    return informedness / scale

def kappa(sensitivity, specificity, prevalence):
    """Compute Cohen's kappa coefficient from sensitivity, specificity and
    prevalence.

    The value is ``2 * (sensitivity + specificity - 1)`` divided by the sum
    of two prevalence-dependent terms. For the two models that always
    predict a single class (sensitivity 1 with specificity 0, or the
    reverse) the divisor is set to 1; the numerator is zero in those cases.

    Parameters
    ----------
    sensitivity : float
        The sensitivity of the model
    specificity : float
        The specificity of the model
    prevalence : float
        The prevalence of the test set; a value of exactly 0 or 1 divides
        by zero

    Returns
    -------
    float
        Cohen's kappa coefficient as a float
    """
    doubled_informedness = 2 * (sensitivity + specificity - 1)

    positive_term = sensitivity + (1 - specificity) * (1 - prevalence) / prevalence
    negative_term = specificity + (1 - sensitivity) * prevalence / (1 - prevalence)
    scale = positive_term + negative_term

    # Single-class predictors: the numerator is zero, so divide by 1.
    always_positive = sensitivity == 1 and specificity == 0
    always_negative = sensitivity == 0 and specificity == 1
    if always_positive or always_negative:
        scale = 1

    return doubled_informedness / scale

def negativePredictivity(sensitivity, specificity, prevalence):
    """Compute the negative predictivity from sensitivity, specificity and
    prevalence.

    This is the share of true negatives among all negative predictions.
    A model with specificity 0 and sensitivity 1 makes no negative
    predictions, so the divisor is set to 1 and the result is 0.

    Parameters
    ----------
    sensitivity : float
        The sensitivity of the model
    specificity : float
        The specificity of the model
    prevalence : float
        The prevalence of the test set

    Returns
    -------
    float
        Negative predictivity as a float
    """
    true_negatives = specificity * (1 - prevalence)
    false_negatives = (1 - sensitivity) * prevalence

    never_predicts_negative = specificity == 0 and sensitivity == 1
    if never_predicts_negative:
        predicted_negatives = 1
    else:
        predicted_negatives = true_negatives + false_negatives

    return true_negatives / predicted_negatives

def positivePredictivity(sensitivity, specificity, prevalence):
    """Compute the positive predictivity from sensitivity, specificity and
    prevalence.

    This is the share of true positives among all positive predictions.
    A model with specificity 1 and sensitivity 0 makes no positive
    predictions, so the divisor is set to 1 and the result is 0.

    Parameters
    ----------
    sensitivity : float
        The sensitivity of the model
    specificity : float
        The specificity of the model
    prevalence : float
        The prevalence of the test set

    Returns
    -------
    float
        Positive predictivity as a float
    """
    true_positives = sensitivity * prevalence
    false_positives = (1 - specificity) * (1 - prevalence)

    never_predicts_positive = specificity == 1 and sensitivity == 0
    if never_predicts_positive:
        predicted_positives = 1
    else:
        predicted_positives = true_positives + false_positives

    return true_positives / predicted_positives

# Sensitivity slider for interactive plot.
# Covers 0 to 1 in steps of 0.01 and starts at 0.71; the readout uses the
# '.2f' format. The variable name is misspelled ("sensitvity"), but the
# @interact decorator on updatePlot refers to it by exactly this name, so it
# must not be renamed here in isolation.
sensitvity_sld = FloatSlider(
    value=0.71,
    min=0,
    max=1.0,
    step=0.01,
    description='Sensitivity',
    disabled=False,
    continuous_update=True,  # presumably redraws while dragging -- confirm in ipywidgets docs
    orientation='horizontal',
    readout=True,
    readout_format='.2f'
)

# Specificity slider for interactive plot.
# Same configuration as the sensitivity slider, except that it starts at 0.89.
specificity_sld = FloatSlider(
    value=0.89,
    min=0,
    max=1.0,
    step=0.01,
    description='Specificity',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='.2f'
)

# Check boxes for interactive plot.
# One box per performance metric, each ticked initially (value=True). They
# are handed to updatePlot through its @interact decorator and control which
# curves are visible.
acc_chk = Checkbox(
    value=True,
    description="Accuracy")
mcc_chk = Checkbox(
    value=True,
    description="Matthews' correlation coefficient")
ppv_chk = Checkbox(
    value=True,
    description="Positive predictivity")
npv_chk = Checkbox(
    value=True,
    description="Negative predictivity")
kappa_chk = Checkbox(
    value=True,
    description="Cohen's Kappa coefficient")

# The keyword names given to interact must match the parameter names of the
# decorated function; that is how each widget is bound to its argument.
@interact(
    sensitivity=sensitvity_sld,
    specificity=specificity_sld,
    showAccurary=acc_chk,
    showMcc=mcc_chk,
    showPpv=ppv_chk,
    showNpv=npv_chk,
    showKappa=kappa_chk,
)
def updatePlot(sensitivity=0.71, specificity=0.89, showAccurary=True, showMcc=True, showPpv=True, showNpv=True, showKappa=True):
    """Draw the performance metrics against prevalence at the given
    sensitivity and specificity.

    Parameters
    ----------
    sensitivity : float, optional
        The sensitivity of the model
    specificity : float, optional
        The specificity of the model
    showAccurary : boolean, optional
        If True, shows the plot of accuracy against prevalence. The
        misspelled name is kept so existing keyword callers keep working.
    showMcc : boolean, optional
        If True, shows the plot of Matthews' correlation coefficient against prevalence
    showPpv : boolean, optional
        If True, shows the plot of positive predictivity against prevalence
    showNpv : boolean, optional
        If True, shows the plot of negative predictivity against prevalence
    showKappa : boolean, optional
        If True, shows the plot of Cohen's kappa coefficient against prevalence

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned.
    """

    _, ax = plt.subplots(figsize=(7, 7))

    # x axis: stop just short of 0 and 1, where the MCC and kappa formulas
    # divide by prevalence or by (1 - prevalence).
    x = np.linspace(0.0000000001, 0.9999999999, 101)

    # (metric function, visibility flag, legend label), in plotting order.
    # Every curve is always plotted and only its visibility is toggled, so
    # each metric keeps the same position in the plotting order.
    curves = (
        (accuracy, showAccurary, "Accuracy"),
        (mcc, showMcc, "MCC"),
        (positivePredictivity, showPpv, "Positive predictivity"),
        (negativePredictivity, showNpv, "Negative predictivity"),
        (kappa, showKappa, "Kappa"),
    )

    for metric, is_visible, label in curves:
        # The metric functions take scalars, so evaluate point by point and
        # build each array once instead of growing it with np.append.
        y = np.array([metric(sensitivity, specificity, p) for p in x])
        ax.plot(x, y, visible=is_visible, linewidth=2, label=label)

    plt.xlim([-0.025, 1.025])
    plt.ylim([-1.025, 1.025])

    font = {'size': 15}
    plt.legend()

    plt.xlabel("Test set prevalence", fontdict=font)
    plt.ylabel("Metric value", fontdict=font)

    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)

    plt.grid(visible=True)

    plt.show()

interactive(children=(FloatSlider(value=0.71, description='Sensitivity', max=1.0, step=0.01), FloatSlider(valu…