In [6]:
import numpy as np

In [7]:
# Toy binary-classification example used to exercise the AUC computation.
# Ground-truth labels, one per sample (1 = positive class, 0 = negative class).
y_true = np.array(
    [0, 0, 1, 1],
)
# Classifier scores, aligned position-by-position with y_true
# (a higher score means "more likely positive").
y_scores = np.array(
    [0.1, 0.4, 0.35, 0.8],
)

In [20]:
def calculate_auc(y_true, y_pred, verbose=True):
    """Compute the area under the ROC curve (AUC) for a binary classifier.

    The ROC curve is traced by sweeping a decision threshold from the highest
    score down to the lowest and recording (FPR, TPR) after each *distinct*
    score value; the area is then integrated with the trapezoidal rule.

    Parameters
    ----------
    y_true : array-like, 1-D
        Ground-truth labels. Entries equal to 1 are positives; every other
        value is counted as a negative.
    y_pred : array-like, 1-D
        Predicted scores, same length as ``y_true``. Higher means "more
        likely positive".
    verbose : bool, default True
        When True, print the intermediate arrays (inputs, sorted arrays,
        class counts, TPR/FPR) for teaching/debugging purposes.

    Returns
    -------
    numpy.float64
        The area under the ROC curve, in [0, 1].

    Raises
    ------
    ValueError
        If the inputs are not 1-D arrays of equal length, or if ``y_true``
        does not contain at least one positive and one negative sample
        (TPR or FPR would be a division by zero, so AUC is undefined).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.ndim != 1 or y_true.shape != y_pred.shape:
        raise ValueError(
            f'y_true and y_pred must be 1-D and of equal length, '
            f'got shapes {y_true.shape} and {y_pred.shape}'
        )

    if verbose:
        print(f'Initially values are y_true {y_true}, y_pred {y_pred}')

    # Sort scores and corresponding truth values BY SCORES, highest first:
    # the ROC curve is built by lowering the threshold one score at a time.
    order = np.argsort(y_pred, kind='stable')[::-1]
    y_true = y_true[order]
    y_pred = y_pred[order]

    if verbose:
        print(f'Sorted values are y_true {y_true}, y_pred {y_pred}')

    # Count positive and negative samples. The same boolean mask is reused for
    # the cumulative counts below so that counts and rates always agree, even
    # if labels are booleans or use a value other than 0 for negatives.
    is_pos = y_true == 1
    n_pos = np.sum(is_pos)
    n_neg = len(y_true) - n_pos

    if verbose:
        print(f'Num positive samples = {n_pos}, num negative samples {n_neg}')

    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            'AUC is undefined when y_true contains only one class '
            f'(positives={n_pos}, negatives={n_neg})'
        )

    # A threshold can only sit BETWEEN two different score values, so samples
    # sharing a score must enter the curve together. Keep just the last index
    # of every run of equal scores; otherwise tied samples would be ranked by
    # their (arbitrary) input order and the AUC would depend on that order.
    last_of_run = np.append(np.flatnonzero(np.diff(y_pred)), y_pred.size - 1)

    # TPR (True Positive Rate)  - positives seen so far / total positives.
    # FPR (False Positive Rate) - negatives seen so far / total negatives.
    # **NOTE**: because scores are in decreasing order, with the threshold at
    # position i every sample at or before i (score >= threshold) is predicted
    # 1 and every sample after i is predicted 0.
    tpr = np.cumsum(is_pos)[last_of_run] / n_pos
    fpr = np.cumsum(~is_pos)[last_of_run] / n_neg

    if verbose:
        print(f'TPR and FPR are {tpr}, {fpr}')

    # Add start and end points so the curve is anchored at (0, 0) and (1, 1).
    tpr = np.concatenate([[0], tpr, [1]])
    fpr = np.concatenate([[0], fpr, [1]])

    if verbose:
        print(f'Full TPR and FPR are {tpr}, {fpr}')

    # Calculate AUC using trapezoidal rule. `np.trapezoid` was introduced in
    # NumPy 2.0; older releases only provide the same routine as `np.trapz`.
    trapezoid = getattr(np, 'trapezoid', None)
    if trapezoid is None:
        trapezoid = np.trapz
    auc = trapezoid(tpr, fpr)

    return auc

In [21]:
# Score the toy example; the bare name on the last line makes the notebook
# display the returned value as the cell's output.
auc = calculate_auc(
    y_true=y_true,
    y_pred=y_scores,
)
auc

Initially values are y_true [0 0 1 1], y_pred [0.1  0.4  0.35 0.8 ]
Sorted values are y_true [1 0 1 0], y_pred [0.8  0.4  0.35 0.1 ]
Num positive samples = 2, num negative samples 2
TPR and FPR are [0.5 0.5 1.  1. ], [0.  0.5 0.5 1. ]
Full TPR and FPR are [0.  0.5 0.5 1.  1.  1. ], [0.  0.  0.5 0.5 1.  1. ]


np.float64(0.75)