[![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)](https://fixelalgorithms.gitlab.io/)

# Classifier - Performance Evaluation: Precision, Recall, ROC and AUC

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 20/09/2022 | Royi Avital | First version                                                      |
|         |            |             |                                                                    |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/IntroductionMachineLearningSystemEngineers/ClassificationPrecisionRecall.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning
from sklearn.datasets import make_moons
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from scipy.spatial.distance import cdist

# Misc
import datetime
import os
from platform import python_version
import random
import warnings
import yaml

# Typing
from typing import Tuple

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython
from IPython.display import Image, display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout

In [None]:
# Configuration
# Notebook wide settings: plotting backend, warning policy and random seeds.
%matplotlib inline

warnings.filterwarnings("ignore") #<! Silences every warning for the rest of the session

# Seed both NumPy's global generator and Python's `random` module with the same value
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# sns.set_theme() #>! Apply SeaBorn theme

# True when the string representation of the IPython shell mentions `google.colab`
runInGoogleColab = 'google.colab' in str(get_ipython())

In [None]:
# Constants
# Plotting defaults shared by the helper functions and the plotting cells.

FIG_SIZE_DEF = (8, 8) #<! Passed as `figsize` / `figSize`
ELM_SIZE_DEF = 50 #<! Passed as the scatter marker size (`s`)
CLASS_COLOR = ('b', 'r') #<! Colors used for the 1st and 2nd class respectively
EDGE_COLOR  = 'k' #<! Passed as the scatter marker `edgecolor`


In [None]:
# Fixel Algorithms Packages


In [None]:
# Parameters

# Data Generation
# The 2 values are passed together as `n_samples` of `make_moons()`.
# 950 vs. 50 makes the generated data set highly imbalanced.
numSamples0 = 950
numSamples1 = 50

noiseLevel = 0.1 #<! Passed as `noise` of `make_moons()`

testSize = 0.5 #<! NOTE(review): Not used by any of the code visible in this notebook

# Data Visualization
numGridPts = 250 #<! Number of grid points per axis for the decision boundary plots

In [None]:
# Auxiliary Functions

def PlotBinaryClassData( mX: np.ndarray, vY: np.ndarray, hA:plt.Axes = None, figSize: Tuple[int, int] = FIG_SIZE_DEF, elmSize: int = ELM_SIZE_DEF, classColor: Tuple[str, str] = CLASS_COLOR, axisTitle: str = None ) -> plt.Axes:
    """
    Scatter plot of 2D samples of a binary classification problem.

    Input:
        mX         - Samples, the 1st column is used as `x` and the 2nd as `y`.
        vY         - Label per sample. Must hold exactly 2 unique values.
        hA         - Axes to draw on. If `None` a new figure is created.
        figSize    - Size of the new figure (Used only if `hA` is `None`).
        elmSize    - Marker size of the scatter plot.
        classColor - Colors of the 1st and 2nd class (Sorted label order).
        axisTitle  - Title of the axes. No title is set if `None`.
    Output:
        hA         - The axes the data was drawn on.
    Raises:
        ValueError - If the number of unique values in `vY` is not 2.
    """

    # Validate before any figure is created so a failed call does not leave
    # an empty figure behind.
    vC       = np.unique(vY)
    numClass = len(vC)
    if numClass != 2:
        raise ValueError(f'The input data is not binary, the number of classes is: {numClass}')

    if hA is None:
        _, hA = plt.subplots(figsize = figSize)

    # `np.unique()` returns sorted values, hence `classColor[0]` matches the smaller label
    for ii, classVal in enumerate(vC):
        vIdx = vY == classVal
        hA.scatter(mX[vIdx, 0], mX[vIdx, 1], s = elmSize, color = classColor[ii], edgecolor = 'k', label = f'$C_\u007b {classVal} \u007d$')

    # Draw the coordinate axes through the origin
    hA.axvline(x = 0, color = 'k')
    hA.axhline(y = 0, color = 'k')
    hA.axis('equal')
    if axisTitle is not None:
        hA.set_title(axisTitle)
    hA.legend()

    return hA

def DisplayConfusionMatrix( vY, vYPred, lClasses, hAx = None ) -> None:
    """
    Computes and displays the confusion matrix of a classifier.

    Input:
        vY       - Ground truth labels.
        vYPred   - Predicted labels.
        lClasses - Labels to index the matrix by (Also used as display labels).
        hAx      - Axes to draw on. If `None` a new figure is created and shown.
    Output:
        None. The matrix is only drawn.
    """

    mConfMat = confusion_matrix(vY, vYPred, labels = lClasses)
    cmDisp   = ConfusionMatrixDisplay(confusion_matrix = mConfMat, display_labels = lClasses)

    # Forward the given axes (Previously ignored). With `ax = None` a new figure is created.
    cmPlot = cmDisp.plot(ax = hAx)
    cmPlot.ax_.grid(False) #<! Grid lines clutter the matrix cells

    if hAx is None:
        # The figure is owned by this function, hence it is shown here.
        # When the axes is given the caller decides when to show its figure.
        plt.show()

## Generate Data

In [None]:
# Generate the two moons data with a different number of samples per moon (Imbalanced classes)
mX, vY    = make_moons(n_samples = [numSamples0, numSamples1], noise = noiseLevel)
# vY[vY == 0] = -1
mX.shape, vY.shape #<! Last expression of the cell: displays the shapes of the data

### Plot Data

In [None]:
# Boolean masks of the samples per class (Reused by the plotting cells below)
vIdx0 = vY == 0
vIdx1 = vY == 1

hA = PlotBinaryClassData(mX, vY, axisTitle = 'Samples Data')

## Train SVM Classifier

In [None]:
# SVM Linear Model
# The model is fitted and scored on the same data (No train / test split is used here).
oSVM  = SVC(kernel = 'linear').fit(mX, vY)
modelScore = oSVM.score(mX, vY)

print(f'The model score (Accuracy) on the data: {modelScore}') #<! Accuracy

### Plot Decision Boundary

In [None]:
# Grid of the data support
# `numGridPts` points per axis, spanning the min / max of each feature.
v0       = np.linspace(mX[:, 0].min(), mX[:, 0].max(), numGridPts)
v1       = np.linspace(mX[:, 1].min(), mX[:, 1].max(), numGridPts)
XX0, XX1 = np.meshgrid(v0, v1)
XX       = np.c_[XX0.ravel(), XX1.ravel()] #<! One row per grid point: (x0, x1)

# Predicted label per grid point, reshaped back to the grid shape
Z = oSVM.predict(XX)
Z = Z.reshape(XX0.shape)

plt.figure(figsize = FIG_SIZE_DEF)
# The levels form 2 bands: [-0.5, 0.5] holds the label 0 and [0.5, 1.5] holds the label 1
plt.contourf(XX0, XX1, Z, colors = CLASS_COLOR, alpha = 0.3, levels = [-0.5, 0.5, 1.5])
plt.scatter(mX[vIdx0, 0], mX[vIdx0, 1], s = ELM_SIZE_DEF, c = CLASS_COLOR[0], edgecolor = EDGE_COLOR)
plt.scatter(mX[vIdx1, 0], mX[vIdx1, 1], s = ELM_SIZE_DEF, c = CLASS_COLOR[1], edgecolor = EDGE_COLOR)
plt.show()

### Display the Confusion Matrix

In [None]:
# Confusion matrix of the predictions on the data the model was fitted on
DisplayConfusionMatrix(vY, oSVM.predict(mX), lClasses = oSVM.classes_)

### Compute the Scores: Precision, Recall and F1

In [None]:
# Scores of the positive class (Label 1). The 4th output of the function is discarded.
vHatY                    = oSVM.predict(mX)
precision, recall, f1, _ = precision_recall_fscore_support(vY, vHatY, pos_label = 1, average = 'binary')

print(f'Precision = {precision}')
print(f'Recall    = {recall}'   )
print(f'f1        = {f1}'       )

### Plot ROC and AUC

In [None]:
# ROC curve: False Positive rate, True Positive rate and the matching thresholds
vScore         = oSVM.decision_function(mX) #<! Values proportional to distance from the separating hyperplane
vFP, vTP, vThr = roc_curve(vY, vScore, pos_label = 1)
AUC            = auc(vFP, vTP) #<! Area under the (vFP, vTP) curve

print(f'AUC = {AUC}')

In [None]:
# Side by side: the ROC curve (Left) and the decision regions of the model (Right)
plt.figure(figsize = (16, 8))

plt.subplot(1, 2, 1)
plt.plot(vFP, vTP, color = 'b', lw = 2, label = f'ROC Curve, AUC = {AUC:.3f}')
plt.plot([0, 1], [0, 1], color = 'k', lw = 2, linestyle = '--') #<! Diagonal reference line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
# Uses `Z`, the predicted labels over the grid, as computed in the decision boundary cell
plt.contourf(XX0, XX1, Z, colors = CLASS_COLOR, alpha = 0.3, levels = [-0.5, 0.5, 1.5])
plt.scatter(mX[vIdx0, 0], mX[vIdx0, 1], s = ELM_SIZE_DEF, c = CLASS_COLOR[0], edgecolor = EDGE_COLOR)
plt.scatter(mX[vIdx1, 0], mX[vIdx1, 1], s = ELM_SIZE_DEF, c = CLASS_COLOR[1], edgecolor = EDGE_COLOR)

plt.show()

In [None]:
# Decision function over the grid points.
# Pay attention: `vScore` is rebound here, it no longer holds the scores of the samples `mX`.
vScore = oSVM.decision_function(XX)
mScore = vScore.reshape(XX0.shape) #<! Scores in the shape of the grid

def PlotRoc(idx):
    """
    Plots the ROC curve next to the decision regions induced by a single threshold.

    The function reads the notebook level variables: `vFP`, `vTP`, `vThr`, `AUC`,
    `mScore`, `XX0`, `XX1`, `mX`, `vIdx0` and `vIdx1`.

    Input:
        idx - Index into `vThr` (And `vFP`) which selects the threshold to display.
    Output:
        None. A new figure with 2 axes is created on each call.
    """

    thrVal = vThr[idx]

    _, vAx = plt.subplots(1, 2, figsize = (14, 6))

    # Left: The ROC curve with a vertical line at the False Positive rate of the threshold
    ax = vAx[0]
    ax.plot(vFP, vTP, color = 'b', lw = 3, label = f'AUC = {AUC:.3f}')
    ax.plot([0, 1], [0, 1], color = 'k', lw = 2, linestyle = '--')
    ax.axvline(x = vFP[idx], color = 'g', lw = 2, linestyle = '--')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title ('ROC' f'\n$\\alpha = {vThr[idx]}$')
    ax.axis('equal')
    ax.legend()
    ax.grid()

    # Right: The decision regions of the selected threshold.
    # `roc_curve()` counts a sample as positive when its score is `>=` the threshold,
    # hence the same (Inclusive) rule is used so the regions match the marked ROC point.
    Z  = mScore >= thrVal
    ax = vAx[1]
    ax.contourf(XX0, XX1, Z, colors = CLASS_COLOR, alpha = 0.3, levels=[0, 0.5, 1.0])
    ax.scatter(mX[vIdx0, 0], mX[vIdx0, 1], s = ELM_SIZE_DEF, c = CLASS_COLOR[0], edgecolor = EDGE_COLOR)
    ax.scatter(mX[vIdx1, 0], mX[vIdx1, 1], s = ELM_SIZE_DEF, c = CLASS_COLOR[1], edgecolor = EDGE_COLOR)
    


In [None]:
# Slider over all valid indices of `vThr`, each value is passed to `PlotRoc()` as `idx`
idxSlider = IntSlider(min = 0, max = len(vThr) - 1, step = 1, value = 0, layout = Layout(width = '30%'))
interact(PlotRoc, idx = idxSlider)

# NOTE(review): The 2 calls below run once, when the cell is executed, and not per slider update.
# They act on the figure which is current at that time - confirm they affect the interactive figure.
plt.tight_layout()
plt.show()

### Understanding the AUC

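The AUC of a classifier equals the probability that the classifier ranks a randomly chosen positive example higher than a randomly chosen negative example: $P\Big(\text{score}(x^+) > \text{score}(x^-)\Big)$. When scores can tie, a tie is counted as one half: $\text{AUC} = P\Big(\text{score}(x^+) > \text{score}(x^-)\Big) + \frac{1}{2} P\Big(\text{score}(x^+) = \text{score}(x^-)\Big)$.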

In [None]:
# Toy example: 20 samples, 'P' (Positive) / 'N' (Negative), with a score per sample
vYRoc = np.array(['P', 'P', 'N', 'P', 'P' , 'P' , 'N' , 'N' , 'P' , 'N' , 'P' , 'N' , 'P' , 'N' , 'N' , 'N' , 'P' , 'N' , 'P' , 'N'])
vSRoc = np.array([0.9, 0.8, 0.9, 0.6, 0.55, 0.51, 0.49, 0.43, 0.22, 0.39, 0.13, 0.31, 0.23, 0.22, 0.19, 0.15, 0.12, 0.11, 0.04, 0.01]) #<! Score values (Probability for Class 1)

# Map 'P' -> 1 and anything else -> 0
vYRoc = np.where(vYRoc == 'P', 1, 0) #<! Labels


In [None]:
# ROC curve and AUC of the toy example
vFPRoc, vTPRoc, vThrRoc = roc_curve(vYRoc, vSRoc, pos_label = 1)
aucRoc                  = auc(vFPRoc, vTPRoc)
print(f'AUC = {aucRoc}')

In [None]:
# Plot the ROC curve of the toy example
plt.figure(figsize = (8, 8))

plt.plot(vFPRoc, vTPRoc, color = 'b', lw = 2, label = f'ROC Curve, AUC = {aucRoc:.3f}')
plt.plot([0, 1], [0, 1], color = 'k', lw = 2, linestyle = '--') #<! Diagonal reference line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC')
plt.grid()
plt.legend()


In [None]:
# Boolean masks of the negative / positive samples of the toy example
vIdx0Roc = vYRoc == 0
vIdx1Roc = vYRoc == 1

# Empirical AUC
# Monte Carlo estimate: draw (With replacement) random pairs of a positive score and a
# negative score and measure how often the positive score is ranked higher.
numDraws = 1000
vPosDraw = np.random.choice(vSRoc[vIdx1Roc], numDraws) #<! Scores of random positive samples
vNegDraw = np.random.choice(vSRoc[vIdx0Roc], numDraws) #<! Scores of random negative samples

# The toy scores include ties between positive and negative samples (0.9 and 0.22).
# The AUC counts a tie as half a win, using only `>` would bias the estimate downwards.
empAuc = np.mean(vPosDraw > vNegDraw) + 0.5 * np.mean(vPosDraw == vNegDraw)
print(f'Empirical AUC: {empAuc}')

In [None]:
# Hard decisions of the toy example: label 1 for a score above 0.5, label 0 otherwise
vYEstRoc = vSRoc > 0.5
vYEstRoc = vYEstRoc.astype(np.int32) #<! Boolean -> {0, 1}

In [None]:
# precisionRoc, recallRoc, f1Roc, _   = precision_recall_fscore_support(vYRoc, vYEstRoc, pos_label = 1, average = 'binary')
# tnRoc, fpRoc, fnRoc, tpRoc          = confusion_matrix(vYRoc, vYEstRoc).ravel()
# specificityRoc = tnRoc / (tnRoc + fpRoc)
# 0.5 * (recallRoc + specificityRoc)

In [None]:
# The AUC and Accuracy are not equivalent!
# Accuracy: The fraction of samples whose thresholded prediction equals the label
empAccu = np.mean(vYEstRoc == vYRoc)
print(f'Empirical Accuracy: {empAccu}')