In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.metrics import confusion_matrix
from sklearn import metrics


# Confusion Matrix - simple demo

## Notes

## Helper Functions

### `show_CM_as_heatmap(cm)`

Plots a confusion matrix (CM) as a Seaborn heatmap.

In [2]:
def show_CM_as_heatmap(cm, labels=None):
    """Plot a confusion matrix as an annotated Seaborn heatmap.

    Args:
        cm: square 2-D array-like of counts. The axis titles assume the
            layout used by ``sklearn.metrics.confusion_matrix``: rows are
            the true classes, columns are the predicted classes.
        labels: tick labels, one per class, in matrix order. When omitted,
            the notebook-level ``class_values`` list is used (the previous
            behaviour), so that list must exist before the call.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    if labels is None:
        # Backward-compatible default: fall back to the notebook-level list.
        labels = class_values

    # Seaborn takes its tick labels from the DataFrame index/columns.
    df_cm = pd.DataFrame(cm, index=labels, columns=labels)

    sns.set(font_scale=1)  # label size
    ax = sns.heatmap(df_cm, annot=True, annot_kws={"size": 16})  # annotation font size

    # Name the axes so false positives and false negatives can be told apart.
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")

    plt.show()


### Calculate Precision
#### `calc_precision(tp, y_pred)`

**Precision**: the fraction of relevant instances (TP) among the retrieved instances (TP+FP)

TP+FP = all instances predicted as positive by the model

**Precision = TP / (TP + FP)**

In [5]:
def calc_precision(tp, y_pred):
    """Precision: the fraction of relevant instances (TP) among the retrieved
    instances (TP + FP).

    TP + FP is every instance the model predicted as positive, so the
    denominator is counted directly from ``y_pred``.

    Args:
        tp: number of true positives.
        y_pred: array-like of predicted labels; the value 1 marks the
            positive class.

    Returns:
        ``tp / (TP + FP)`` as a float, or 0.0 when the model predicted no
        positive instance at all.
    """
    # TP + FP == number of predicted positives.
    predicted_positives = (np.asarray(y_pred) == 1).sum()
    if predicted_positives == 0:
        # Nothing was retrieved: avoid 0/0 and report zero precision.
        return 0.0
    # Parenthesised denominator: `tp / tp + fp` would evaluate as (tp/tp) + fp.
    return float(tp / predicted_positives)


### Calculate Recall
#### `calc_recall(tp, y_pred)`

**Recall**: the fraction of the total amount of relevant instances that were actually retrieved

TP + FN = All positive instances, as of ground truth.

**Recall = TP / (TP + FN)**

In [6]:
def calc_recall(tp, y_pred, y_true=None):
    """Recall: the fraction of the total amount of relevant instances that
    were actually retrieved.

    TP + FN is every instance that is positive according to the ground
    truth. FN cannot be derived from predictions alone, so the ground-truth
    labels have to be supplied through ``y_true``.

    Args:
        tp: number of true positives.
        y_pred: array-like of predicted labels (1 = positive, 0 = negative).
        y_true: array-like of ground-truth labels, same length as ``y_pred``.

    Returns:
        ``tp / (TP + FN)`` as a float, or 0.0 when that denominator is zero.

    Raises:
        ValueError: if ``y_true`` is not supplied.
    """
    if y_true is None:
        raise ValueError(
            "calc_recall needs the ground-truth labels: pass y_true=..."
        )

    truth = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    # FN: the ground truth is positive but the model predicted negative.
    fn = ((truth == 1) & (predicted == 0)).sum()

    relevant = tp + fn
    if relevant == 0:
        # No relevant instance: avoid 0/0 and report zero recall.
        return 0.0
    # Parenthesised denominator: `tp / tp + fn` would evaluate as (tp/tp) + fn.
    return float(tp / relevant)

### Calculate Confusion Matrix

#### `calc_CM_params(y_true, y_pred)`

Meaning:

- True  Positives (TP): classifier correctly predicted the positive (value=1) class
- False Positives (FP): classifier incorrectly predicted 1 (the ground truth was 0)
- True  Negative  (TN): classifier correctly predicted the negative (value=0) class
- False Negative  (FN): classifier incorrectly predicted 0 (the ground truth was 1)


In [6]:
def calc_CM_params(y_true, y_pred):
    """Count the four cells of a binary confusion matrix.

    Meaning:

        True  Positives (TP): classifier correctly predicted the positive (value=1) class
        False Positives (FP): classifier incorrectly predicted 1 (the ground truth was 0)
        True  Negative  (TN): classifier correctly predicted the negative (value=0) class
        False Negative  (FN): classifier incorrectly predicted 0 (the ground truth was 1)

    Args:
        y_true: array-like of ground-truth labels (1 = positive, 0 = negative).
        y_pred: array-like of predicted labels, same shape as ``y_true``.

    Returns:
        dict with the keys 'TN', 'FP', 'FN', 'TP' (in that order) mapped to
        the corresponding counts.

    Raises:
        ValueError: if ``y_true`` and ``y_pred`` have different shapes.
    """
    # Coerce first: comparing a plain list with `== 1` yields a single bool,
    # not an element-wise mask.
    truth = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    if truth.shape != predicted.shape:
        # Without this check NumPy could silently broadcast mismatched inputs.
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {truth.shape} and {predicted.shape}"
        )

    truth_pos, truth_neg = truth == 1, truth == 0
    pred_pos, pred_neg = predicted == 1, predicted == 0

    return {
        'TN': (truth_neg & pred_neg).sum(),
        'FP': (truth_neg & pred_pos).sum(),
        'FN': (truth_pos & pred_neg).sum(),
        'TP': (truth_pos & pred_pos).sum(),
    }


In [8]:
def print_CM(y_true, y_pred):
    """Print the confusion-matrix counts, one ``NAME: value`` line per entry.

    The counts come from ``calc_CM_params(y_true, y_pred)`` and are printed
    in the order that dict yields them.

    For reference, in a 2x2 matrix C the cells map as:
    C_00 - TN, C_01 - FP, C_10 - FN, C_11 - TP.
    """
    counts = calc_CM_params(y_true, y_pred)

    for name in counts:
        print(f"{name}: {counts[name]}")


In [10]:
# Imagine we've trained a classifier to classify documents as:
# relevant      - the positive class (value 1)
# irrelevant    - the negative class (value 0)

class_names = ['irrelevant', 'relevant']
class_values = [0, 1]

# the ground truth:  3 - positive, 7 - negative
# the predicted:     4 - positive, 6 - negative
y_true = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
y_pred = np.array([1, 0, 0, 1, 1, 1, 0, 0, 0, 0])

# calc CM:
cm = confusion_matrix(y_true, y_pred)

# print the confusion-matrix counts; print_CM takes the two label arrays,
# not the matrix itself:
print_CM(y_true, y_pred)
print(f"CM: \n{cm}")

show_CM_as_heatmap(cm)

TN: 4
FP: 3
FN: 2
TP: 1
CM: 
[[4 3]
 [2 1]]
