In [12]:
from sklearn.metrics import confusion_matrix, log_loss, matthews_corrcoef
import pandas as pd

Source : https://towardsdatascience.com/the-explanation-you-need-on-binary-classification-metrics-321d280b590f

# Evaluation Metrics
It is important to use the right metric to evaluate our models in order to get an accurate picture of their performance. In our case we are doing binary classification: the target is either 1 (fraud) or 0 (non-fraud). In the dataset, there are 95469 non-fraud observations for 193 fraud observations. Because we are working with an unbalanced dataset (there is much more non-fraud data), there is a risk that a model could predict only 0 and still obtain a good score. We have to find a way to prevent this. It is also important not to evaluate predictions on data that was also used for training.

Our goal as analysts is to contextualize and understand which metric offers us the most value. The metrics we're going to cover are : 
1. Accuracy : It gives the number of correct answers over the total number of answers. It is a common and simple metric, but it is not recommended for our case of unbalanced data: if the model only predicts 0, it reaches an accuracy of about 99.8% ($95469 / 95662$) while failing to detect every single fraud. 
2. Precision : $\frac{TP}{TP+FP}$ It shows how reliable the model's positive predictions are, i.e. how often we are correct when we classify an observation as positive. The closer it is to 1, the better. A high-precision model does not necessarily recognize every fraud, but when it predicts one, it is usually right. 
3. Recall : $\frac{TP}{TP+FN}$ A model with a high recall recognizes as many of the positive observations as possible. We want a high recall if we want to be sure to detect all the frauds and do not mind that non-fraud observations are sometimes classified as fraud (i.e. the predictions include some noise).
4. F1 score : combines precision and recall (2 complementary metrics) into one metric: $2 \cdot \frac{precision \cdot recall}{precision + recall}$. It is probably the most used metric for evaluating binary classification models. If our F1 score increases, it means that our model has improved in precision, recall or both.
5. Log loss : This metric measures the difference between the probabilities predicted by the model and the observed reality. It evaluates how well the model estimates the probability that an example belongs to the positive class; the lower the log loss, the better.
6. Matthews Correlation Coefficient (MCC) : is designed to evaluate even models trained on unbalanced datasets. It ranges between $-1$ and $+1$. We want it to be near $+1$, since that indicates a strong correlation between the actual observed values and the predictions made by our model.

In [2]:
def precision(val_y, pred_y):
    """Return the precision, TP / (TP + FP), of binary 0/1 predictions.

    Parameters
    ----------
    val_y : array-like
        Ground-truth labels (0 = negative, 1 = positive).
    pred_y : array-like
        Predicted labels, same length as ``val_y``.

    Returns
    -------
    float
        The precision, or 0.0 when no positive was predicted (the ratio is
        undefined in that case).
    """
    # Pin the label order so the matrix is always 2x2. Without it, inputs
    # that contain a single class (e.g. a model predicting only 0 on an
    # all-negative fold) give a 1x1 matrix and the unpacking below fails.
    cm = confusion_matrix(val_y, pred_y, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    predicted_positive = tp + fp
    if predicted_positive == 0:
        # Undefined ratio: report 0.0 instead of NaN plus a RuntimeWarning.
        return 0.0
    return tp / predicted_positive

In [3]:
def recall(val_y, pred_y):
    """Return the recall, TP / (TP + FN), of binary 0/1 predictions.

    Parameters
    ----------
    val_y : array-like
        Ground-truth labels (0 = negative, 1 = positive).
    pred_y : array-like
        Predicted labels, same length as ``val_y``.

    Returns
    -------
    float
        The recall, or 0.0 when ``val_y`` contains no positive (the ratio is
        undefined in that case).
    """
    # Pin the label order so the matrix is always 2x2, even when only one
    # class is present in the inputs; otherwise the unpacking below fails.
    cm = confusion_matrix(val_y, pred_y, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    actual_positive = tp + fn
    if actual_positive == 0:
        # Undefined ratio: report 0.0 instead of NaN plus a RuntimeWarning.
        return 0.0
    return tp / actual_positive

In [4]:
def f1score(val_y, pred_y):
    """Return the F1 score of binary 0/1 predictions.

    The score is ``2 * (precision * recall) / (precision + recall)``.

    Parameters
    ----------
    val_y : array-like
        Ground-truth labels (0 = negative, 1 = positive).
    pred_y : array-like
        Predicted labels, same length as ``val_y``.

    Returns
    -------
    float
        The F1 score, or 0.0 when it is undefined (no true positive at all).
    """
    # Pin the label order so the matrix is always 2x2, even when only one
    # class is present in the inputs; otherwise the unpacking below fails.
    cm = confusion_matrix(val_y, pred_y, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    # Local names differ from the sibling helpers `precision` / `recall`
    # so those functions are not shadowed inside this body.
    prec = tp / (tp + fp) if (tp + fp) else 0.0
    rec = tp / (tp + fn) if (tp + fn) else 0.0
    if prec + rec == 0:
        # Both components are zero: avoid a 0/0 division.
        return 0.0
    return 2 * ((prec * rec) / (prec + rec))

In [5]:
def logloss(val_y, pred_y):
    """Return scikit-learn's ``log_loss`` for the given truth and predictions.

    Both arguments are forwarded unchanged to ``sklearn.metrics.log_loss``.

    NOTE(review): log loss is defined on predicted probabilities; confirm
    that callers pass probabilities here rather than hard 0/1 labels.
    """
    return log_loss(val_y, pred_y)

In [6]:
def mcc(val_y, pred_y):
    """Return the Matthews correlation coefficient of the predictions.

    Both arguments are forwarded unchanged to
    ``sklearn.metrics.matthews_corrcoef``.
    """
    return matthews_corrcoef(val_y, pred_y)

In [11]:
def report(val_y, pred_y, model, description, csv = False):
    """Compute all evaluation metrics for one model run as a one-row table.

    Parameters
    ----------
    val_y : array-like
        Ground-truth labels.
    pred_y : array-like
        Predictions passed to every metric helper.
    model : str
        Value stored in the 'Model' column.
    description : str
        Value stored in the 'Description' column.
    csv : bool, default False
        When true, the row is also appended (without a header line) to
        'evaluationmetric.csv'.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns Model, Description, Date,
        Precision, Recall, F1-score, LogLoss and Mcc.
    """
    # The results must NOT be stored under the helpers' own names: assigning
    # to `precision`, `recall`, ... inside this function makes those names
    # local, so the calls would raise UnboundLocalError before running.
    precision_value = precision(val_y, pred_y)
    recall_value = recall(val_y, pred_y)
    f1_value = f1score(val_y, pred_y)
    logloss_value = logloss(val_y, pred_y)
    mcc_value = mcc(val_y, pred_y)
    d = {'Model': [model], 'Description': [description], 'Date':[pd.Timestamp.now()],
         'Precision' : [precision_value], 'Recall' : [recall_value], 'F1-score' : [f1_value],
         'LogLoss' : [logloss_value], 'Mcc' : [mcc_value]}
    df = pd.DataFrame(data=d)
    if csv:
        # Append only the data row; no header line is written here.
        df.to_csv('evaluationmetric.csv', mode='a', header=False)
    return df
    

In [8]:
def initreportcsv():
    """Write the header line and a placeholder row to 'evaluationmetric.csv'.

    The placeholder row has 'Table init' as its description, the current
    timestamp, and 0 for every metric.

    NOTE(review): the file is opened in append mode, so calling this on an
    existing file adds a second header line in the middle of the data.
    """
    placeholder = pd.DataFrame(
        data={
            'Model': ["None"],
            'Description': ['Table init'],
            'Date': [pd.Timestamp.now()],
            'Precision': [0],
            'Recall': [0],
            'F1-score': [0],
            'LogLoss': [0],
            'Mcc': [0],
        }
    )
    placeholder.to_csv('evaluationmetric.csv', mode='a')

In [10]:
def showreportcsv():
    """Load 'evaluationmetric.csv', print it in full and return it.

    The "Unnamed: 0" column (the index saved by ``to_csv``) is removed
    before the table is printed and returned.
    """
    report_table = pd.read_csv('evaluationmetric.csv').drop(columns="Unnamed: 0")
    print(report_table.to_string())
    return report_table
# Show the evaluation results currently stored in 'evaluationmetric.csv'.
showreportcsv()

   Model Description                        Date  Precision  Recall  F1-score  LogLoss  Mcc
0    NaN  Table init  2023-04-26 11:57:47.065184          0       0         0        0    0


Unnamed: 0,Model,Description,Date,Precision,Recall,F1-score,LogLoss,Mcc
0,,Table init,2023-04-26 11:57:47.065184,0,0,0,0,0
