# Useful functions:

In [54]:
import pandas as pd
import numpy as np
from sklearn import metrics

from IPython.display import display, HTML

## Binary_Classifier_Verification:

In [59]:
def Binary_Classifier_Verification(actual, predicted, type_info = "", cutoff = 0.5,
                                   FN_cost = 1, FP_cost = 1, TN_cost = 0, TP_cost = 0,
                                   save = False):
    """Display and return the confusion matrix and quality metrics of a binary classifier.

    Parameters
    ----------
    actual : array-like
        True class labels, expected to be 0 (negative) or 1 (positive).
    predicted : array-like
        Predicted scores. A score strictly below ``cutoff`` is classified as 0,
        anything else as 1. Lists, NumPy arrays and pandas Series are accepted.
    type_info : str, optional
        Currently unused by this function; kept so existing callers keep working.
    cutoff : float, optional
        Decision threshold applied to ``predicted`` (default 0.5).
    FN_cost, FP_cost, TN_cost, TP_cost : number, optional
        Cost charged per false negative / false positive / true negative /
        true positive when computing the "Cost" metric.
    save : bool, optional
        Currently unused by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        ``"Confusion_Matrix_Explanation"``: DataFrame naming the four cells,
        ``"Confusion_Matrix_Result"``: DataFrame with the observed counts,
        ``"Assessment_of_Classifier_Effectiveness"``: DataFrame with one row per
        metric (name, abbreviation, alias, score, formula).

    Notes
    -----
    The three DataFrames are also rendered with ``display``. A ratio whose
    denominator is zero (for example precision when nothing was predicted
    positive) is reported as NaN.
    """

    def _ratio(numerator, denominator):
        # A zero denominator means the metric is undefined for this sample;
        # report NaN instead of raising or emitting a NumPy runtime warning.
        return numerator / denominator if denominator != 0 else float("nan")

    # Static legend explaining the layout of the confusion matrix.
    result_1 = pd.DataFrame({
        "Confusion Matrix" : ["Actual Negative (0)", "Actual Positive (1)"],
        "Predicted Negative (0)" : ["True Negative (TN)", "False Negative (FN)"],
        "Predicted Positive (1)" : ["False Positive (FP)", "True Positive (TP)"]})

    # np.asarray makes the threshold comparison work for plain Python lists too
    # (a bare ``list < float`` raises TypeError).
    result_2 = pd.DataFrame({
        "Actual Class" : actual,
        "Prediction" : predicted,
        "Predicted Class" : np.where(np.asarray(predicted) < cutoff, 0, 1)})
    result_2["Actual Class"] = pd.Categorical(result_2["Actual Class"], ordered = True, categories = [0,  1])
    result_2["Predicted Class"] = pd.Categorical(result_2["Predicted Class"], ordered = True, categories = [0,  1])

    # Count the four cells directly from the category codes (0 -> class 0,
    # 1 -> class 1). This always yields all four counts, even when a class is
    # never observed or never predicted.
    actual_codes = result_2["Actual Class"].cat.codes.to_numpy()
    predicted_codes = result_2["Predicted Class"].cat.codes.to_numpy()

    # Assessment of classifier effectiveness:
    TN = int(np.sum((actual_codes == 0) & (predicted_codes == 0))); TN_label = "= TN"
    FP = int(np.sum((actual_codes == 0) & (predicted_codes == 1))); FP_label = "= FP"
    FN = int(np.sum((actual_codes == 1) & (predicted_codes == 0))); FN_label = "= FN"
    TP = int(np.sum((actual_codes == 1) & (predicted_codes == 1))); TP_label = "= TP"
    OBS = TN + FP + FN + TP; OBS_label = "= TN + FP + FN + TP"
    P = FN + TP; P_label = "= FN + TP"
    N = TN + FP; N_label = "= TN + FP"
    # Accuracy (ACC):
    ACC = _ratio(TN + TP, OBS)
    ACC_label = "= (TN + TP)/(TN + FN + FP + TP) = (TN + TP)/(P + N)"
    # Balanced Accuracy (BACC):
    BACC = (_ratio(TN, N) + _ratio(TP, P))/2
    BACC_label = "= (TN/(TN + FP) + TP/(FN + TP))/2"
    # Area Under Curve (AUC), computed on the raw scores, not the thresholded classes:
    fpr, tpr, _ = metrics.roc_curve(result_2["Actual Class"], result_2["Prediction"])
    AUC = metrics.auc(fpr, tpr)
    AUC_label = "= Area Under ROC Curve"
    # Bias (difference between actual and predicted class, averaged over observations):
    BIAS = np.mean(result_2["Actual Class"].astype("int") - result_2["Predicted Class"].astype("int"))
    BIAS_label = "= mean(actual) - mean(predicted)"
    # Classification Error (CE):
    CE = _ratio(FN + FP, OBS)
    CE_label = "= (FN + FP)/(TN + FN + FP + TP) = 1 - (TN + TP)/(TN + FN + FP + TP)"
    # Recall, Sensitivity, hit rate, True Positive Rate (TPR):
    TPR = _ratio(TP, P)
    TPR_label = "= TP/(TP + FN) = TP/P = 1 - FNR"
    # Specificity, selectivity, True Negative Rate (TNR):
    TNR = _ratio(TN, N)
    TNR_label = "= TN/(TN + FP) = TN/N = 1 - FPR"
    # Precision, Positive Prediction Value (PPV):
    PPV = _ratio(TP, TP + FP)
    PPV_label = "= TP/(TP + FP) = 1 - FDR"
    # Negative Predictive Value (NPV):
    NPV = _ratio(TN, TN + FN)
    NPV_label = "= TN/(TN + FN) = 1 - FOR"
    # False Negative Rate (FNR), miss rate:
    FNR = _ratio(FN, P)
    FNR_label = "= FN/(FN + TP) = FN/P = 1 - TPR"
    # False Positive Rate (FPR), fall-out:
    FPR = _ratio(FP, N)
    FPR_label = "= FP/(FP + TN) = FP/N = 1 - TNR"
    # False Discovery Rate (FDR):
    FDR = _ratio(FP, FP + TP)
    FDR_label = "= FP/(FP + TP) = 1 - PPV"
    # False Omission Rate (FOR):
    FOR = _ratio(FN, FN + TN)
    FOR_label = "= FN/(FN + TN) = 1 - NPV"
    # Threat Score (TS), Critical Success Index (CSI):
    TS = _ratio(TP, TP + FN + FP)
    TS_label = "= TP/(TP + FN + FP)"
    # F1 (NaN when precision and recall are both zero or undefined):
    F1 = _ratio(2 * PPV * TPR, PPV + TPR)
    F1_label = "= (2 * PPV * TPR)/(PPV + TPR) = 2 * TP/(2 * TP + FP + FN)"
    # Informedness, Bookmaker Informedness (BM):
    BM = TPR + TNR - 1
    BM_label = "= TPR + TNR - 1"
    # Markedness (MK):
    MK = PPV + NPV - 1
    MK_label = "= PPV + NPV - 1"
    # Gini Index:
    GINI = 2 * AUC - 1
    GINI_label = "= 2 * AUC - 1"
    # Cost:
    COST = FN * FN_cost + FP * FP_cost + TN * TN_cost + TP * TP_cost
    COST_label = "= FN * FN_cost + FP * FP_cost + TN * TN_cost + TP * TP_cost"

    # One row per metric; the five lists below must stay in the same order.
    result_4 = pd.DataFrame({
        "Metric" : ["Number of Observations", "True Negative", "False Positive", "False Negative",
                    "True Positive", "Condition Negative", "Condition Positive", "Accuracy", 
                    "Balanced Accuracy", "Area Under ROC Curve", "Bias", "Classification Error",
                    "True Positive Rate", "True Negative Rate", "Positive Prediction Value", "Negative Predictive Value",
                    "False Negative Rate", "False Positive Rate", "False Discovery Rate", "False Omission Rate",
                    "Threat Score", "F1 Score", "Bookmaker Informedness", "Markedness",
                    "Gini Index", "Cost"],
        "Metric Abb" : ["RECORDS", "TN", "FP", "FN",
                        "TP", "N", "P", "ACC",
                        "BACC", "AUC", "BIAS", "CE",
                        "TPR", "TNR", "PPV", "NPV",
                        "FNR", "FPR", "FDR", "FOR",
                        "TS", "F1", "BM", "MK",
                        "GINI", "COST"],
        "Metric Name" : ["-", "-", "Type I Error", "Type II Error",
                         "-", "-", "-", "-",
                         "-", "-", "-", "-",
                         "Sensitivity, Recall, Hit Rate", "Specifity, Selectivity", "Precision", "-",
                         "Miss Rate", "Fall-Out", "-", "-", 
                         "Critical Success Index", "-", "-", "-",
                         "-", "-"],
        "Score" : [OBS, TN, FP, FN, 
                   TP, N, P, ACC,
                   BACC, AUC, BIAS, CE,
                   TPR, TNR, PPV, NPV,
                   FNR, FPR, FDR, FOR,
                   TS, F1, BM, MK,
                   GINI, COST],
        "Calculation" : [OBS_label, TN_label, FP_label, FN_label, 
                         TP_label, N_label, P_label, ACC_label,
                         BACC_label, AUC_label, BIAS_label, CE_label,
                         TPR_label, TNR_label, PPV_label, NPV_label,
                         FNR_label, FPR_label, FDR_label, FOR_label, 
                         TS_label, F1_label, BM_label, MK_label, 
                         GINI_label, COST_label]})

    # Observed counts, laid out like the legend in result_1.
    result_3 = pd.DataFrame({
        "Confusion Matrix" : ["Actual Negative (0)", "Actual Positive (1)"],
        "Predicted Negative (0)" : [TN, FN],
        "Predicted Positive (1)" : [FP, TP]})

    display(result_1)
    display(result_3)
    display(result_4)

    return({
        "Confusion_Matrix_Explanation" : result_1,
        "Confusion_Matrix_Result" : result_3,
        "Assessment_of_Classifier_Effectiveness" : result_4})

# Demo: five actual negatives followed by five actual positives, scored with
# uniform random predictions (the output differs on every run).
Binary_Classifier_Verification(
    actual = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
    predicted = np.random.random(10),
)

Unnamed: 0,Confusion Matrix,Predicted Negative (0),Predicted Positive (1)
0,Actual Negative (0),True Negative (TN),False Positive (FP)
1,Actual Positive (1),False Negative (FN),True Positive (TP)


Unnamed: 0,Confusion Matrix,Predicted Negative (0),Predicted Positive (1)
0,Actual Negative (0),3,2
1,Actual Positive (1),1,4


Unnamed: 0,Metric,Metric Abb,Metric Name,Score,Calculation
0,Number of Observations,RECORDS,-,10.0,= TN + FP + FN + TP
1,True Negative,TN,-,3.0,= TN
2,False Positive,FP,Type I Error,2.0,= FP
3,False Negative,FN,Type II Error,1.0,= FN
4,True Positive,TP,-,4.0,= TP
5,Condition Negative,N,-,5.0,= TN + FP
6,Condition Positive,P,-,5.0,= FN + TP
7,Accuracy,ACC,-,0.7,= (TN + TP)/(TN + FN + FP + TP) = (TN + TP)/(P...
8,Balanced Accuracy,BACC,-,0.7,= (TN/(TN + FP) + TP/(FN + TP))/2
9,Area Under ROC Curve,AUC,-,0.6,= Area Under ROC Curve


{'Confusion_Matrix_Explanation':       Confusion Matrix Predicted Negative (0) Predicted Positive (1)
 0  Actual Negative (0)     True Negative (TN)    False Positive (FP)
 1  Actual Positive (1)    False Negative (FN)     True Positive (TP),
 'Confusion_Matrix_Result':       Confusion Matrix  Predicted Negative (0)  Predicted Positive (1)
 0  Actual Negative (0)                       3                       2
 1  Actual Positive (1)                       1                       4,
 'Assessment_of_Classifier_Effectiveness':                        Metric Metric Abb                    Metric Name  \
 0      Number of Observations    RECORDS                              -   
 1               True Negative         TN                              -   
 2              False Positive         FP                   Type I Error   
 3              False Negative         FN                  Type II Error   
 4               True Positive         TP                              -   
 5          Con