In [1]:
# Initialization: download the handbook's shared helper script and execute it in this kernel

# Fetch shared_functions.py from the main branch of the Fraud-Detection-Handbook repository
# into the current working directory (curl -O keeps the remote file name).
!curl -O https://raw.githubusercontent.com/Fraud-Detection-Handbook/fraud-detection-handbook/main/Chapter_References/shared_functions.py
# Execute the downloaded script in the notebook's namespace.
# NOTE(review): the cells below use `np`, `pd` and `metrics` without importing them;
# presumably this script provides those names -- confirm.
# NOTE(review): this runs code fetched from a mutable branch; consider pinning a commit.
%run shared_functions.py

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 63257  100 63257    0     0   169k      0 --:--:-- --:--:-- --:--:--  168k


In [3]:
# Map a vector of fraud probabilities to binary class labels using a decision threshold
def get_class_from_fraud_probability(fraud_probabilities, threshold=0.5):
    """Convert fraud probabilities into 0/1 class predictions.

    A probability strictly below ``threshold`` is labelled 0 (genuine);
    every other value, including one equal to the threshold, is labelled 1
    (fraud).

    Parameters
    ----------
    fraud_probabilities : iterable of numbers
        One fraud probability per transaction.
    threshold : number, default 0.5
        Decision threshold.

    Returns
    -------
    list of int
        One predicted class per input probability, in the same order.
    """
    labels = []

    for probability in fraud_probabilities:
        if probability < threshold:
            labels.append(0)
        else:
            labels.append(1)

    return labels

In [4]:
# Implementation of the mean misclassification error
def compute_MME(true_labels,predicted_classes):
    """Return the mean misclassification error (MME).

    The MME is the fraction of positions at which ``predicted_classes``
    differs from ``true_labels``.

    Parameters
    ----------
    true_labels : sequence
        Ground-truth class of each transaction.
    predicted_classes : sequence
        Predicted class of each transaction; must have the same shape as
        ``true_labels``.

    Returns
    -------
    float
        Misclassified fraction in [0, 1], or NaN when the inputs are empty
        (the error rate is undefined without any sample).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    true_array = np.asarray(true_labels)
    predicted_array = np.asarray(predicted_classes)

    # Without this check NumPy would broadcast a length-1 input against the
    # other one and silently return a meaningless error rate.
    if true_array.shape != predicted_array.shape:
        raise ValueError(
            "true_labels and predicted_classes must have the same shape, got "
            f"{true_array.shape} and {predicted_array.shape}"
        )

    N = len(true_array)

    # Avoid a 0/0 division (and its RuntimeWarning) on empty input.
    if N == 0:
        return np.nan

    MME = np.sum(true_array != predicted_array)/N

    return MME

In [6]:
# Toy example of ten transactions: the first two are fraudulent (1), the other eight genuine (0)
true_labels = [1, 1] + [0] * 8

# Predicted probability of fraud for each transaction, in the same order as true_labels
fraud_probabilities = [
    0.9, 0.35, 0.45, 0.4, 0.2,
    0.2, 0.2, 0.1, 0.1, 0,
]

In [8]:
def _safe_ratio(numerator, denominator):
    """Return numerator/denominator, or 0 when the denominator is not positive."""
    return numerator/denominator if denominator > 0 else 0


def threshold_based_metrics(fraud_probabilities, true_label, thresholds_list):
    """Compute threshold-based classification metrics for a list of thresholds.

    For each threshold the fraud probabilities are turned into 0/1 classes
    with ``get_class_from_fraud_probability``, a binary confusion matrix is
    built against ``true_label``, and the metrics below are derived from it.
    Any ratio whose denominator is zero is reported as 0.

    Parameters
    ----------
    fraud_probabilities : sequence of numbers
        Predicted fraud probability of each transaction.
    true_label : sequence
        Ground-truth class of each transaction (0 = genuine, 1 = fraud).
    thresholds_list : iterable of numbers
        Decision thresholds to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per threshold, with columns 'Threshold', 'MME', 'TPR',
        'TNR', 'FPR', 'FNR', 'BER', 'G-mean', 'Precision', 'NPV', 'FDR',
        'FOR' and 'F1 Score'.
    """
    results = []

    for threshold in thresholds_list:

        predicted_classes = get_class_from_fraud_probability(fraud_probabilities, threshold=threshold)

        # labels=[0, 1] forces a 2x2 matrix. Without it, inputs in which only
        # one class occurs yield a 1x1 matrix and the unpacking below fails.
        (TN, FP, FN, TP) = metrics.confusion_matrix(true_label, predicted_classes, labels=[0, 1]).ravel()

        # Mean misclassification error
        MME = _safe_ratio(FP+FN, TN+FP+FN+TP)

        # Rates conditioned on the true class (denominator is zero when that
        # class is absent from true_label)
        TPR = _safe_ratio(TP, TP+FN)
        TNR = _safe_ratio(TN, TN+FP)

        FPR = _safe_ratio(FP, TN+FP)
        FNR = _safe_ratio(FN, TP+FN)

        # Balanced error rate and geometric mean of TPR and TNR
        BER = 1/2*(FPR+FNR)

        Gmean = np.sqrt(TPR*TNR)

        # Rates conditioned on the predicted class (denominator is zero when
        # nothing is predicted as that class)
        precision = _safe_ratio(TP, TP+FP)
        FDR = _safe_ratio(FP, TP+FP)

        NPV = _safe_ratio(TN, TN+FN)
        FOR = _safe_ratio(FN, TN+FN)

        # Harmonic mean of precision and recall; 0 when both are 0
        F1_score = 0

        if precision+TPR>0:
            F1_score = 2*(precision*TPR)/(precision+TPR)

        results.append([threshold, MME, TPR, TNR, FPR, FNR, BER, Gmean, precision, NPV, FDR, FOR, F1_score])

    results_df = pd.DataFrame(results,columns=['Threshold' ,'MME', 'TPR', 'TNR', 'FPR', 'FNR', 'BER', 'G-mean', 'Precision', 'NPV', 'FDR', 'FOR', 'F1 Score'])

    return results_df

In [9]:
# Candidate thresholds: every distinct predicted probability, from highest to lowest
unique_thresholds = sorted(set(fraud_probabilities), reverse=True)
unique_thresholds

[0.9, 0.45, 0.4, 0.35, 0.2, 0.1, 0]

In [10]:
# Evaluate every threshold-based metric at each candidate threshold
results_df = threshold_based_metrics(
    fraud_probabilities=fraud_probabilities,
    true_label=true_labels,
    thresholds_list=unique_thresholds,
)

In [11]:
# Display the metrics table (one row per evaluated threshold)
results_df

Unnamed: 0,Threshold,MME,TPR,TNR,FPR,FNR,BER,G-mean,Precision,NPV,FDR,FOR,F1 Score
0,0.9,0.1,0.5,1.0,0.0,0.5,0.25,0.707107,1.0,0.888889,0.0,0.111111,0.666667
1,0.45,0.2,0.5,0.875,0.125,0.5,0.3125,0.661438,0.5,0.875,0.5,0.125,0.5
2,0.4,0.3,0.5,0.75,0.25,0.5,0.375,0.612372,0.333333,0.857143,0.666667,0.142857,0.4
3,0.35,0.2,1.0,0.75,0.25,0.0,0.125,0.866025,0.5,1.0,0.5,0.0,0.666667
4,0.2,0.5,1.0,0.375,0.625,0.0,0.3125,0.612372,0.285714,1.0,0.714286,0.0,0.444444
5,0.1,0.7,1.0,0.125,0.875,0.0,0.4375,0.353553,0.222222,1.0,0.777778,0.0,0.363636
6,0.0,0.8,1.0,0.0,1.0,0.0,0.5,0.0,0.2,0.0,0.8,0.0,0.333333
