In [1]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, precision_recall_curve

def evaluate_baseline(model, model_type, X_test, y_test, zero_division=1):
    """Evaluate a fitted binary classifier and report its test metrics.

    Predicts on ``X_test``, shows a confusion matrix and a precision-recall
    curve, then prints accuracy, precision, recall and F1.

    Args:
        model: Fitted estimator exposing ``predict`` and either
            ``predict_proba`` or ``decision_function``.
        model_type: Display name used in the plot titles, the curve legend
            and the printed report.
        X_test: Test features, passed unchanged to the model.
        y_test: True labels. Assumed to be encoded as 0 (not fraud) and
            1 (fraud); the display-label order and the metrics' default
            positive label both rely on that encoding.
        zero_division: Value reported for precision, recall and F1 when the
            metric is undefined. It is applied to all three so the report
            follows a single convention. Note that with the default of 1 a
            model that never predicts fraud reports a precision of 1.0;
            pass 0 for the pessimistic convention.

    Returns:
        None. Results are shown as figures and printed to stdout.
    """
    y_pred = model.predict(X_test)

    # Threshold-dependent metrics computed from the hard predictions.
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=zero_division)
    recall = recall_score(y_test, y_pred, zero_division=zero_division)
    f1 = f1_score(y_test, y_pred, zero_division=zero_division)

    # Pin both classes so the matrix stays 2x2 even when only one class
    # occurs in y_test/y_pred; otherwise the two display labels would not
    # match a 1x1 matrix and plotting would fail.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Not Fraud', 'Fraud'])
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title(f"{model_type} - Confusion Matrix")
    plt.show()

    # Continuous scores for the positive class: prefer probabilities and
    # fall back to the decision function for models without predict_proba.
    try:
        y_scores = model.predict_proba(X_test)[:, 1]
    except AttributeError:
        y_scores = model.decision_function(X_test)
    precisions, recalls, _ = precision_recall_curve(y_test, y_scores)

    # Precision-recall curve over all score thresholds.
    _, ax = plt.subplots(figsize=(8, 6))
    ax.plot(recalls, precisions, marker='.', label=f'{model_type} Fraud Model')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title('Precision-Recall Curve')
    ax.legend()
    ax.grid()
    plt.show()

    # Print the results of each metric
    print(f"\nModel: {model_type}")
    print(f"\nTest Accuracy: {acc:.4f}")
    print(f"Test Precision: {precision:.4f}")
    print(f"Test Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

In [2]:
import numpy as np
import torch

def evaluate_nn_model(model, data_loader, device, threshold=0.5, zero_division=1):
    """Evaluate a binary-classification network that outputs logits.

    Runs the model over ``data_loader`` without gradients, applies a sigmoid
    to the outputs, thresholds the probabilities into 0/1 predictions, shows
    a confusion matrix and a precision-recall curve, and prints accuracy,
    precision, recall and F1.

    Args:
        model: ``torch.nn.Module`` whose flattened output is one logit per
            sample. It is switched to evaluation mode and left in that mode.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the inputs are moved to before the forward pass.
        threshold: Probability at or above which a sample is predicted as
            the positive class.
        zero_division: Value reported for precision, recall and F1 when the
            metric is undefined (passed through to scikit-learn).

    Returns:
        None. Results are shown as figures and printed to stdout.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.eval()  # Disable dropout / use running batch-norm statistics
    prob_batches = []
    label_batches = []

    with torch.no_grad():  # No gradients are needed for evaluation
        for inputs, labels in data_loader:
            logits = model(inputs.to(device)).view(-1)
            # Keep one tensor per batch; labels never need to visit the
            # device because they are only consumed on the CPU afterwards.
            prob_batches.append(torch.sigmoid(logits).cpu())
            label_batches.append(labels.reshape(-1).cpu())

    if not prob_batches:
        raise ValueError("data_loader yielded no batches; nothing to evaluate")

    # Join the batches once instead of growing Python lists element-wise.
    probs = torch.cat(prob_batches)
    all_probs = probs.numpy()
    all_predictions = (probs >= threshold).float().numpy()
    all_labels = torch.cat(label_batches).numpy()

    # Pin both classes so the matrix stays 2x2 even when only one class
    # occurs in the labels/predictions; assumes labels are encoded 0/1.
    cm = confusion_matrix(all_labels, all_predictions, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Not Fraud', 'Fraud'])
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title("Confusion Matrix")
    plt.show()

    # Threshold-dependent metrics computed from the hard predictions.
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions, zero_division=zero_division)
    recall = recall_score(all_labels, all_predictions, zero_division=zero_division)
    f1 = f1_score(all_labels, all_predictions, zero_division=zero_division)

    # Precision-recall pairs across all probability thresholds.
    precisions, recalls, _ = precision_recall_curve(all_labels, all_probs)

    _, ax = plt.subplots(figsize=(8, 6))
    ax.plot(recalls, precisions, marker='.', label='Fraud Model')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title('Precision-Recall Curve')
    ax.legend()
    ax.grid()
    plt.show()

    print(f"Test Accuracy: {accuracy:.4f}")
    print(f"Test Precision: {precision:.4f}")
    print(f"Test Recall: {recall:.4f}")
    print(f"Test F1_score: {f1:.4f}")
    
