The function below takes the actual labels and the predicted labels (and, optionally, the predicted probabilities for the positive class) and returns the key evaluation metrics: Accuracy, Precision, Recall, F1-score, ROC-AUC, and the Confusion Matrix.

In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

def evaluate_model(y_true, y_pred, y_pred_proba=None):
    """
    Compute standard binary-classification metrics for one set of predictions.

    Parameters:
        y_true: Actual class labels.
        y_pred: Predicted class labels.
        y_pred_proba: Optional scores for the positive class (for example
            ``model.predict_proba(X)[:, 1]``). ROC-AUC is only computed when
            this is provided.

    Returns:
        dict: Keys 'Accuracy', 'Precision', 'Recall' and 'F1-Score' hold the values
        returned by the corresponding scikit-learn functions. 'Confusion Matrix'
        holds a dict with the four cell counts as built-in ints. 'ROC-AUC' is
        present only when ``y_pred_proba`` is not None.

    Raises:
        Errors raised by the underlying scikit-learn metric functions propagate
        unchanged.
    """
    # Initialize dictionary to hold all evaluation metrics
    metrics = {}

    # Calculate and store the evaluation metrics.
    # zero_division=0 makes an undefined ratio (empty denominator) come back as 0
    # instead of emitting a warning.
    metrics['Accuracy'] = accuracy_score(y_true, y_pred)
    metrics['Precision'] = precision_score(y_true, y_pred, zero_division=0)
    metrics['Recall'] = recall_score(y_true, y_pred, zero_division=0)
    metrics['F1-Score'] = f1_score(y_true, y_pred, zero_division=0)

    # Confusion matrix: True Positives, False Positives, True Negatives, False Negatives
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape == (1, 1):
        # Only one class occurs in both y_true and y_pred, so scikit-learn returned a
        # 1x1 matrix that cannot be unpacked into four cells. Re-compute with the label
        # order pinned to the 0/1 encoding so the result is always 2x2.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Store built-in ints so the counts print and serialize cleanly.
    metrics['Confusion Matrix'] = {
        'True Positives': int(tp),
        'False Positives': int(fp),
        'True Negatives': int(tn),
        'False Negatives': int(fn)
    }

    # ROC-AUC Score (only if predicted probabilities are provided)
    if y_pred_proba is not None:
        metrics['ROC-AUC'] = roc_auc_score(y_true, y_pred_proba)

    return metrics


Example of using it in practice:

In [2]:
# Prerequisites (not defined in this cell): a fitted classifier `rf_model`, the scaled
# test features `X_test_scaled`, and the actual test labels `y_test`. Run the cells
# that create them first, otherwise this cell fails with a NameError.

# Hard class predictions drive accuracy, precision, recall, F1 and the confusion matrix
y_pred = rf_model.predict(X_test_scaled)

# Column 1 of predict_proba is used as the positive-class (fraud) probability for ROC-AUC
y_pred_proba = rf_model.predict_proba(X_test_scaled)[:, 1]

# Compute every metric in a single call
evaluation_results = evaluate_model(y_test, y_pred, y_pred_proba)

# Report one metric per line
for metric in evaluation_results:
    value = evaluation_results[metric]
    print(f"{metric}: {value}")

NameError: name 'rf_model' is not defined

Visualising Confusion Matrix:

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred):
    """
    Draw the binary confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Rows are the actual classes and columns the predicted classes, both labelled
    'Non-Fraud' then 'Fraud'. The figure is shown with ``plt.show()``.

    Parameters:
        y_true: Actual class labels.
        y_pred: Predicted class labels.

    Raises:
        ValueError: Propagated from scikit-learn when only a single class is observed
            and it is neither 0 nor 1 (the 0/1 fallback below cannot be applied).
    """
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape == (1, 1):
        # Only one class occurs in both inputs, which would give a 1x1 matrix that does
        # not line up with the two tick labels. Pin the label order to the 0/1 encoding
        # so a full 2x2 matrix is drawn.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    class_names = ['Non-Fraud', 'Fraud']
    # sns.heatmap draws on the current Axes and returns it; label that Axes explicitly.
    ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                     xticklabels=class_names,
                     yticklabels=class_names)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix')
    plt.show()

# Render the heatmap for the test-set predictions (`y_test` and `y_pred` must already
# exist from the earlier cells)
plot_confusion_matrix(y_true=y_test, y_pred=y_pred)

NameError: name 'y_test' is not defined

Evaluating several models and ranking them by ROC-AUC:

In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

def evaluate_models_auc(models, params, X=None, y=None):
    """
    Train each model with its parameters and rank the models by ROC-AUC.

    Every entry of ``models`` is instantiated with the matching dictionary from
    ``params``, fitted on a stratified 70% training split, and scored on the
    remaining 30% (fixed ``random_state=42``).

    Parameters:
        models (list): Model classes (e.g., LogisticRegression, RandomForestClassifier).
        params (list): Keyword-argument dictionaries, one per entry in ``models``.
        X (array-like, optional): Feature matrix to evaluate on. When both ``X`` and
            ``y`` are omitted, a synthetic binary dataset (1000 samples, 20 features)
            is generated for demonstration purposes.
        y (array-like, optional): Binary target labels belonging to ``X``.

    Returns:
        list of tuples: (model_name, ROC-AUC score) tuples, sorted by ROC-AUC score in
        descending order.

    Raises:
        ValueError: If ``models`` and ``params`` differ in length, or if only one of
            ``X`` and ``y`` is supplied.
    """
    # Materialize first so any iterable is accepted and the lengths can be compared.
    models = list(models)
    params = list(params)
    if len(models) != len(params):
        # zip() would silently drop the unmatched entries from the ranking.
        raise ValueError(
            f"models and params must have the same length "
            f"(got {len(models)} and {len(params)})"
        )
    if (X is None) != (y is None):
        raise ValueError("X and y must be provided together")

    if X is None:
        # No data supplied: fall back to the synthetic demonstration dataset.
        X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

    # Split the data into training and testing sets (stratified to keep class balance)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

    # List to store the model names and ROC-AUC scores
    roc_auc_results = []

    for model_func, param_dict in zip(models, params):
        # Initialize the model with the given parameters and train it
        model = model_func(**param_dict)
        model.fit(X_train, y_train)

        # ROC-AUC needs a continuous score, not class labels. Prefer the positive-class
        # probability; estimators that do not expose predict_proba fall back to
        # decision_function, which roc_auc_score also accepts.
        if hasattr(model, 'predict_proba'):
            y_score = model.predict_proba(X_test)[:, 1]
        else:
            y_score = model.decision_function(X_test)

        roc_auc = roc_auc_score(y_test, y_score)

        # Store the model name and its ROC-AUC score
        roc_auc_results.append((model_func.__name__, roc_auc))

    # Sort the models by their ROC-AUC score in descending order
    roc_auc_results.sort(key=lambda x: x[1], reverse=True)

    return roc_auc_results

# Example usage:

# Import some classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Estimator classes to compare, each paired by position with its constructor arguments
models = [LogisticRegression, RandomForestClassifier, SVC]
params = [
    dict(random_state=42, max_iter=1000),     # Logistic Regression
    dict(n_estimators=100, random_state=42),  # Random Forest
    dict(probability=True, random_state=42),  # SVC: probability=True enables predict_proba
]

# Train, score and rank all models
roc_auc_results = evaluate_models_auc(models, params)

# Show the ranking, best model first
print("Models sorted by ROC-AUC score:")
for ranked_entry in roc_auc_results:
    model_name, auc_score = ranked_entry
    print(f"{model_name}: {auc_score}")
