In [26]:
import pandas as pd
import numpy as np
import requests
import json
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, precision_recall_curve, average_precision_score

In [8]:
# Read the combined holdout CSV into `df`; the bare name on the last line
# renders the frame as this cell's output.
df = pd.read_csv(
    filepath_or_buffer=r'C:\Users\Asus\Downloads\Fraud_MLOps_Project\Data\combined_holdout.csv'
)
df

Unnamed: 0,accountAgeDays,numItems,localTime,paymentMethod,paymentMethodAgeDays,Category,isWeekend,target,dataset
0,3,1,4.524580,creditcard,2.564583,shopping,1.0,0,A
1,44,1,4.836982,creditcard,0.000000,shopping,0.0,0,A
2,253,1,4.965339,creditcard,0.000000,electronics,0.0,0,A
3,122,1,4.895263,creditcard,0.002778,food,1.0,0,A
4,1942,1,4.742303,creditcard,0.019444,food,1.0,0,A
...,...,...,...,...,...,...,...,...,...
295,1,1,4.748314,creditcard,0.004167,electronics,,1,C
296,1,1,4.745402,creditcard,0.000000,food,,1,C
297,1,1,2.596228,storecredit,0.000000,shopping,,1,C
298,1,1,5.034622,paypal,0.000694,food,,1,C


In [27]:
def evaluate_model(y_true, y_pred, dataset_name="Evaluation", is_proba=False):
    """
    Print and return classification metrics, optionally with PR-AUC and threshold tuning.

    Parameters
    ----------
    y_true : array-like
        True binary labels.
    y_pred : array-like
        Predicted labels, or positive-class scores when `is_proba` is True.
        Any array-like is accepted (list, pandas Series, NumPy array).
    dataset_name : str
        Name for the dataset (used in print titles and as the result-key prefix).
    is_proba : bool
        If True, `y_pred` is treated as scores: PR-AUC is computed, the
        threshold that maximizes F1 on this same data is selected, and the
        scores are binarized with it before the remaining metrics are computed.
        Because the threshold is tuned on the evaluated data, the reported
        metrics are optimistic for that threshold.

    Returns
    -------
    dict
        Keys `{dataset_name}_accuracy`, `{dataset_name}_precision`,
        `{dataset_name}_recall` and `{dataset_name}_f1`; plus
        `{dataset_name}_pr_auc` and `{dataset_name}_best_threshold` when
        `is_proba` is True.
    """
    best_threshold = 0.5  # Fallback when no threshold can be selected

    if is_proba:
        # Normalize to an ndarray so that plain lists (e.g. parsed from a JSON
        # response) support the element-wise `>=` comparison below.
        y_score = np.asarray(y_pred)

        # --- Compute PR-AUC ---
        pr_auc = average_precision_score(y_true, y_score)
        precision, recall, thresholds = precision_recall_curve(y_true, y_score)

        # --- Find best threshold (maximize F1) ---
        # precision/recall hold one more point than `thresholds`; the trailing
        # point has no associated threshold, so only score the points that do.
        n_thresholds = len(thresholds)
        p_at_thr = precision[:n_thresholds]
        r_at_thr = recall[:n_thresholds]
        # The small epsilon avoids 0/0 when precision and recall are both zero.
        f1_scores = 2 * (p_at_thr * r_at_thr) / (p_at_thr + r_at_thr + 1e-6)

        best_idx = 0
        if n_thresholds:
            best_idx = int(f1_scores.argmax())
            best_threshold = float(thresholds[best_idx])

        print(f"\n--- {dataset_name} PR-AUC: {pr_auc:.4f} ---")
        print(f"Optimal Threshold: {best_threshold:.3f} "
              f"(Precision={precision[best_idx]:.3f}, Recall={recall[best_idx]:.3f})")

        # Convert probabilities to binary using best threshold
        y_pred = (y_score >= best_threshold).astype(int)

    # --- Classification report ---
    print(f"\n--- {dataset_name} Classification Report ---")
    print(classification_report(y_true, y_pred))

    # --- Compute summary metrics ---
    accuracy = accuracy_score(y_true, y_pred)
    precision_val = precision_score(y_true, y_pred)
    recall_val = recall_score(y_true, y_pred)
    f1_val = f1_score(y_true, y_pred)

    print(f"{dataset_name} Accuracy:  {accuracy:.4f}")
    print(f"{dataset_name} Precision: {precision_val:.4f}")
    print(f"{dataset_name} Recall:    {recall_val:.4f}")
    print(f"{dataset_name} F1 Score:   {f1_val:.4f}")

    # --- Return results ---
    result = {
        f"{dataset_name}_accuracy": accuracy,
        f"{dataset_name}_precision": precision_val,
        f"{dataset_name}_recall": recall_val,
        f"{dataset_name}_f1": f1_val,
    }
    if is_proba:
        result[f"{dataset_name}_pr_auc"] = pr_auc
        result[f"{dataset_name}_best_threshold"] = best_threshold

    return result

# Docker

In [31]:

# ----------------------------
# 1. Load data
# ----------------------------
# Relies on `df` having been loaded by an earlier cell.
X_combined = df.drop(['target', 'dataset'], axis=1)
y_combined = df['target']

url = "http://127.0.0.1:5001/invocations"

# ----------------------------
# 2. Score the features through the served model
# ----------------------------
# Post only the feature columns. Sending the raw CSV file would also ship the
# `target` label (and the `dataset` tag) to the model being evaluated.
# The timeout is (connect, read) seconds so an unreachable or stalled endpoint
# raises instead of hanging the notebook.
response = requests.post(
    url,
    headers={"Content-Type": "text/csv"},
    data=X_combined.to_csv(index=False).encode("utf-8"),
    timeout=(5, 60),
)

# ----------------------------
# 3. Evaluate
# ----------------------------
if response.status_code == 200:
    # The JSON body is read as a mapping with a 'predictions' entry.
    preds = response.json()['predictions']
    print("Predictions received!")
    # Keep the returned metrics so later cells can reuse them.
    combined_holdout_metrics = evaluate_model(y_combined, preds, dataset_name="Combined Holdout")
else:
    print(f"Error: {response.status_code}\n{response.text}")

Predictions received!

--- Combined Holdout Classification Report ---
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       248
           1       0.91      0.98      0.94        52

    accuracy                           0.98       300
   macro avg       0.95      0.98      0.97       300
weighted avg       0.98      0.98      0.98       300

Combined Holdout Accuracy:  0.9800
Combined Holdout Precision: 0.9107
Combined Holdout Recall:    0.9808
Combined Holdout F1 Score:   0.9444


### The Docker-served model endpoint is working