In [1]:
import os

In [2]:
# Show the kernel's working directory before it is changed in the next cell.
%pwd

'f:\\Files\\DS&ML\\FraudGuard\\notebooks'

In [3]:
os.chdir('../')
%pwd

'f:\\Files\\DS&ML\\FraudGuard'

In [4]:
from dataclasses import dataclass
from pathlib import Path

In [5]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model-evaluation stage.

    Instances are assembled by
    ``ConfigurationManager.get_model_evaluation_config`` from the project's
    config, schema and params YAML files. The annotations are descriptive
    only: a dataclass does not coerce or validate the values it is given.
    """

    # Filesystem locations.
    root_dir: Path          # directory that receives the evaluation outputs
    test_path: Path         # CSV file holding the test set
    preprocess_path: Path   # serialized preprocessor, loaded with joblib
    model_path: Path        # serialized model, loaded with joblib
    metrics_path: Path      # JSON file the computed metrics are written to
    target_column: str      # name of the label column inside the test CSV
    cm_path: Path           # image file for the confusion-matrix plot
    roc_path: Path          # image file for the ROC-curve plot

    # MLflow connection details.
    # NOTE(review): the password is part of the generated repr; avoid logging
    # or displaying instances of this class.
    mlflow_username: str
    mlflow_password: str
    experiment_name: str
    tracking_uri: str

In [6]:
from FraudGuard.constants import *
from FraudGuard.utils.helpers import *
from FraudGuard.utils.exceptions import *
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [7]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(self,
                 config_filepath=CONFIG_PATH,
                 params_filepath=PARAMS_PATH,
                 schema_filepath=SCHEMA_PATH):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file read into ``self.config``.
            params_filepath: YAML file read into ``self.params``.
            schema_filepath: YAML file read into ``self.schema``.
        """
        # Files are read in the order config -> schema -> params.
        yaml_sources = (
            ("config", config_filepath),
            ("schema", schema_filepath),
            ("params", params_filepath),
        )
        for attribute, filepath in yaml_sources:
            setattr(self, attribute, read_yaml(filepath))

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the evaluation-stage settings and create its output directory.

        Returns:
            A ``ModelEvaluationConfig`` filled from the ``model_evaluation``
            section of the config file, the ``target_column`` entry of the
            schema file and the ``mlflow`` section of the params file.
        """
        eval_section = self.config.model_evaluation
        target_spec = self.schema.target_column
        mlflow_section = self.params.mlflow

        create_directories([eval_section.root_dir])

        return ModelEvaluationConfig(
            root_dir=eval_section.root_dir,
            test_path=eval_section.test_path,
            model_path=eval_section.model_path,
            preprocess_path=eval_section.preprocess_path,
            metrics_path=eval_section.metrics_path,
            target_column=target_spec.name,
            cm_path=eval_section.cm_path,
            roc_path=eval_section.roc_path,
            mlflow_username=mlflow_section.mlflow_username,
            mlflow_password=mlflow_section.mlflow_password,
            experiment_name=mlflow_section.experiment_name,
            tracking_uri=mlflow_section.tracking_uri,
        )

In [None]:
import os
import json
import joblib
import mlflow
import dagshub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_curve, auc, confusion_matrix
)

from FraudGuard import logger
from FraudGuard.utils.helpers import save_json
from FraudGuard.entity.config_entity import ModelEvaluationConfig


class ModelEvaluation:
    """Scores the saved model on the test set and records the results in MLflow.

    Constructing an instance exports the MLflow credentials to the process
    environment, initialises DagsHub and selects the tracking URI and
    experiment. ``evaluation`` then computes the metrics, writes them to JSON
    and logs metrics, artifacts and plots inside a single MLflow run.
    """

    # Experiment used when the configuration does not supply a name.
    DEFAULT_EXPERIMENT = "Fraud-Detection"

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the config and point MLflow/DagsHub at the tracking server.

        Args:
            config: Evaluation settings (paths, target column, MLflow details).
        """
        self.config = config

        # Set MLflow credentials
        os.environ["MLFLOW_TRACKING_USERNAME"] = self.config.mlflow_username
        os.environ["MLFLOW_TRACKING_PASSWORD"] = self.config.mlflow_password

        # Initialize Dagshub
        dagshub.init(
            repo_owner="JavithNaseem-J",
            repo_name="FraudGuard.mlflow",
            mlflow=True
        )

        mlflow.set_tracking_uri(self.config.tracking_uri)
        # Honour the configured experiment; fall back to the historical name
        # when the setting is empty.
        mlflow.set_experiment(self.config.experiment_name or self.DEFAULT_EXPERIMENT)

    @staticmethod
    def _positive_class_scores(model, features):
        """Return the second ``predict_proba`` column, or None if unavailable.

        None is returned when the model has no callable ``predict_proba`` or
        when the result is not a two-column matrix, because the ROC code below
        handles only a single score vector.
        """
        predict_proba = getattr(model, "predict_proba", None)
        if not callable(predict_proba):
            return None
        probabilities = np.asarray(predict_proba(features))
        if probabilities.ndim != 2 or probabilities.shape[1] != 2:
            return None
        return probabilities[:, 1]

    def _save_confusion_matrix(self, y_true, y_pred):
        """Render the confusion matrix as a heatmap and save it to ``cm_path``."""
        cm = confusion_matrix(y_true, y_pred)
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
            ax.set_title("Confusion Matrix")
            ax.set_xlabel("Predicted")
            ax.set_ylabel("Actual")
            fig.tight_layout()
            fig.savefig(self.config.cm_path)
        finally:
            # Close even when plotting fails so figures do not pile up.
            plt.close(fig)

    def _save_roc_curve(self, fpr, tpr, auc_value):
        """Plot the ROC curve with a chance diagonal and save it to ``roc_path``."""
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            ax.plot(fpr, tpr, label=f"AUC = {auc_value:.2f}")
            ax.plot([0, 1], [0, 1], linestyle="--", color="gray")
            ax.set_xlabel("False Positive Rate")
            ax.set_ylabel("True Positive Rate")
            ax.set_title("ROC Curve")
            ax.legend()
            fig.tight_layout()
            fig.savefig(self.config.roc_path, bbox_inches="tight")
        finally:
            plt.close(fig)

    def evaluation(self):
        """Evaluate the model on the test set and log everything to MLflow.

        Returns:
            dict: accuracy plus weighted and macro precision/recall/F1, and
            ``"auc"`` when the model yields binary class probabilities.

        Raises:
            FileNotFoundError: If the test data, model or preprocessor file is
                missing.
            Exception: Any error raised while logging to MLflow or plotting is
                logged and re-raised unchanged.
        """
        # Validate paths
        required_files = (
            ("Test data", self.config.test_path),
            ("Model", self.config.model_path),
            ("Preprocessor", self.config.preprocess_path),
        )
        for label, path in required_files:
            if not os.path.exists(path):
                raise FileNotFoundError(f"{label} not found: {path}")

        # Load artifacts
        # NOTE(review): joblib.load unpickles its input and can execute
        # arbitrary code; only load artifacts produced by this pipeline.
        test_df = pd.read_csv(self.config.test_path)
        model = joblib.load(self.config.model_path)
        preprocessor = joblib.load(self.config.preprocess_path)

        # Split features/target
        target_column = self.config.target_column
        X_test = test_df.drop(columns=[target_column])
        y_test = test_df[target_column]
        X_test_transformed = preprocessor.transform(X_test)

        # Make predictions
        preds = model.predict(X_test_transformed)
        # Scores for the ROC curve; None when the model cannot provide them.
        proba = self._positive_class_scores(model, X_test_transformed)

        # Metrics
        metrics = {
            "accuracy": accuracy_score(y_test, preds),
            "precision_weighted": precision_score(y_test, preds, average="weighted"),
            "recall_weighted": recall_score(y_test, preds, average="weighted"),
            "f1_weighted": f1_score(y_test, preds, average="weighted"),
            "precision_macro": precision_score(y_test, preds, average="macro"),
            "recall_macro": recall_score(y_test, preds, average="macro"),
            "f1_macro": f1_score(y_test, preds, average="macro"),
        }

        fpr = tpr = None
        if proba is not None:
            # NOTE(review): assumes the second probability column corresponds
            # to the label roc_curve treats as positive - confirm for this data.
            fpr, tpr, _ = roc_curve(y_test, proba)
            metrics["auc"] = auc(fpr, tpr)

        # Save metrics to JSON
        os.makedirs(self.config.root_dir, exist_ok=True)
        save_json(path=Path(self.config.metrics_path), data=metrics)

        try:
            with mlflow.start_run(run_name="Model Evaluation"):
                mlflow.log_metrics({k: float(v) for k, v in metrics.items()})
                mlflow.set_tag("stage", "evaluation")

                # Log artifacts
                mlflow.log_artifact(self.config.metrics_path)
                mlflow.log_artifact(self.config.model_path)
                mlflow.log_artifact(self.config.preprocess_path)

                # Confusion Matrix
                self._save_confusion_matrix(y_test, preds)
                mlflow.log_artifact(self.config.cm_path)

                # ROC Curve
                if proba is not None:
                    self._save_roc_curve(fpr, tpr, metrics["auc"])
                    mlflow.log_artifact(self.config.roc_path)

        except Exception as e:
            logger.error(f"Error during evaluation and logging: {e}")
            raise

        logger.info("✅ Model evaluation complete. Metrics and plots logged.")
        return metrics


In [9]:
# `sys` is passed to CustomException below but is not imported by name
# anywhere else in this notebook, so import it here rather than relying on a
# wildcard import to provide it.
import sys

try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.evaluation()
except Exception as e:
    # Chain explicitly so the original error stays attached as the cause.
    raise CustomException(str(e), sys) from e

[2025-06-13 12:46:44,550: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]


[2025-06-13 12:46:44,551: INFO: helpers: yaml file: config_file\schema.yaml loaded successfully]
[2025-06-13 12:46:44,564: INFO: helpers: yaml file: config_file\params.yaml loaded successfully]
[2025-06-13 12:46:44,567: INFO: helpers: created directory at: artifacts]
[2025-06-13 12:46:44,568: INFO: helpers: created directory at: artifacts/model_evaluation]


[2025-06-13 12:46:45,245: INFO: helpers: Repository initialized!]


2025/06/13 12:46:46 INFO mlflow.tracking.fluent: Experiment with name 'Fraud-Detection' does not exist. Creating a new experiment.


[2025-06-13 12:46:54,519: INFO: helpers: json file saved at: artifacts\model_evaluation\metrics.json]
🏃 View run Model Evaluation at: https://dagshub.com/JavithNaseem-J/FraudGuard.mlflow/#/experiments/0/runs/103626c75c1e463691d1a0b32295318a
🧪 View experiment at: https://dagshub.com/JavithNaseem-J/FraudGuard.mlflow/#/experiments/0
[2025-06-13 12:47:02,840: INFO: 2921192990: ✅ Model evaluation complete. Metrics and plots logged.]
