In [1]:
import os

In [2]:
# Show the kernel's current working directory before changing it.
%pwd

'f:\\Files\\DS&ML\\E2E-Credit-Fraud-Detection\\Exp'

In [3]:
# Step up one directory level, then display the new working directory.
# NOTE(review): presumably this moves from the notebook folder to the project
# root so that relative paths used by later cells resolve -- confirm.
# NOTE(review): the relative chdir is not idempotent; re-running this cell
# without restarting the kernel climbs one more level each time.
os.chdir('../')
%pwd

'f:\\Files\\DS&ML\\E2E-Credit-Fraud-Detection'

In [4]:
from dataclasses import dataclass
from pathlib import Path

In [5]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of settings for the model-evaluation stage.

    The dataclass is frozen, so assigning to a field after construction
    raises ``dataclasses.FrozenInstanceError``. Field order is part of the
    positional constructor signature and must not be rearranged.
    """

    # Directory that receives the evaluation artifacts.
    root_dir: Path
    # Location of the test dataset.
    test_path: Path
    # Location of the persisted preprocessor.
    preprocess_path: Path
    # Location of the persisted model.
    model_path: Path
    # Model hyper-parameters carried alongside the paths.
    all_params: dict
    # Configured destination for the metrics file.
    metrics_path: str
    # Name of the label column in the test dataset.
    target_column: str

In [6]:
from project.constants import *
from project.utils.common import * 
from xgboost import XGBClassifier

In [7]:
class ConfigurationManager:
    """Loads the project's YAML files and builds stage configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_PATH,
                 params_filepath=PARAMS_PATH,
                 schema_filepath=SCHEMA_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        # Read order (config, schema, params) is kept as-is so the emitted
        # log lines appear in the same sequence.
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelEvaluationConfig: Populated from the ``model_evaluation``
            section of the config, the ``XGBClassifier`` section of the
            params and the ``target_column`` section of the schema.
        """
        stage_cfg = self.config.model_evaluation
        xgb_params = self.params.XGBClassifier
        target_schema = self.schema.target_column

        create_directories([stage_cfg.root_dir])

        return ModelEvaluationConfig(
            root_dir=stage_cfg.root_dir,
            test_path=stage_cfg.test_path,
            model_path=stage_cfg.model_path,
            all_params=xgb_params,
            preprocess_path=stage_cfg.preprocess_path,
            metrics_path=stage_cfg.metrics_path,
            target_column=target_schema.name,
        )

In [8]:
import pandas as pd
import json
import os
import joblib
import mlflow
import mlflow.xgboost
import dagshub
from pathlib import Path
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
from project import logger
from project.entity.config_entity import ModelEvaluationConfig


class ModelEvaluation:
    """Evaluate a persisted classifier on the test split and save the results.

    The configured preprocessor and model are loaded with ``joblib``, the test
    CSV is transformed and scored, and three artifacts are written:
    ``cm.png`` and ``roc.png`` under ``config.root_dir`` and a JSON metrics
    file at ``config.metrics_path`` (falling back to
    ``<root_dir>/metrics.json`` when no metrics path is configured).
    """

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation settings.

        Args:
            config: Paths and target-column name used by :meth:`evaluation`.
        """
        self.config = config

    def evaluation(self):
        """Score the model on the test data and persist metrics and plots.

        Returns:
            dict: Maps ``test_accuracy``, ``test_precision_weighted``,
            ``test_recall_weighted``, ``test_f1_weighted`` and ``test_auc``
            to plain Python floats.

        Raises:
            FileNotFoundError: If the test data, preprocessor or model file
                does not exist.
            ValueError: If the target column is absent from the test data.
        """
        self._require_inputs()

        # Load preprocessor and model
        logger.info("Loading preprocessor and model...")
        # NOTE(review): joblib.load unpickles arbitrary objects; only point
        # these paths at artifacts produced by a trusted pipeline.
        preprocessor = joblib.load(self.config.preprocess_path)
        model = joblib.load(self.config.model_path)

        # A fitted search object exposes `best_estimator_`; unwrap it so the
        # underlying estimator is the one being scored.
        if hasattr(model, 'best_estimator_'):
            logger.info("Model is a RandomizedSearchCV object, extracting best estimator...")
            model = model.best_estimator_
        else:
            logger.info("Model is a direct estimator, using its parameters...")

        # Load the test data and split features from the target
        test_data = pd.read_csv(self.config.test_path)
        target_column = self.config.target_column

        if target_column not in test_data.columns:
            raise ValueError(f"Target column '{target_column}' not found in test data.")

        test_x = test_data.drop(columns=[target_column])
        test_y = test_data[target_column]

        test_x_preprocessed = preprocessor.transform(test_x)

        # Hard predictions feed the classification metrics and the confusion matrix
        test_predictions = model.predict(test_x_preprocessed)

        # Column 1 of predict_proba feeds the ROC curve.
        # NOTE(review): assumes a binary classifier whose second column is
        # the positive class -- confirm.
        test_probabilities = model.predict_proba(test_x_preprocessed)[:, 1]

        # The ROC curve is computed once and shared by the AUC metric and the plot
        fpr, tpr, _ = roc_curve(test_y, test_probabilities)

        # float() keeps every value a plain Python float, so the log line and
        # the JSON file do not mix numpy scalars with built-in floats.
        metrics = {
            "test_accuracy": float(accuracy_score(test_y, test_predictions)),
            "test_precision_weighted": float(precision_score(test_y, test_predictions, average='weighted')),
            "test_recall_weighted": float(recall_score(test_y, test_predictions, average='weighted')),
            "test_f1_weighted": float(f1_score(test_y, test_predictions, average='weighted')),
            "test_auc": float(auc(fpr, tpr)),
        }

        logger.info(f"Model evaluation metrics: {metrics}")

        cm_path = self._save_confusion_matrix(test_y, test_predictions)
        logger.info(f"Confusion matrix saved and logged at {cm_path}")

        roc_path = self._save_roc_curve(fpr, tpr, metrics["test_auc"])
        logger.info(f"ROC curve saved at {roc_path}")

        # Write the metrics to the configured location rather than a
        # hard-coded file name, creating the parent directory if needed.
        metrics_file = self._metrics_file()
        metrics_file.parent.mkdir(parents=True, exist_ok=True)
        with open(metrics_file, 'w', encoding="utf-8") as f:
            json.dump(metrics, f)
        logger.info(f"Metrics saved to {metrics_file}")

        return metrics

    def _require_inputs(self):
        """Raise FileNotFoundError for the first configured input file that is missing."""
        required = (
            ("Test data", self.config.test_path),
            ("Preprocessor", self.config.preprocess_path),
            ("Model", self.config.model_path),
        )
        for label, path in required:
            if not os.path.exists(path):
                raise FileNotFoundError(f"{label} file not found at {path}")

    def _metrics_file(self) -> Path:
        """Return the configured metrics path, or ``<root_dir>/metrics.json`` if none is set."""
        configured = getattr(self.config, "metrics_path", None)
        if configured:
            return Path(configured)
        return Path(self.config.root_dir) / "metrics.json"

    def _save_confusion_matrix(self, test_y, test_predictions) -> Path:
        """Draw the confusion matrix as a heatmap, save it as ``cm.png`` and return its path."""
        cm = confusion_matrix(test_y, test_predictions)
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_title("Confusion Matrix")
        ax.set_ylabel("True Label")
        ax.set_xlabel("Predicted Label")
        cm_path = Path(self.config.root_dir) / "cm.png"
        fig.savefig(cm_path)
        # Close explicitly so repeated evaluations do not accumulate open figures
        plt.close(fig)
        return cm_path

    def _save_roc_curve(self, fpr, tpr, roc_auc) -> Path:
        """Plot the ROC curve with a chance diagonal, save it as ``roc.png`` and return its path."""
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.plot(fpr, tpr, color="blue", label=f"ROC curve (AUC = {roc_auc:.2f})")
        # Diagonal reference line from (0, 0) to (1, 1)
        ax.plot([0, 1], [0, 1], color="red", linestyle="--")
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.set_title("Receiver Operating Characteristic (ROC) Curve")
        ax.legend(loc="lower right")
        roc_path = Path(self.config.root_dir) / "roc.png"
        fig.savefig(roc_path, bbox_inches="tight")
        plt.close(fig)
        return roc_path

In [9]:
# Run the evaluation stage end to end: build the configuration, construct the
# evaluator and score the model.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.evaluation()

except Exception as e:
    # Record the traceback, then re-raise so a failed run is visible as a
    # failed cell instead of passing silently under "Run All".
    logger.exception(e)
    raise
else:
    # Only reached when no exception occurred, so the message is never
    # logged for a run that actually failed.
    logger.info("Model evaluation completed")

[2025-04-21 17:51:58,439: INFO: common: yaml file: yaml file\config.yaml loaded successfully]
[2025-04-21 17:51:58,461: INFO: common: yaml file: yaml file\schema.yaml loaded successfully]
[2025-04-21 17:51:58,466: INFO: common: yaml file: yaml file\params.yaml loaded successfully]
[2025-04-21 17:51:58,466: INFO: common: created directory at: artifacts]
[2025-04-21 17:51:58,466: INFO: common: created directory at: artifacts/model_evaluation]
[2025-04-21 17:51:58,472: INFO: 3400358762: Loading preprocessor and model...]
[2025-04-21 17:51:59,009: INFO: 3400358762: Model is a RandomizedSearchCV object, extracting best estimator...]
[2025-04-21 17:51:59,115: INFO: 3400358762: Model evaluation metrics: {'test_accuracy': 0.9365979381443299, 'test_precision_weighted': 0.93986165708818, 'test_recall_weighted': 0.9365979381443299, 'test_f1_weighted': 0.9365057040580845, 'test_auc': np.float64(0.9769071388495418)}]
[2025-04-21 17:51:59,310: INFO: 3400358762: Confusion matrix saved and logged at a