In [1]:
import os

In [2]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Fraud Detection\\research'

In [3]:
# Move from the notebook folder to the project root so relative paths such as
# config/config.yaml and artifacts/ resolve. Guarded on the folder name so that
# re-running this cell does not climb one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Fraud Detection'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelEvaluationConfig:
    """Settings consumed by the model-evaluation stage."""

    # Directory belonging to this stage; created by
    # ConfigurationManager.get_model_evaluation before the config is built.
    root_dir: Path
    # Location of the persisted model that ModelEvaluator loads.
    best_model_path: Path
    # Destination of the metrics JSON written by ModelEvaluator.evaluate.
    save_path: Path
    

In [6]:
from fraud_detection.utils.common import read_yaml
from fraud_detection.constants import *
from fraud_detection.utils.common import create_directories, save_object
from fraud_detection.entity import DataTransformationConfig

In [7]:
class ConfigurationManager:
    """Reads the project YAML configuration and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        """Load the configuration file and ensure the artifacts root exists.

        Args:
            config_filepath: path handed to ``read_yaml``; defaults to the
                project-wide ``CONFIG_FILE_PATH`` constant.
        """
        self.config = read_yaml(config_filepath)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation(self) -> DataTransformationConfig:
        """Build the data-transformation config from the ``data_transformation`` section.

        The section's ``root_dir`` is created before the config object is built.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        # Every field is copied verbatim from the YAML section under the same name.
        field_names = (
            "root_dir",
            "train_path",
            "test_path",
            "train_data",
            "test_data",
            "preprocessor",
        )
        return DataTransformationConfig(
            **{name: getattr(section, name) for name in field_names}
        )

    def get_model_evaluation(self) -> ModelEvaluationConfig:
        """Build the model-evaluation config from ``model_tuner.model_evaluation``.

        The section's ``root_dir`` is created before the config object is built.
        """
        section = self.config.model_tuner.model_evaluation
        create_directories([section.root_dir])

        return ModelEvaluationConfig(
            root_dir=section.root_dir,
            best_model_path=section.best_model_path,
            save_path=section.save_path,
        )

        

    

In [8]:
from pathlib import Path
from fraud_detection.utils.common import save_json  
import numpy as np
from sklearn.metrics import (
    f1_score,
    roc_auc_score,
    precision_score,
    recall_score,
    confusion_matrix
)
from fraud_detection.conponents.data_transformation import DataTransformation
import joblib  # for loading the model
import os
import json
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report)

In [9]:
class ModelEvaluator:
    """Scores a persisted classifier on the validation split and reports the metrics.

    Precision, recall, F1 and ROC AUC are all computed for ``positive_label``
    (the fraud class); accuracy and the confusion matrix cover all classes.
    """

    def __init__(self, config, data_transformer, positive_label=1):
        """
        Args:
            config: object exposing ``best_model_path`` and ``save_path``.
            data_transformer: object exposing
                ``initiate_data_transformation_and_split()``.
            positive_label: label representing the 'fraud' class, usually 1.

        Raises:
            FileNotFoundError: if ``config.best_model_path`` does not exist.
        """
        self.config = config
        self.data_transformer = data_transformer
        self.save_path = config.save_path
        self.positive_label = positive_label
        self.model = self._load_model(config.best_model_path)

    def _load_model(self, model_path):
        """Load the model stored at ``model_path`` with joblib.

        Raises:
            FileNotFoundError: if nothing exists at ``model_path``.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found at {model_path}")
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only point this at trusted artifacts.
        return joblib.load(model_path)

    def _positive_class_scores(self, X):
        """Return the predicted probability of ``positive_label`` for each row of ``X``.

        Returns ``None`` when the model has no ``predict_proba``.

        Raises:
            ValueError: if the model exposes ``classes_`` and ``positive_label``
                is not one of them.
        """
        if not hasattr(self.model, "predict_proba"):
            return None

        probabilities = self.model.predict_proba(X)
        classes = getattr(self.model, "classes_", None)
        if classes is None:
            # No class ordering available: keep the previous assumption that
            # the positive class sits in column 1.
            return probabilities[:, 1]

        # predict_proba columns follow the order of ``classes_``, so the
        # positive column must be looked up rather than assumed to be 1.
        class_labels = np.asarray(classes).tolist()
        if self.positive_label not in class_labels:
            raise ValueError(
                f"positive_label={self.positive_label!r} is not among the "
                f"model classes {class_labels}"
            )
        return probabilities[:, class_labels.index(self.positive_label)]

    def evaluate(self):
        """Compute, print and optionally save metrics for the validation split.

        Returns:
            dict with keys ``validation_accuracy``, ``validation_f1_score_fraud``,
            ``validation_precision_fraud``, ``validation_recall_fraud``,
            ``validation_roc_auc_fraud`` (``None`` when unavailable) and
            ``validation_confusion_matrix`` (nested lists).
        """
        # Only the validation features/labels are scored here; the other five
        # elements of the returned tuple are not needed.
        (
            _,
            X_val,
            _,
            _,
            y_val,
            _,
            _,
        ) = self.data_transformer.initiate_data_transformation_and_split()

        y_val_pred = self.model.predict(X_val)
        y_val_prob = self._positive_class_scores(X_val)

        # Overall accuracy
        accuracy = accuracy_score(y_val, y_val_pred)

        # Metrics for positive (fraud) class explicitly
        precision = precision_score(y_val, y_val_pred, pos_label=self.positive_label)
        recall = recall_score(y_val, y_val_pred, pos_label=self.positive_label)
        f1 = f1_score(y_val, y_val_pred, pos_label=self.positive_label)

        # ROC AUC for the positive class. The labels are binarised against
        # positive_label so the scores and the "positive" class always agree,
        # whatever the label ordering is.
        roc_auc = None
        if y_val_prob is not None:
            try:
                is_positive = np.asarray(y_val) == self.positive_label
                roc_auc = roc_auc_score(is_positive, y_val_prob)
            except ValueError:
                roc_auc = None  # Handle cases with only one class in y_val

        conf_matrix = confusion_matrix(y_val, y_val_pred).tolist()

        # Print detailed classification report for both classes
        print("=== Classification Report (All Classes) ===")
        print(classification_report(y_val, y_val_pred))

        print("=== Confusion Matrix ===")
        print(conf_matrix)

        print("=== Fraud Class (Positive Label) Metrics ===")
        print(f"Accuracy:  {accuracy:.4f}")
        print(f"Precision: {precision:.4f}  (fraud)")
        print(f"Recall:    {recall:.4f}  (fraud)")
        print(f"F1 Score:  {f1:.4f}  (fraud)")
        if roc_auc is not None:
            print(f"ROC AUC:   {roc_auc:.4f}  (fraud)")
        else:
            print("ROC AUC:   N/A")

        results = {
            "validation_accuracy": accuracy,
            "validation_f1_score_fraud": f1,
            "validation_precision_fraud": precision,
            "validation_recall_fraud": recall,
            "validation_roc_auc_fraud": roc_auc,
            "validation_confusion_matrix": conf_matrix
        }

        # An empty/None save_path means "do not persist".
        if self.save_path:
            save_json(Path(self.save_path), results)
            print(f"[ModelEvaluator] Results saved to {self.save_path}")

        return results

In [10]:
# Run the evaluation stage end to end: build both stage configs, create the
# data transformer and the evaluator, then score the model.
# No try/except here: the previous handler only re-raised the same exception,
# so errors propagate exactly as before without the extra traceback frame.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation()
data_transformer = DataTransformation(config=data_transformation_config)
model_evaluation_config = config.get_model_evaluation()
model_evaluation = ModelEvaluator(config=model_evaluation_config, data_transformer=data_transformer)
# Keep the metrics dict so later cells can inspect it.
evaluation_results = model_evaluation.evaluate()

[2025-07-04 20:54:47,730: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-04 20:54:47,737: INFO: common: created directory at: artifacts]
[2025-07-04 20:54:47,741: INFO: common: created directory at: artifacts/data_transformation]
[2025-07-04 20:54:47,742: INFO: common: created directory at: artifacts/model_evaluation]
Transaction Date column after conversion:
0   2024-02-20 05:58:41
1   2024-02-25 08:09:45
2   2024-03-18 03:42:55
3   2024-03-16 20:41:31
4   2024-01-15 05:08:17
Name: Transaction Date, dtype: datetime64[ns]
Data type: datetime64[ns]
Transaction Date column after conversion:
0   2024-03-24 23:42:43
1   2024-01-22 00:53:31
2   2024-01-22 08:06:03
3   2024-01-16 20:34:53
4   2024-01-16 15:47:23
Name: Transaction Date, dtype: datetime64[ns]
Data type: datetime64[ns]
[2025-07-04 20:55:03,411: INFO: data_transformation: Building preprocessing pipeline.]
[2025-07-04 20:55:03,543: INFO: data_transformation: Applying preprocessing pipeline.]




=== Classification Report (All Classes) ===
              precision    recall  f1-score   support

           0       0.96      1.00      0.98    279823
           1       0.81      0.14      0.25     14768

    accuracy                           0.96    294591
   macro avg       0.88      0.57      0.61    294591
weighted avg       0.95      0.96      0.94    294591

=== Confusion Matrix ===
[[279319, 504], [12629, 2139]]
=== Fraud Class (Positive Label) Metrics ===
Accuracy:  0.9554
Precision: 0.8093  (fraud)
Recall:    0.1448  (fraud)
F1 Score:  0.2457  (fraud)
ROC AUC:   0.8113  (fraud)
[2025-07-04 20:55:06,950: INFO: common: json file saved at: artifacts\model_evaluation\evaluation_results.json]
[ModelEvaluator] Results saved to artifacts/model_evaluation/evaluation_results.json
