In [None]:
from fraud_detection_project.constants import *
from fraud_detection_project.utils.common import read_yaml, create_directories, save_json
from fraud_detection_project import logger
from dataclasses import dataclass
from pathlib import Path
import pandas as pd
import mlflow
from urllib.parse import urlparse
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score


In [None]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of settings consumed by the model-evaluation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Working directory for this stage's artifacts.
    root_dir: Path
    # CSV file holding the held-out test set (features plus the target column).
    test_data_path: Path
    # Serialized model file; it is loaded with joblib during evaluation.
    model_path: Path
    # Model hyper-parameters, logged to MLflow as run parameters.
    all_params: dict
    # Destination file for the locally saved metrics JSON.
    metric_file_name: Path
    # Name of the label column inside the test CSV.
    target_column: str
    # MLflow tracking URI handed to mlflow.set_tracking_uri().
    mlflow_uri: str

In [None]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects.

    The three YAML files (config, params, schema) are read once in the
    constructor. The loaded objects are accessed by attribute below, so
    ``read_yaml`` is presumed to return an attribute-accessible mapping
    (e.g. a ConfigBox) -- confirm against ``utils.common``.
    """

    # Default location of the local MLflow file store. Kept for backward
    # compatibility; pass ``mlflow_dir`` to use a different location.
    DEFAULT_MLFLOW_DIR = Path("C:/mlflow_runs")

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH
    ):
        """Read the YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path to the main configuration YAML.
            params_filepath: Path to the model hyper-parameter YAML.
            schema_filepath: Path to the data schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self, mlflow_dir=None) -> ModelEvaluationConfig:
        """Assemble the configuration for the model-evaluation stage.

        Creates the stage's root directory and the local MLflow directory as
        side effects.

        Args:
            mlflow_dir: Optional directory for the local MLflow file store.
                When omitted, ``DEFAULT_MLFLOW_DIR`` is used.

        Returns:
            A populated, frozen ``ModelEvaluationConfig``.
        """
        config = self.config.model_evaluation
        params = self.params.XGBClassifier
        target_column = self.schema.TARGET_COLUMN.name

        create_directories([config.root_dir])

        # The test split is read from the data-transformation stage's output
        # directory, where it is expected to have been written as "test.csv".
        test_data_path = Path(self.config.data_transformation.output_dir) / "test.csv"

        # Local MLflow tracking store.
        local_mlflow_dir = Path(
            self.DEFAULT_MLFLOW_DIR if mlflow_dir is None else mlflow_dir
        )
        local_mlflow_dir.mkdir(parents=True, exist_ok=True)

        # Derive the URI from the very directory that was just created.
        # Hand-formatting "file:///<path>" only works for absolute Windows
        # paths; on other platforms the created directory and the URI would
        # point at different locations. as_uri() needs an absolute path.
        mlflow_uri = local_mlflow_dir.absolute().as_uri()

        # Wrap path-like settings in Path so the values match the types
        # declared on ModelEvaluationConfig.
        return ModelEvaluationConfig(
            root_dir=Path(config.root_dir),
            test_data_path=test_data_path,
            model_path=Path(config.model_path),
            all_params=params,
            metric_file_name=Path(config.metric_file_name),
            target_column=target_column,
            mlflow_uri=mlflow_uri,
        )


In [None]:
class ModelEvaluation:
    """Score a trained model on the test split and record the run in MLflow."""

    def __init__(self, config: ModelEvaluationConfig):
        """Keep a reference to the evaluation settings."""
        self.config = config

    def evaluate_model(self, y_true, y_pred):
        """Return ``(accuracy, precision, recall)`` for the given predictions.

        Precision and recall use the ``'weighted'`` average.
        """
        return (
            accuracy_score(y_true, y_pred),
            precision_score(y_true, y_pred, average='weighted'),
            recall_score(y_true, y_pred, average='weighted'),
        )

    def log_into_mlflow(self):
        """Evaluate the stored model and log params, metrics and model to MLflow.

        The metrics are also written to the configured local metrics file.
        """
        cfg = self.config

        # Load the held-out data and the serialized model.
        frame = pd.read_csv(cfg.test_data_path)
        estimator = joblib.load(cfg.model_path)

        # Separate the feature columns from the label column.
        features = frame.drop([cfg.target_column], axis=1)
        labels = frame[cfg.target_column]

        mlflow.set_tracking_uri(cfg.mlflow_uri)
        uri_scheme = urlparse(mlflow.get_tracking_uri()).scheme

        # The model is registered under a name only when the tracking store
        # is not a plain "file" store.
        registry_kwargs = (
            {} if uri_scheme == "file" else {"registered_model_name": "XGBClassifier"}
        )

        with mlflow.start_run():
            predictions = estimator.predict(features)
            accuracy, precision, recall = self.evaluate_model(labels, predictions)

            # Persist the metrics locally.
            scores = {"accuracy": accuracy, "precision": precision, "recall": recall}
            save_json(Path(cfg.metric_file_name), scores)

            # Record hyper-parameters, then each metric in a fixed order.
            mlflow.log_params(cfg.all_params)
            for metric_name, metric_value in (
                ("accuracy", accuracy),
                ("precision", precision),
                ("recall", recall),
            ):
                mlflow.log_metric(metric_name, metric_value)

            # Store the model artifact alongside the run.
            mlflow.sklearn.log_model(estimator, "model", **registry_kwargs)

In [None]:
# Run the model-evaluation stage end to end: load the configuration, then
# evaluate the stored model and log the results to MLflow.
try:
    config = ConfigurationManager()
    model_eval_config = config.get_model_evaluation_config()
    evaluator = ModelEvaluation(model_eval_config)
    evaluator.log_into_mlflow()
except Exception:
    # Record the failure (with traceback) in the project log before
    # propagating it. A bare `raise` re-raises the active exception without
    # adding an extra traceback frame, unlike `raise e`.
    logger.exception("Model evaluation stage failed")
    raise