In [5]:
pwd  # IPython automagic (%pwd): display the kernel's current working directory; keep this cell a single line

'c:\\Users\\Arpit Kadam\\Desktop\\Insurance-Fraud-Detection\\research'

In [6]:
import os
from pathlib import Path

# The notebook is started from the project's `research/` folder, while the
# `from src.Insurance_Fraud ...` imports further down need the project root as
# the working directory. Step up one level instead of hard-coding an absolute,
# machine-specific path. The guard makes the cell idempotent: re-running it
# after the directory has already been changed is a no-op.
if Path.cwd().name == "research":
    os.chdir(Path.cwd().parent)

In [15]:
pwd  # IPython automagic (%pwd): confirm the working directory after the chdir above; keep this cell a single line

'c:\\Users\\Arpit Kadam\\Desktop\\Insurance-Fraud-Detection'

In [1]:
import os

In [2]:
from getpass import getpass

# Tell the MLflow client which tracking server to use and which account logs the run.
os.environ['MLFLOW_TRACKING_URI'] = 'https://dagshub.com/ArpitKadam/Insurance-Fraud-Detection.mlflow'
os.environ['MLFLOW_TRACKING_USERNAME'] = 'ArpitKadam'

# SECURITY: never commit the access token to the notebook. Use the value already
# exported in the environment; otherwise ask for it interactively so it is not
# stored in the file or echoed in the cell output.
# The token that was previously hard-coded in this cell has been exposed and
# must be revoked and regenerated.
if not os.environ.get('MLFLOW_TRACKING_TOKEN'):
    os.environ['MLFLOW_TRACKING_TOKEN'] = getpass('MLflow/DagsHub tracking token: ')

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model-evaluation stage.

    Instances are built by ``ConfigurationManager.get_model_evaluation_config``.
    NOTE(review): the path fields are annotated as ``Path`` but are filled with
    values taken straight from the YAML config, so they may be plain strings at
    runtime -- confirm before relying on ``Path`` methods.
    """

    root_dir: Path  # output directory of the stage; also passed to mlflow.set_experiment as the experiment name
    test_data_path: Path  # CSV file read with pandas for evaluation
    model_path: Path  # serialized model loaded with joblib
    all_params: dict  # model hyper-parameters, logged to MLflow via log_params
    metric_file_name: Path  # metrics file location from the config; not used by the code visible here
    target_column: str  # name of the label column separated from the features
    mlflow_uri: str  # tracking server URI passed to mlflow.set_tracking_uri

In [4]:
from src.Insurance_Fraud.constants import *
from src.Insurance_Fraud.utils.common import read_yaml, create_directories

In [22]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage configuration objects.

    The config, params and schema files are loaded once in ``__init__`` and the
    artifacts root directory is created as a side effect.
    """

    # Used only when MLFLOW_TRACKING_URI is not set (or is empty) in the environment.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/ArpitKadam/Insurance-Fraud-Detection.mlflow"

    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH,
            schema_filepath=SCHEMA_FILE_PATH,
        ):
        """Load the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
            schema_filepath: Path of the data-schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Dictionary-style access is used throughout for the loaded YAML content.
        create_directories([self.config["artifacts_root"]])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelEvaluationConfig: paths from the ``model_evaluation`` config
            section, the ``GradientBoostingClassifier`` parameters, the target
            column name from the schema, and the MLflow tracking URI.
        """
        stage_config = self.config["model_evaluation"]
        model_params = self.params["GradientBoostingClassifier"]
        target_schema = self.schema["TARGET_COLUMN"]

        create_directories([stage_config["root_dir"]])

        # Prefer the tracking URI configured in the environment so the server can
        # be switched without editing code; fall back to the project default.
        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self.DEFAULT_MLFLOW_URI

        return ModelEvaluationConfig(
            root_dir=stage_config["root_dir"],
            test_data_path=stage_config["test_data_path"],
            model_path=stage_config["model_path"],
            all_params=model_params,
            metric_file_name=stage_config["metric_file_name"],
            target_column=target_schema["name"],
            mlflow_uri=mlflow_uri,
        )


In [24]:
import pandas as pd
import joblib
import mlflow
from mlflow.models.signature import infer_signature
from sklearn.metrics import (
    f1_score,
    roc_auc_score,
    accuracy_score,
    precision_score,
    recall_score,
    confusion_matrix,
    classification_report,
)
from sklearn.preprocessing import StandardScaler
from src.Insurance_Fraud.logger.logger import logger


class ModelEvaluation:
    """Evaluates the trained classifier on the test set and logs the run to MLflow."""

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation settings.

        Args:
            config: Paths, target column, model parameters and MLflow URI.
        """
        self.config = config

    @staticmethod
    def eval_metrics(actual, pred, pred_proba=None):
        """Compute classification metrics for a binary target.

        Args:
            actual: Ground-truth labels.
            pred: Predicted class labels.
            pred_proba: Optional scores/probabilities of the positive class.
                When given, ROC AUC is computed from these scores; when omitted,
                the hard labels in ``pred`` are used (previous behaviour).

        Returns:
            dict: scalar metrics (``f1_score``, ``roc_auc_score``,
            ``accuracy_score``, ``precision_score``, ``recall_score``) plus
            ``confusion_matrix`` (nested list) and ``classification_report`` (dict).
        """
        # ROC AUC is a ranking metric: fed hard 0/1 labels it collapses to a single
        # operating point, so use continuous scores whenever they are available.
        auc_scores = pred if pred_proba is None else pred_proba
        return {
            "f1_score": f1_score(actual, pred),
            "roc_auc_score": roc_auc_score(actual, auc_scores),
            "accuracy_score": accuracy_score(actual, pred),
            "precision_score": precision_score(actual, pred),
            "recall_score": recall_score(actual, pred),
            # Converted to a list so the value is JSON-serializable.
            "confusion_matrix": confusion_matrix(actual, pred).tolist(),
            "classification_report": classification_report(actual, pred, output_dict=True),
        }

    def log_into_mlflow(self):
        """Score the model on the test data and log metrics, params and model to MLflow.

        Reads the test CSV and the serialized model from the configured paths,
        starts an MLflow run under the configured tracking URI, logs the scalar
        metrics and model parameters, and registers the model as
        ``GradientBoostingClassifier``.

        Raises:
            Exception: any error from loading, scoring or logging is logged with
                its traceback and re-raised unchanged.
        """
        try:
            logger.info("Loading test data and trained model.")
            test_data = pd.read_csv(self.config.test_data_path)
            model = joblib.load(self.config.model_path)

            logger.info("Preparing test data for evaluation.")
            test_x = test_data.drop([self.config.target_column], axis=1)
            test_y = test_data[self.config.target_column]
            # NOTE(review): the scaler is fitted on the test set itself, so the
            # features are not scaled with the statistics used at training time.
            # The fitted training scaler is not available in this block; confirm
            # how training scaled the data and load that scaler here instead.
            test_x_scaled = StandardScaler().fit_transform(test_x)

            mlflow.set_tracking_uri(self.config.mlflow_uri)
            mlflow.set_experiment(self.config.root_dir)

            logger.info("Starting MLflow run.")
            with mlflow.start_run():
                logger.info("Making predictions and evaluating metrics.")
                predictions = model.predict(test_x_scaled)

                # Use positive-class probabilities for ROC AUC when the model
                # exposes them and the problem is binary (two probability columns).
                positive_proba = None
                if hasattr(model, "predict_proba"):
                    proba = model.predict_proba(test_x_scaled)
                    if proba.ndim == 2 and proba.shape[1] == 2:
                        positive_proba = proba[:, 1]

                metrics = self.eval_metrics(test_y, predictions, positive_proba)

                logger.info("Logging metrics to MLflow.")
                for metric_name, metric_value in metrics.items():
                    # Only scalars can be logged as MLflow metrics; the confusion
                    # matrix (list) and classification report (dict) are skipped.
                    if isinstance(metric_value, (list, dict)):
                        continue
                    mlflow.log_metric(metric_name, metric_value)

                logger.info("Logging parameters to MLflow.")
                mlflow.log_params(self.config.all_params)

                # The signature records the input/output schema alongside the model.
                signature = infer_signature(test_x_scaled, predictions)
                mlflow.sklearn.log_model(
                    sk_model=model,
                    artifact_path="model",
                    registered_model_name="GradientBoostingClassifier",
                    signature=signature,
                )
                logger.info("Model and metrics logged successfully.")

        except Exception as e:
            # Log at error level with the traceback, then re-raise the original
            # exception without rewriting its traceback.
            logger.exception(f"Error during model evaluation: {e}")
            raise


In [None]:
import traceback
# Run the evaluation stage end to end: load configuration, build the evaluator,
# then score the model and log the run to MLflow.
try:
    config = ConfigurationManager()
    print("Configuration loaded successfully.")
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.log_into_mlflow()
except Exception as e:
    # logger.exception records the message at error level together with the
    # traceback, and the bare `raise` lets Jupyter display the original error,
    # so a separate traceback.print_exc() would only duplicate the output.
    logger.exception(f"Error during model evaluation: {e}")
    raise

In [20]:
import mlflow

# Point the MLflow client at this project's DagsHub-hosted tracking server,
# then confirm in the cell output that the call completed.
mlflow.set_tracking_uri(
    uri='https://dagshub.com/ArpitKadam/Insurance-Fraud-Detection.mlflow',
)
print("MLflow tracking URI set successfully.")


MLflow tracking URI set successfully.
