In [3]:
import os

In [4]:
%pwd

'd:\\IT-service_delivery_risk_predictor\\research'

In [5]:
# Step up one directory so that relative paths used below resolve from the
# parent folder (the recorded %pwd outputs show research/ -> project root).
# NOTE: not idempotent - re-running this cell moves up one more level each time.
os.chdir("../")

In [6]:
%pwd

'd:\\IT-service_delivery_risk_predictor'

In [7]:
# Point MLflow at the DagsHub-hosted tracking server for this project.
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/Aryasree20/IT-service_delivery_risk_predictor.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "Aryasree20"

# SECURITY: the access token must never be stored in the notebook. It is taken
# from the environment when already exported, otherwise the user is prompted
# (the input is not echoed and is not saved with the notebook).
# Any token that was previously committed in this cell should be revoked.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    from getpass import getpass

    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")

In [8]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings bundle for the model-evaluation stage.

    Instances are built by ConfigurationManager.get_model_evaluation_config()
    and consumed by ModelEvaluation.
    """
    # Stage output directory (created by the configuration manager).
    root_dir: Path
    # Directory where the ROC plot image is written.
    plots_dir: Path
    # Destination file for the computed metrics (written via save_json).
    metric_file_name: Path
    # Persisted model, loaded with joblib.
    model_path: Path
    # Persisted test features, loaded with joblib.
    X_test_path: Path
    # Persisted test labels, loaded with joblib.
    y_test_path: Path
    # URI handed to mlflow.set_registry_uri().
    mlflow_uri: str
    # Hyper-parameters logged to MLflow via mlflow.log_params().
    all_params: dict

In [16]:
from risk_predictor.constants import *
from risk_predictor.utils.common import read_yaml,create_directories,save_json


In [17]:
class ConfigurationManager:
    """Reads the project's config/params YAML files and builds stage configs.

    Attributes:
        config: Parsed contents of ``config_filepath`` (as returned by read_yaml).
        params: Parsed contents of ``params_filepath`` (as returned by read_yaml).
    """

    # Fallback MLflow URI, used only when MLFLOW_TRACKING_URI is unset or empty.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/Aryasree20/IT-service_delivery_risk_predictor.mlflow"

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the configuration object for the model-evaluation stage.

        Creates the stage's root and plots directories as a side effect.

        Returns:
            ModelEvaluationConfig populated from the ``model_evaluation`` section
            of the config file and the ``KnnClassifier`` section of the params file.
        """
        section = self.config.model_evaluation

        create_directories([section.root_dir, section.plots_dir])

        # Prefer the URI MLflow itself is configured with, so the registry and the
        # tracking server cannot silently diverge; fall back to the project default.
        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self.DEFAULT_MLFLOW_URI

        # The dataclass declares these fields as Path, so convert explicitly
        # (presumably read_yaml yields plain strings - Path() accepts either).
        return ModelEvaluationConfig(
            root_dir=Path(section.root_dir),
            plots_dir=Path(section.plots_dir),
            metric_file_name=Path(section.metric_file_name),
            model_path=Path(section.model_path),
            X_test_path=Path(section.X_test_path),
            y_test_path=Path(section.y_test_path),
            mlflow_uri=mlflow_uri,
            all_params=self.params.KnnClassifier,
        )

In [10]:
import os
import joblib
import mlflow
import mlflow.sklearn
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from urllib.parse import urlparse
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_curve,
    auc
)
from sklearn.preprocessing import label_binarize
from risk_predictor import logger


In [None]:
class ModelEvaluation:
    """Scores a persisted classifier on the test split and logs the results to MLflow.

    Typical use: call eval_and_save() first, then log_into_mlflow().

    Attributes:
        config: The ModelEvaluationConfig driving all paths and MLflow settings.
        metrics: Dict of metric name -> value; None until eval_and_save() has run.
        roc_path: Path of the saved ROC plot, or None when no plot was produced
            (either eval_and_save() has not run, or the target has a single
            binarized column and the ROC plot was skipped).
    """

    def __init__(self, config: ModelEvaluationConfig):
        self.config = config
        self.metrics = None
        self.roc_path = None

    def eval_metrics(self, y_true, y_pred, y_pred_proba):
        """Compute accuracy and weighted precision / recall / F1.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            y_pred_proba: Predicted class probabilities. Currently unused; kept so
                existing callers do not break.

        Returns:
            Dict with keys "accuracy", "precision", "recall" and "f1_score".
        """
        return {
            "accuracy": accuracy_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred, average="weighted"),
            "recall": recall_score(y_true, y_pred, average="weighted"),
            "f1_score": f1_score(y_true, y_pred, average="weighted"),
        }

    def _save_roc_curve(self, name, y_true, y_pred_proba, classes):
        """Plot one-vs-rest ROC curves (one per class) and save them as a PNG.

        Args:
            name: Used in the plot title and as the file-name stem.
            y_true: Ground-truth labels.
            y_pred_proba: Per-class probabilities; column i is scored against
                classes[i].
            classes: Class labels, in the column order of y_pred_proba.

        Returns:
            Path of the saved image, or None when the binarized target has only
            one column, in which case no plot is produced.
        """
        y_true_bin = label_binarize(y_true, classes=classes)
        if y_true_bin.shape[1] == 1:
            # A single indicator column cannot be indexed per class below.
            logger.warning(f"Skipping ROC curve for {name}: target binarizes to a single column")
            return None

        fig, ax = plt.subplots()
        try:
            for idx, label in enumerate(classes):
                fpr, tpr, _ = roc_curve(y_true_bin[:, idx], y_pred_proba[:, idx])
                roc_auc = auc(fpr, tpr)
                ax.plot(fpr, tpr, label=f"Class {label} (AUC={roc_auc:.2f})")

            # Diagonal reference line (chance level).
            ax.plot([0, 1], [0, 1], "k--")
            ax.set_title(f"ROC Curve - {name}")
            ax.set_xlabel("False Positive Rate")
            ax.set_ylabel("True Positive Rate")
            ax.legend(loc="lower right")

            roc_path = Path(self.config.plots_dir) / f"{name}_roc.png"
            # Do not rely on an earlier step having created the directory.
            roc_path.parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(roc_path)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

        logger.info(f" Saved ROC curve plot: {roc_path}")
        return roc_path

    def eval_and_save(self):
        """Evaluate the model on the test split; persist metrics and the ROC plot.

        Populates self.metrics and self.roc_path (the latter stays None when the
        ROC plot is skipped).
        """
        # NOTE: joblib.load unpickles its input - only load trusted artifacts.
        X_test = joblib.load(self.config.X_test_path)
        y_test = joblib.load(self.config.y_test_path)
        model = joblib.load(self.config.model_path)

        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)

        self.metrics = self.eval_metrics(y_test, y_pred, y_pred_proba)
        save_json(path=Path(self.config.metric_file_name), data=self.metrics)

        self.roc_path = self._save_roc_curve("KNN_Multiclass", y_test, y_pred_proba, classes=model.classes_)

        logger.info("Metrics and ROC curve saved")

    def log_into_mlflow(self):
        """Log parameters, metrics, the ROC plot (if any) and the model to MLflow.

        Raises:
            ValueError: If eval_and_save() has not been run yet.
        """
        # Only the metrics prove that evaluation ran: roc_path is legitimately
        # None when the ROC plot was skipped, and must not block logging.
        if self.metrics is None:
            raise ValueError("Run eval_and_save() before log_into_mlflow()")

        model = joblib.load(self.config.model_path)

        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)

            for metric_name, metric_value in self.metrics.items():
                mlflow.log_metric(metric_name, metric_value)

            if self.roc_path is not None:
                mlflow.log_artifact(str(self.roc_path), artifact_path="plots")

            # Registering a model is skipped for a local file-based tracking store.
            if tracking_url_type_store != "file":
                # NOTE(review): the registered name says "Random_Forest" although the
                # params and plot name refer to KNN. Left unchanged because renaming
                # would start a new entry in the registry - confirm and rename.
                mlflow.sklearn.log_model(model, "model", registered_model_name="Random_Forest_MultiClassifier")
            else:
                mlflow.sklearn.log_model(model, "model")

        # Report the estimator that was actually evaluated instead of a fixed name.
        logger.info(f"Model evaluation ({type(model).__name__}) completed and logged to MLflow")
        


In [13]:
# Run the evaluation stage end to end.
# Requires the cells defining ConfigurationManager and ModelEvaluation to have
# been executed first; otherwise this cell fails with a NameError.
# No try/except here: the previous `except Exception as e: raise e` re-raised
# unchanged, so letting the exception propagate directly is equivalent.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.eval_and_save()
model_evaluation.log_into_mlflow()


NameError: name 'ConfigurationManager' is not defined