In [1]:
import os

In [2]:
# Show the current working directory. os.getcwd() works in any Python
# session and takes no argument, so it cannot be mistaken for a directory change.
os.getcwd()

'f:\\Files\\DSML\\Condition2Cure\\notebook'

In [3]:
# Move up to the project root, but only when we are not already there.
# The guard makes the cell idempotent: re-running it cannot climb above the
# root. The root is recognised by the relative config/config.yaml file that
# the configuration loader reads (see the run log at the end of the notebook).
if not os.path.exists(os.path.join("config", "config.yaml")):
    os.chdir("../")

In [4]:
# Confirm the working directory is now the project root.
os.getcwd()

'f:\\Files\\DSML\\Condition2Cure'

In [5]:
import json
import os
import sys
from dataclasses import dataclass
from pathlib import Path

import dagshub
import joblib
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, confusion_matrix
)
from sklearn.model_selection import train_test_split

from Condition2Cure.utils.helpers import *
from Condition2Cure import logger

In [None]:
from Condition2Cure.utils.helpers import *
from Condition2Cure.constants import *
from Condition2Cure.utils.execptions import *

In [None]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of settings used by the model-evaluation stage.

    NOTE(review): ConfigurationManager passes the YAML values through
    unchanged, so the path fields may hold plain strings rather than
    ``Path`` objects at runtime -- confirm before relying on Path methods.
    """
    # Output directory of the evaluation stage; created by ConfigurationManager.
    root_dir: Path
    # File read with np.load; the stored object is indexed with key 'X'.
    features_path: Path
    # File read with np.load; the stored object is indexed with key 'y'.
    labels_path: Path
    # Trained model, read with joblib.load.
    model_path: Path
    # Label encoder, read with joblib.load.
    label_encoder_path: Path
    # Forwarded to train_test_split(test_size=...).
    test_size: float
    # Forwarded to train_test_split(random_state=...).
    random_state: int
    # Destination of the metrics JSON file.
    metrics_path: Path
    # Only the directory part is used by ModelEvaluator; the plot itself is
    # always written as "cm.png" inside that directory.
    cm_path: Path

In [None]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        """Load config, schema and params, then create the artifacts root.

        Args:
            config_filepath: YAML file with stage paths and settings.
            params_filepath: YAML file with tunable parameters.
            schema_filepath: YAML file with the data schema.
        """
        # Load order is config -> schema -> params.
        yaml_sources = (
            ("config", config_filepath),
            ("schema", schema_filepath),
            ("params", params_filepath),
        )
        for attribute, yaml_path in yaml_sources:
            setattr(self, attribute, read_yaml(yaml_path))

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the evaluation config and create the stage's root directory.

        Returns:
            ModelEvaluationConfig populated from the ``model_evaluation``
            section of the config file and the ``train_test_split`` section
            of the params file.
        """
        eval_section = self.config.model_evaluation
        split_params = self.params.train_test_split

        create_directories([eval_section.root_dir])

        return ModelEvaluationConfig(
            root_dir=eval_section.root_dir,
            features_path=eval_section.features_path,
            labels_path=eval_section.labels_path,
            model_path=eval_section.model_path,
            label_encoder_path=eval_section.label_encoder_path,
            test_size=split_params.test_size,
            random_state=split_params.random_state,
            metrics_path=eval_section.metrics_path,
            cm_path=eval_section.cm_path,
        )

In [None]:
class ModelEvaluator:
    """Scores a trained model on a held-out split and records the results.

    The evaluator reloads the saved features, labels and model, re-creates
    the train/test split from the configured ``test_size`` and
    ``random_state``, computes classification metrics on the test part,
    and writes the metrics JSON and a confusion-matrix plot to disk and
    to an MLflow run.
    """

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the configuration and set up MLflow tracking.

        Args:
            config: Evaluation settings (input artifact paths, split
                parameters and output locations).
        """
        self.config = config

        mlflow.set_tracking_uri("./mlruns")
        mlflow.set_experiment("Condition2Cure")
        logger.info("MLflow tracking setup complete.")

    @staticmethod
    def _require_file(path, description: str) -> None:
        """Raise FileNotFoundError if the required input ``path`` is missing."""
        if not os.path.exists(path):
            raise FileNotFoundError(f"{description} file not found: {path}")

    @staticmethod
    def _compute_metrics(y_true, y_pred) -> dict:
        """Return accuracy plus weighted and macro precision/recall/F1."""
        metrics = {"accuracy": accuracy_score(y_true, y_pred)}
        scorers = (
            ("precision", precision_score),
            ("recall", recall_score),
            ("f1", f1_score),
        )
        for average in ("weighted", "macro"):
            for name, scorer in scorers:
                metrics[f"{name}_{average}"] = scorer(y_true, y_pred, average=average)
        return metrics

    @staticmethod
    def _save_confusion_matrix(cm, save_path) -> None:
        """Draw ``cm`` as an annotated heatmap and write it to ``save_path``."""
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
            ax.set_title("Confusion Matrix")
            ax.set_xlabel("Predicted")
            ax.set_ylabel("Actual")
            fig.tight_layout()
            fig.savefig(save_path)
        finally:
            # Always release the figure, even when drawing or saving fails,
            # so repeated runs do not accumulate open figures in the kernel.
            plt.close(fig)

    def evaluation(self):
        """Evaluate the saved model and persist metrics and plots.

        Returns:
            dict: metric name -> score, with keys ``accuracy``,
            ``precision_weighted``, ``recall_weighted``, ``f1_weighted``,
            ``precision_macro``, ``recall_macro`` and ``f1_macro``.

        Raises:
            FileNotFoundError: if the features, labels, model or label
                encoder file does not exist.
        """
        cfg = self.config

        # Fail fast, with a uniform message, for every input artifact.
        # The labels file is included so a missing one is reported the same
        # way as the others instead of surfacing later from np.load.
        required_inputs = (
            ("Features", cfg.features_path),
            ("Labels", cfg.labels_path),
            ("Model", cfg.model_path),
            ("Label encoder", cfg.label_encoder_path),
        )
        for description, path in required_inputs:
            self._require_file(path, description)

        # NOTE(security): np.load(allow_pickle=True) and joblib.load both
        # unpickle data, which can execute arbitrary code. Only point these
        # paths at artifacts produced by this project's own pipeline.
        X = np.load(cfg.features_path, allow_pickle=True).item()["X"]
        y = np.load(cfg.labels_path, allow_pickle=True).item()["y"]

        model = joblib.load(cfg.model_path)
        # The encoder is not needed for scoring; loading it only confirms
        # that the artifact logged to MLflow below can be deserialized.
        joblib.load(cfg.label_encoder_path)

        logger.info("Loaded features, model, and label encoder.")

        # Only the test part of the split is used for evaluation.
        _, X_test, _, y_test = train_test_split(
            X, y,
            test_size=cfg.test_size,
            random_state=cfg.random_state,
            stratify=y
        )

        logger.info("Split data into train and test sets.")

        preds = model.predict(X_test)
        metrics = self._compute_metrics(y_test, preds)

        create_directories([os.path.dirname(cfg.metrics_path)])
        save_json(path=Path(cfg.metrics_path), data=metrics)
        logger.info(f"Metrics saved to {cfg.metrics_path}")

        with mlflow.start_run(run_name="Model Evaluation"):
            mlflow.log_metrics({k: float(v) for k, v in metrics.items()})
            mlflow.set_tag("stage", "evaluation")
            mlflow.set_tag("evaluation_date", str(pd.Timestamp.now()))
            mlflow.log_artifact(cfg.metrics_path)
            mlflow.log_artifact(cfg.model_path)
            mlflow.log_artifact(cfg.label_encoder_path)
            logger.info("Metrics and artifacts logged to MLflow.")

            cm = confusion_matrix(y_test, preds)

            # NOTE(review): only the directory of cm_path is honoured; the
            # file name is always "cm.png". Kept as-is because other stages
            # may rely on that name -- confirm before switching to cm_path.
            cm_save_path = os.path.join(os.path.dirname(cfg.cm_path), "cm.png")
            create_directories([os.path.dirname(cm_save_path)])
            self._save_confusion_matrix(cm, cm_save_path)
            mlflow.log_artifact(cm_save_path)
            logger.info(f"Confusion matrix saved to {cm_save_path}")

        logger.info("Model evaluation complete. Metrics and plots logged.")
        return metrics

In [None]:
# Run the evaluation stage end to end. A missing input artifact is re-raised
# as the project's CustomException, which receives the `sys` module
# (imported explicitly in the notebook's import cell).
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluator(config=model_evaluation_config)
    model_evaluation.evaluation()

except FileNotFoundError as e:
    # Chain explicitly so the original FileNotFoundError stays in the traceback.
    raise CustomException(str(e), sys) from e

[2025-06-20 20:19:46,878: INFO: helpers: yaml file: config\config.yaml loaded successfully]
[2025-06-20 20:19:46,884: INFO: helpers: yaml file: config\schema.yaml loaded successfully]
[2025-06-20 20:19:46,884: INFO: helpers: yaml file: config\params.yaml loaded successfully]
[2025-06-20 20:19:46,900: INFO: helpers: created directory at: artifacts]
[2025-06-20 20:19:46,900: INFO: helpers: created directory at: artifacts/model_evaluation]


[2025-06-20 20:19:47,717: INFO: helpers: Repository initialized!]
[2025-06-20 20:19:49,850: INFO: 1549089019: MLflow tracking setup complete.]
[2025-06-20 20:19:52,473: INFO: 1549089019: Loaded features, model, and label encoder.]
[2025-06-20 20:19:52,934: INFO: 1549089019: Split data into train and test sets.]
[2025-06-20 20:19:53,667: INFO: helpers: created directory at: artifacts/model_evaluation]
[2025-06-20 20:19:53,686: INFO: helpers: json file saved at: artifacts\model_evaluation\metrics.json]
[2025-06-20 20:19:53,689: INFO: 1549089019: Metrics saved to artifacts/model_evaluation/metrics.json]
[2025-06-20 20:19:54,633: INFO: 1549089019: Metrics and artifacts logged to MLflow.]
[2025-06-20 20:19:55,888: INFO: helpers: created directory at: artifacts/model_evaluation]
[2025-06-20 20:19:56,499: INFO: 1549089019: Confusion matrix saved to artifacts/model_evaluation\cm.png]
🏃 View run Model Evaluation at: http://localhost:5000/#/experiments/374096542705668379/runs/7e850456435543f99f3