In [1]:
import os

In [2]:
# Show the kernel's current working directory before changing it.
%pwd

'c:\\Users\\layeg\\Desktop\\GitHub\\Holland_Barret\\research'

In [3]:
# Step up to the project root only while the kernel is still inside `research/`.
# An unconditional chdir is not idempotent: every re-run of this cell would climb
# one more level and break the relative paths used by the cells below.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Check the working directory again after the chdir cell above.
%pwd

'c:\\Users\\layeg\\Desktop\\GitHub\\Holland_Barret'

In [5]:
# Environment variables for MLflow tracking (remote server hosted on DagsHub).
#
# SECURITY: an access token must never be stored in a notebook. The token that was
# previously hardcoded in this cell should be revoked and replaced.
# Values that are already exported in the environment are respected; the token is
# requested interactively (without echo) only when it is missing.
import getpass

os.environ.setdefault("MLFLOW_TRACKING_URI", "https://dagshub.com/layeghmorteza/Holland_Barret.mlflow")
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "layeghmorteza")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass.getpass("MLflow tracking password / access token: ")


In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """
    Immutable bundle of settings consumed by the model evaluation stage.

    Instances are frozen: fields are set once at construction and cannot be
    reassigned afterwards.

    Attributes:
    root_dir (Path): Directory that holds the evaluation artifacts.
    test_data_path (Path): Location of the test data file.
    model_path (Path): Location of the persisted trained model.
    all_params (dict): Model parameters to be logged with the run.
    metric_file_name (Path): File the evaluation metrics are written to.
    target_column (str): Name of the target column.
    mlflow_uri (str): MLflow tracking URI.
    """
    # --- filesystem locations ---
    root_dir: Path
    test_data_path: Path
    model_path: Path
    # --- model parameters logged alongside the metrics ---
    all_params: dict
    # --- evaluation output and dataset schema ---
    metric_file_name: Path
    target_column: str
    # --- experiment tracking ---
    mlflow_uri: str


In [7]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories, save_json

In [8]:
class ConfigurationManager:
    """
    Load the project's YAML files and build the model evaluation configuration.

    Attributes:
    config: Parsed contents of the main configuration file.
    params: Parsed contents of the parameters file.
    schema: Parsed contents of the schema file.
    DEFAULT_MLFLOW_URI (str): Tracking URI used when the MLFLOW_TRACKING_URI
        environment variable is unset or empty.
    """

    # Fallback tracking server; kept in one place instead of being repeated inline.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/layeghmorteza/Holland_Barret.mlflow"

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH
    ):
        """
        Read the three YAML files and create the artifacts root directory.

        Args:
        config_filepath (str): The file path of the main configuration file.
        params_filepath (str): The file path of the parameters file.
        schema_filepath (str): The file path of the schema file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """
        Build the model evaluation configuration and create its root directory.

        The MLflow tracking URI is taken from the MLFLOW_TRACKING_URI environment
        variable when it is set to a non-empty value, so the notebook and the
        environment cannot drift apart; otherwise DEFAULT_MLFLOW_URI is used.

        Returns:
        ModelEvaluationConfig: The model evaluation configuration object.
        """
        config = self.config.model_evaluation
        params = self.params.GBMClassifier
        target = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        # `or` (rather than a .get default) also covers an exported-but-empty variable.
        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self.DEFAULT_MLFLOW_URI

        model_evaluation_config = ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path=config.model_path,
            all_params=params,
            metric_file_name=config.metric_file_name,
            target_column=target.name,
            mlflow_uri=mlflow_uri
        )

        return model_evaluation_config



In [9]:
import os
import pandas as pd
#from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


In [10]:
class ModelEvaluation:
    """
    Evaluate a persisted classifier on the test data and log the run to MLflow.

    Attributes:
    config (ModelEvaluationConfig): The configuration for model evaluation.
    """

    def __init__(self, config: ModelEvaluationConfig):
        """
        Initialize ModelEvaluation with a configuration object.

        Args:
        config (ModelEvaluationConfig): The configuration for model evaluation.
        """
        self.config = config

    def evaluate_clf(self, true, predicted, predicted_scores=None):
        '''
        Compute classification metrics for one set of predictions.

        Args:
        true: Ground-truth labels.
        predicted: Hard class predictions, used for accuracy, F1, precision and recall.
        predicted_scores: Optional continuous scores for the positive class
            (e.g. predicted probabilities). When given, ROC-AUC is computed from
            them; when None, ROC-AUC falls back to the hard predictions, which is
            the previous behaviour of this method.

        Returns: Accuracy, F1-Score, Precision, Recall, Roc-auc Score (in that order)
        '''
        acc = accuracy_score(true, predicted)
        f1 = f1_score(true, predicted)
        precision = precision_score(true, predicted)
        recall = recall_score(true, predicted)

        # ROC-AUC is a ranking metric: thresholded labels collapse the curve to a
        # single operating point, so prefer continuous scores whenever available.
        ranking_input = predicted if predicted_scores is None else predicted_scores
        roc_auc = roc_auc_score(true, ranking_input)

        return acc, f1, precision, recall, roc_auc

    def log_into_mlflow(self):
        """
        Score the model on the test data, save the metrics locally and log
        parameters, metrics and the model itself into MLflow.
        """
        test_data = pd.read_csv(self.config.test_data_path)
        model = joblib.load(self.config.model_path)

        # NOTE(review): the split is positional (last column = target) and
        # `self.config.target_column` is not used here — confirm the test file
        # always keeps the target as its last column.
        X_test = test_data.iloc[:, :-1]
        y_test = test_data.iloc[:, -1]

        mlflow.set_tracking_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():

            predictions = model.predict(X_test)

            # Use positive-class probabilities for ROC-AUC when the estimator
            # exposes them for a two-class problem; otherwise keep hard labels.
            positive_scores = None
            if hasattr(model, "predict_proba"):
                probabilities = np.asarray(model.predict_proba(X_test))
                if probabilities.ndim == 2 and probabilities.shape[1] == 2:
                    positive_scores = probabilities[:, 1]

            (acc, f1, precision, recall, roc_auc) = self.evaluate_clf(
                y_test, predictions, predicted_scores=positive_scores
            )

            # Saving metrics as local
            scores = {"acc": acc, "f1": f1, "precision": precision, "recall": recall, "roc_auc": roc_auc}
            save_json(path=Path(self.config.metric_file_name), data=scores)

            mlflow.log_params(self.config.all_params)

            # One batched call instead of one tracking-server call per metric.
            mlflow.log_metrics(scores)

            # Model registry does not work with file store
            if tracking_url_type_store != "file":

                # Register the model
                mlflow.sklearn.log_model(model, "model", registered_model_name="GBMClassifier")
            else:
                mlflow.sklearn.log_model(model, "model")


In [11]:
# Initialize ConfigurationManager and retrieve model evaluation configuration
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()

# Initialize ModelEvaluation with the retrieved configuration
model_evaluation = ModelEvaluation(config=model_evaluation_config)

# Log evaluation metrics into MLflow.
# No try/except wrapper: the previous handler only re-raised the same exception,
# so any failure simply propagates with its original traceback.
model_evaluation.log_into_mlflow()


[2024-02-28 11:26:53,497: INFO: common: YAML file loaded successfully from: config\config.yaml]
[2024-02-28 11:26:53,499: INFO: common: YAML file loaded successfully from: params.yaml]
[2024-02-28 11:26:53,502: INFO: common: YAML file loaded successfully from: schema.yaml]
[2024-02-28 11:26:53,503: INFO: common: Created directory at: artifacts]
[2024-02-28 11:26:53,505: INFO: common: Created directory at: artifacts/model_evaluation]
[2024-02-28 11:26:54,502: INFO: common: JSON file saved at: artifacts\model_evaluation\metrics.json]


Registered model 'GBMClassifier' already exists. Creating a new version of this model...
2024/02/28 11:27:01 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: GBMClassifier, version 21
Created version '21' of model 'GBMClassifier'.
