In [1]:
# Show the kernel's current working directory before it is changed in the next cell.
%pwd

'c:\\Users\\44787\\Desktop\\projects\\end-to-end-SMS-Spam-classifier\\research'

In [2]:
import os

os.chdir("../")
%pwd

'c:\\Users\\44787\\Desktop\\projects\\end-to-end-SMS-Spam-classifier'

In [3]:
# MLflow picks up its tracking endpoint and credentials from these environment variables.
#
# SECURITY: an access token used to be hard-coded in this cell. Anything that was ever
# committed must be treated as compromised -- revoke that token on DagsHub and issue a
# new one. The token is now taken from the environment, or prompted for without echo.
from getpass import getpass

os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/fraidoon_omarzai/end-to-end-SMS-Spam-classifier.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "fraidoon_omarzai"

# Only prompt when the token was not already exported before the kernel started.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")

In [4]:
from pathlib import Path
from dataclasses import dataclass

@dataclass
class ModelEvaluationConfig:
    """Settings consumed by the model-evaluation stage.

    Attributes:
        root_dir: Working directory of the evaluation stage.
        test_path: CSV file holding the test split (read with ``pd.read_csv``).
        model_path: Serialized model file (loaded with ``joblib.load``).
        experiment_name: Name handed to ``mlflow.set_experiment``.
        mlflow_uri: URI handed to ``mlflow.set_registry_uri``.
        run_name: Name given to the MLflow run.
        metric_file_name: Destination of the locally saved metrics JSON.
    """

    # Filesystem locations
    root_dir: Path
    test_path: Path
    model_path: Path

    # MLflow settings
    experiment_name: str
    mlflow_uri: str
    run_name: str

    # Local metrics output
    metric_file_name: str

In [5]:
from src.SMSClassifier.constants import CONFIG_PATH
from src.SMSClassifier.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project configuration file and builds per-stage config objects."""

    def __init__(self, config=CONFIG_PATH):
        """Load the configuration via ``read_yaml``.

        Args:
            config: Location of the YAML configuration file; defaults to ``CONFIG_PATH``.
        """
        self.config = read_yaml(config)

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the settings for the model-evaluation stage.

        Reads the ``model_evaluation`` section of the loaded configuration and
        passes its ``root_dir`` to ``create_directories`` as a side effect.

        Returns:
            ModelEvaluationConfig: filesystem entries are wrapped in ``Path`` so the
            values match the field types the dataclass declares; the remaining
            entries are passed through unchanged.
        """
        config = self.config.model_evaluation
        create_directories([config.root_dir])

        return ModelEvaluationConfig(
            # The YAML values are not Path objects; coerce them to honour the annotations.
            root_dir=Path(config.root_dir),
            test_path=Path(config.test_path),
            model_path=Path(config.model_path),
            experiment_name=config.experiment_name,
            mlflow_uri=config.mlflow_uri,
            run_name=config.run_name,
            metric_file_name=config.metric_file_name,
        )

In [7]:
from src.SMSClassifier.logging import logger
import pandas as pd
import joblib

from sklearn.metrics import f1_score, precision_score, recall_score

from src.SMSClassifier.utils.common import save_json

import mlflow
from urllib.parse import urlparse


In [8]:
class ModelEvaluation:
    """Scores a saved model on the test split and records the result in MLflow."""

    def __init__(self, config: ModelEvaluationConfig) -> None:
        """Keep the stage settings for later use by ``model_evaluation``."""
        self.config = config

    def eval_metrics(self, y_true, y_pred):
        """Compute the classification scores for one set of predictions.

        Args:
            y_true: Ground-truth labels.
            y_pred: Labels predicted by the model.

        Returns:
            tuple: ``(f1, precision, recall)`` as returned by the scikit-learn scorers.
        """
        return (
            f1_score(y_true, y_pred),
            precision_score(y_true, y_pred),
            recall_score(y_true, y_pred),
        )

    def model_evaluation(self):
        """Evaluate the stored model and log metrics plus the model to MLflow.

        Steps: load the test CSV and the serialized model, predict on every column
        except ``target``, write the scores to the configured JSON file, then log
        the scores and the model inside a single MLflow run.
        """
        cfg = self.config

        frame = pd.read_csv(cfg.test_path)
        classifier = joblib.load(cfg.model_path)

        # Everything except the label column is treated as model input.
        features = frame.drop(columns='target')
        labels = frame['target']

        mlflow.set_experiment(cfg.experiment_name)
        mlflow.set_registry_uri(cfg.mlflow_uri)
        store_scheme = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run(run_name=cfg.run_name):
            predictions = classifier.predict(features)
            f1, precision, recall = self.eval_metrics(labels, predictions)

            # Keep a local copy of the scores next to the other artifacts.
            save_json(
                path=Path(cfg.metric_file_name),
                data={"f1": f1, "precision": precision, "recall": recall},
            )

            mlflow.autolog()

            for metric_name, metric_value in (
                ("f1", f1),
                ("precision", precision),
                ("recall", recall),
            ):
                mlflow.log_metric(metric_name, metric_value)

            # The model registry is unavailable on a file-based tracking store, so the
            # model is only registered under a name when the store is something else.
            # Other registry workflows:
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            registry_kwargs = {}
            if store_scheme != "file":
                registry_kwargs["registered_model_name"] = "MultinomialNB"
            mlflow.sklearn.log_model(classifier, "model", **registry_kwargs)

In [10]:
# Run the evaluation stage end to end: load config -> build stage settings -> evaluate.
# No try/except here: the previous handler only re-raised the same exception, so letting
# errors propagate directly gives the same failure with a cleaner traceback.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
evaluation = ModelEvaluation(model_evaluation_config)
evaluation.model_evaluation()

[2024-02-08 15:19:41,727: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-08 15:19:41,727: INFO: common: created directory at: artifacts/model_evaluation]


2024/02/08 15:19:42 INFO mlflow.tracking.fluent: Experiment with name 'SMS-Classifier-exp' does not exist. Creating a new experiment.


[2024-02-08 15:19:44,303: INFO: common: json file saved at: artifacts\model_evaluation\metrics.json]


2024/02/08 15:19:45 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
Registered model 'MultinomialNB' already exists. Creating a new version of this model...
2024/02/08 15:19:56 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: MultinomialNB, version 6
Created version '6' of model 'MultinomialNB'.
