In [69]:
import os

In [70]:
# Point MLflow at the DagsHub-hosted tracking server for this project.
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/AbhijeethKollarapu/datascience_e2e_project1.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "AbhijeethKollarapu"

# The access token must never be stored in the notebook. Use the value already
# exported in the environment, and only prompt (without echo) when it is missing.
# NOTE(review): a token was previously committed in this cell; it remains in
# version history and should be revoked and regenerated.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
  from getpass import getpass
  os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("MLflow tracking password / access token: ")

In [71]:
%pwd


'd:\\mlflow\\datascience-e2e-project1'

In [None]:
# os.chdir("../")
# %pwd

'd:\\mlflow\\datascience-e2e-project1'

In [84]:
from dataclasses import dataclass
from pathlib import Path

In [85]:
@dataclass
class ModelEvaluationConfig:
  """Settings bundle handed to the model-evaluation stage.

  The field order below is part of the interface: instances may be built
  positionally, so new fields should only ever be appended.
  """

  # Output directory of the evaluation stage.
  root_dir: Path
  # Location of the serialized, already-trained model.
  model_path: Path
  # CSV file holding the held-out test rows (features plus target column).
  test_data_path: Path
  # File the computed metrics are written to as JSON.
  metrics_file_name: Path
  # URI of the MLflow tracking server to log against.
  mlflow_tracking_uri: str
  # Name of the label column inside the test CSV.
  target_col: str
  # Hyperparameters of the trained model, logged alongside the metrics.
  model_params: dict


In [None]:
from src.ds_e2e_project1.constants import *
from src.ds_e2e_project1.utils.common import read_yaml, create_directories, save_json_data

In [101]:
class ConfigurationManager:
  """Reads the project's YAML files and builds per-stage configuration objects."""

  def __init__(
      self,
      config_filepath = CONFIG_FILE_PATH,
      params_filepath = PARAMS_FILE_PATH,
      schema_filepath = SCHEMA_FILE_PATH
      ):
    """Load the config, params and schema YAML files and create the artifacts root.

    Args:
      config_filepath: Path of the main configuration YAML.
      params_filepath: Path of the model hyperparameter YAML.
      schema_filepath: Path of the dataset schema YAML.
    """
    # Files are read in the order config -> params -> schema.
    self.config, self.params, self.schema = (
      read_yaml(yaml_path)
      for yaml_path in (config_filepath, params_filepath, schema_filepath)
    )

    artifacts_root = self.config.artifacts_root
    create_directories([artifacts_root])

  def get_model_eval_config(self) -> ModelEvaluationConfig:
    """Build the configuration for the model-evaluation stage.

    Creates the stage's root directory as a side effect.

    Returns:
      A ModelEvaluationConfig populated from the ``model_evaluation`` section of
      the config file, the schema's target column name, the ``ElasticNet``
      section of the params file, and the ``MLFLOW_TRACKING_URI`` environment
      variable.

    Raises:
      KeyError: If ``MLFLOW_TRACKING_URI`` is not set in the environment.
    """
    eval_section = self.config.model_evaluation
    create_directories([eval_section.root_dir])

    return ModelEvaluationConfig(
      metrics_file_name=eval_section.metrics_file_name,
      mlflow_tracking_uri=os.environ["MLFLOW_TRACKING_URI"],
      model_path=eval_section.model_path,
      root_dir=eval_section.root_dir,
      target_col=self.schema.TARGET_COLUMN.name,
      test_data_path=eval_section.test_data_path,
      model_params=self.params.ElasticNet,
    )

In [102]:
import joblib
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score, r2_score, confusion_matrix, classification_report, mean_squared_error, mean_absolute_error
import mlflow
from mlflow.models import infer_signature 
import mlflow.sklearn
import os
from urllib.parse import urlparse
import numpy as np

In [103]:
class ModelEvaluation:
  """Scores the persisted model on the test set and records the run in MLflow."""

  def __init__(self, model_eval_config: ModelEvaluationConfig):
    """Keep the evaluation settings for later use by the other methods."""
    self.config = model_eval_config

  def get_model(self):
    """Load and return the estimator stored at ``config.model_path``."""
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code. Only point model_path at artifacts produced by this pipeline.
    return joblib.load(self.config.model_path)

  def get_test_data(self):
    """Read the test CSV and split it into features and target.

    Returns:
      Tuple ``(X_test, y_test)``: every column except ``config.target_col``,
      and the ``config.target_col`` column itself.
    """
    test_data = pd.read_csv(self.config.test_data_path)
    X_test = test_data.drop(columns=[self.config.target_col])
    y_test = test_data[self.config.target_col]
    return X_test, y_test

  def _score_and_save(self, model, X_test, y_test):
    """Predict on the test set, compute the metrics, print and persist them.

    Returns:
      Tuple ``(model_scores, y_pred)`` so callers can reuse the predictions
      without running the model a second time.
    """
    y_pred = model.predict(X_test)

    model_scores = {
      "r2": r2_score(y_test, y_pred),
      "mae": mean_absolute_error(y_test, y_pred),
      # RMSE is the square root of the mean squared error.
      "rmse": mean_squared_error(y_test, y_pred) ** 0.5,
    }
    print(model_scores)

    save_json_data(dest_json_file_path=Path(self.config.metrics_file_name), json_data=model_scores)
    return model_scores, y_pred

  def get_evaluation_results(self):
    """Evaluate the stored model on the test data.

    Prints the metrics and writes them as JSON to ``config.metrics_file_name``.

    Returns:
      Dict with the keys ``"r2"``, ``"mae"`` and ``"rmse"``.
    """
    X_test, y_test = self.get_test_data()
    model = self.get_model()
    model_scores, _ = self._score_and_save(model, X_test, y_test)
    return model_scores

  def log_to_mlflow(self):
    """Evaluate the model and log params, metrics and the model to MLflow.

    The model and test data are loaded once and shared between scoring and
    logging. The metrics JSON file is still written, as in a plain call to
    ``get_evaluation_results``.
    """
    X_test, y_test = self.get_test_data()
    model = self.get_model()
    model_scores, y_pred = self._score_and_save(model, X_test, y_test)

    mlflow.set_tracking_uri(self.config.mlflow_tracking_uri)
    mlflow.set_experiment("Wine_quality_estimator_ds1")

    with mlflow.start_run():
      # Describe the model's input/output schema from the test features and the
      # predictions it produced, and attach it to the logged model below.
      signature = infer_signature(X_test, y_pred)

      mlflow.log_params(self.config.model_params)
      mlflow.log_metrics(model_scores)

      tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

      # Register the model by name only when the tracking URI is not a local
      # "file" store.
      if tracking_url_type_store != "file":
        mlflow.sklearn.log_model(
          model,
          "model",
          signature=signature,
          registered_model_name="Wine_quality_predictor_elasticnet",
        )
      else:
        mlflow.sklearn.log_model(model, "model", signature=signature)


In [104]:
# Run the evaluation stage end to end: load configuration, build the evaluator,
# then score the model and log the run to MLflow. Any exception propagates
# as-is with its original traceback.
config_manager = ConfigurationManager()
model_eval_config = config_manager.get_model_eval_config()
model_eval = ModelEvaluation(model_eval_config)
model_eval.log_to_mlflow()

[2025-04-03 01:47:40,014 : INFO : common : YAML file config\config.yaml read successfully]
[2025-04-03 01:47:40,017 : INFO : common : YAML file params.yaml read successfully]
[2025-04-03 01:47:40,021 : INFO : common : YAML file schema.yaml read successfully]
[2025-04-03 01:47:40,022 : INFO : common : Directory artifacts created]
[2025-04-03 01:47:40,023 : INFO : common : Directory artifacts/model_evaluation created]
{'r2': 0.26038065904013563, 'mae': 0.5394267815902264, 'rmse': 0.6936792109617563}
[2025-04-03 01:47:40,134 : INFO : common : Given json data saved to file artifacts\model_evaluation\metrics.json]


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Registered model 'Wine_quality_predictor_elasticnet' already exists. Creating a new version of this model...
2025/04/03 01:47:48 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Wine_quality_predictor_elasticnet, version 11


🏃 View run powerful-kit-590 at: https://dagshub.com/AbhijeethKollarapu/datascience_e2e_project1.mlflow/#/experiments/1/runs/767e1d2942124289b50a97b51e45e0a3
🧪 View experiment at: https://dagshub.com/AbhijeethKollarapu/datascience_e2e_project1.mlflow/#/experiments/1


Created version '11' of model 'Wine_quality_predictor_elasticnet'.
