In [1]:
import os

In [2]:
%pwd

'f:\\Files\\DS&ML\\Wine-Quality-Prediction\\research'

In [3]:
os.chdir('../')
%pwd

'f:\\Files\\DS&ML\\Wine-Quality-Prediction'

In [4]:
from mlproject.constants import *

[2025-01-30 16:56:08,288 : INFO : __init__ : Logger has been set up successfully!]


In [5]:
from dataclasses import dataclass
from pathlib import Path
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import joblib
import json
from mlproject.constants import *
from mlproject.utils.common import read_yaml, create_directories, save_json
from mlproject import logger

In [6]:
@dataclass
class ModelEvaluationConfig:
    root_dir:Path
    model_path:Path
    test_data_path:Path
    all_params:dict
    metric_file_path:Path
    preprocessor_path: Path
    target_column:str

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config["artifacts_root"]])


    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
        params = self.params.RandomForestClassifier
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path=config.model_path,
            all_params=params,
            metric_file_path=config.metric_file_path,
            preprocessor_path=config.preprocessor_path,
            target_column=schema.name,
        )
        return model_evaluation_config

In [8]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config

    def evaluate(self):
        # Validate file paths
        if not os.path.exists(self.config.test_data_path):
            raise FileNotFoundError(f"Test data file not found at {self.config.test_data_path}")
        if not os.path.exists(self.config.preprocessor_path):
            raise FileNotFoundError(f"Preprocessor file not found at {self.config.preprocessor_path}")
        if not os.path.exists(self.config.model_path):
            raise FileNotFoundError(f"Model file not found at {self.config.model_path}")

        # Load preprocessor and model
        logger.info("Loading preprocessor and model...")
        preprocessor = joblib.load(self.config.preprocessor_path)
        model = joblib.load(self.config.model_path)

        # Load test data
        logger.info(f"Loading test data from {self.config.test_data_path}...")
        test_data = pd.read_csv(self.config.test_data_path)

        # Extract target column
        if self.config.target_column not in test_data.columns:
            raise KeyError(f"Target column '{self.config.target_column}' not found in test data")

        test_y = test_data[self.config.target_column]
        test_x = test_data.drop(columns=[self.config.target_column])

        logger.info(f"Test data shape: X={test_x.shape}, y={test_y.shape}")

        # Preprocess test features
        logger.info("Preprocessing test features...")
        test_x_transformed = preprocessor.transform(test_x)

        # Make predictions
        logger.info("Making predictions on the test data...")
        predictions = model.predict(test_x_transformed)

        # Evaluate the model
        logger.info("Evaluating model performance...")
        accuracy = accuracy_score(test_y, predictions)
        precision = precision_score(test_y, predictions, average="weighted")
        recall = recall_score(test_y, predictions, average="weighted")
        f1 = f1_score(test_y, predictions, average="weighted")

        logger.info(f"Model Evaluation Metrics:\naccuracy: {accuracy}\nprecision: {precision}\nrecall: {recall}\nf1: {f1}")

        # Save the evaluation metrics
        metrics_path = os.path.join(self.config.root_dir, "metrics.json")
        metrics = {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1
        }
        with open(metrics_path, "w") as f:
            json.dump(metrics, f, indent=4)
        logger.info(f"Evaluation metrics saved at {metrics_path}")

        return metrics

In [9]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    metrics = model_evaluation.evaluate()

except FileNotFoundError as e:
    logger.error(f"File not found: {e}")
except KeyError as e:
    logger.error(f"Missing key in configuration or data: {e}")
except Exception as e:
    logger.error(f"Unexpected error: {e}")

[2025-01-30 16:56:10,065 : INFO : common : yaml file: config\config.yaml loaded successfully]
[2025-01-30 16:56:10,069 : INFO : common : yaml file: params.yaml loaded successfully]
[2025-01-30 16:56:10,073 : INFO : common : yaml file: schema.yaml loaded successfully]
[2025-01-30 16:56:10,075 : INFO : common : created directory at: artifacts]
[2025-01-30 16:56:10,076 : INFO : common : created directory at: artifacts/model_evaluation]
[2025-01-30 16:56:10,079 : INFO : 1798623205 : Loading preprocessor and model...]
[2025-01-30 16:56:10,389 : INFO : 1798623205 : Loading test data from artifacts/data_transformation/test.csv...]
[2025-01-30 16:56:10,406 : INFO : 1798623205 : Test data shape: X=(400, 11), y=(400,)]
[2025-01-30 16:56:10,407 : INFO : 1798623205 : Preprocessing test features...]
[2025-01-30 16:56:10,412 : INFO : 1798623205 : Making predictions on the test data...]
[2025-01-30 16:56:10,434 : INFO : 1798623205 : Evaluating model performance...]
[2025-01-30 16:56:10,447 : INFO : 1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
