In [1]:
import os

In [2]:
%pwd

'f:\\Data Science Projects\\WineQualityCheck\\research'

In [3]:
os.chdir('../')

In [4]:
%pwd

'f:\\Data Science Projects\\WineQualityCheck'

In [5]:
from dataclasses import dataclass
from pathlib import Path

In [6]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    alpha: float
    l1_ratio: float
    target_column: str

In [7]:
from MLProject.constants import *
from MLProject.utils.common import read_yaml,create_directories

In [8]:
class ConfigurationManager:
    def __init__(self,
            config= read_yaml(CONFIGFILE_PATH),
            schema = read_yaml(SCHEMA_FILE_PATH),
            params = read_yaml(PARAMS_FILE_PATH)):
        self.config = config
        self.schema = schema
        self.params = params
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:

        create_directories([self.config.model_trainer.root_dir])

        model_tranier_config = self.config.model_trainer
        params_config = self.params.ElasticNet
        schema_config = self.schema
        return ModelTrainerConfig(
            root_dir=model_tranier_config.root_dir,
            train_data_path=model_tranier_config.train_data_path,
            test_data_path=model_tranier_config.test_data_path,
            model_name=model_tranier_config.model_name,
            alpha=params_config.alpha,
            l1_ratio=params_config.l1_ratio,
            target_column=schema_config.TARGET_COLUMN
        )

[2025-01-26 18:54:38,582 : INFO : common : reading the config file from config\config.yaml]
[2025-01-26 18:54:38,584 : INFO : common : reading the config file from schema.yaml]
[2025-01-26 18:54:38,584 : INFO : common : reading the config file from params.yaml]


In [9]:
import pandas as pd
import os
from MLProject import logger
from sklearn.linear_model import ElasticNet
import joblib

In [18]:
class ModelTrainer:
    def __init__(self, config:ModelTrainerConfig):
        self.config = config

    def train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        train_x = train_data.drop([self.config.target_column.name], axis=1)
        test_x = test_data.drop([self.config.target_column.name], axis=1)
        train_y = train_data[[self.config.target_column.name]]
        test_y = test_data[[self.config.target_column.name]]

        lr = ElasticNet(alpha=self.config.alpha, l1_ratio=self.config.l1_ratio, random_state=42)

        lr.fit(train_x,train_y)

        joblib.dump(lr, os.path.join(self.config.root_dir, self.config.model_name))

In [21]:
try:
    config  = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    logger.error(e)
    raise e


[2025-01-26 19:12:59,221 : INFO : common : created directory at artifacts]
[2025-01-26 19:12:59,257 : ERROR : 1993616256 : Input X contains NaN.
ElasticNet does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values]


ValueError: Input X contains NaN.
ElasticNet does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values