In [1]:
import os
# Move the working directory up one level; the recorded output below shows it
# ending at the `mlflow-project` folder, presumably the project root.
# NOTE(review): this is a relative move, so re-running this cell without
# restarting the kernel climbs one more directory each time.
os.chdir('../')
# Display the resulting working directory as the cell output.
%pwd

'd:\\Programming\\DataScienceProjects\\mlflow-project'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings for the model-training stage.

    Instances are assembled by ``ConfigurationManager.get_model_config`` from
    the config, params and schema YAML files.
    """
    # Stage directory; created via create_directories when the config is built.
    root_dir: Path
    # Training data file, read with pandas.read_csv by ModelTrainer.train.
    training_data_file: Path
    # Testing data file, taken from `model_trainer.testing_data_file` in the config.
    testing_data_file: Path
    # Destination handed to joblib.dump when the fitted model is saved.
    model: str
    # ElasticNet `alpha` hyperparameter, taken from `ElasticNet.alpha` in the params.
    alpha: float
    # ElasticNet `l1_ratio` hyperparameter, taken from `ElasticNet.l1_ratio` in the params.
    l1_ratio: float
    # Name of the label column, taken from `TARGET.name` in the schema.
    target_column: str

In [3]:
from mlflowProject.constants import *
from mlflowProject.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage configuration objects."""

    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            param_filepath =  PARAMS_FILE_PATH,
            schema_filepath = SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            param_filepath: Location of the hyperparameter YAML.
            schema_filepath: Location of the data-schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(param_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_config(self) -> ModelTrainerConfig:
        """Build the configuration for the model-training stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelTrainerConfig: populated from the ``model_trainer`` section of
            the config, the ``ElasticNet`` section of the params and the
            ``TARGET`` entry of the schema.
        """
        trainer_section = self.config.model_trainer
        elastic_net_params = self.params.ElasticNet

        create_directories([trainer_section.root_dir])

        # Coerce the raw YAML values to the types ModelTrainerConfig declares:
        # path fields become Path objects and the hyperparameters become
        # floats, so a numeric value loaded as an int or a string (for example
        # an exponent literal the YAML loader did not recognise as a number)
        # does not reach the estimator unchanged.
        return ModelTrainerConfig(
            root_dir=Path(trainer_section.root_dir),
            training_data_file=Path(trainer_section.training_data_file),
            testing_data_file=Path(trainer_section.testing_data_file),
            model=trainer_section.model,
            alpha=float(elastic_net_params.alpha),
            l1_ratio=float(elastic_net_params.l1_ratio),
            target_column=self.schema.TARGET.name,
        )

In [5]:
import pandas as pd
from mlflowProject import logger
from sklearn.linear_model import ElasticNet
import joblib

In [8]:
class ModelTrainer:
    """Fits an ElasticNet model on the training data and saves it to disk."""

    def __init__(self, config: ModelTrainerConfig) -> None:
        """Keep the configuration that :meth:`train` reads its settings from."""
        self.config = config

    def train(self):
        """Fit an ElasticNet on the training CSV and persist it with joblib.

        The feature matrix is every column of the training file except
        ``config.target_column``; that column is the label. The fitted model
        is written to ``config.model``.

        Only the training file is read here: the testing file was previously
        loaded and split but never used, which cost I/O and made training fail
        whenever that file was missing.

        Raises:
            FileNotFoundError: if the training file does not exist.
            KeyError: if the target column is absent from the training data.
        """
        logger.info("Training model...")

        target = self.config.target_column
        train_df = pd.read_csv(self.config.training_data_file)
        X_train = train_df.drop(columns=[target])
        y_train = train_df[target]

        # Fixed random_state keeps the fit reproducible between runs.
        model = ElasticNet(
            alpha=self.config.alpha,
            l1_ratio=self.config.l1_ratio,
            random_state=42,
        )
        model.fit(X_train, y_train)

        joblib.dump(model, self.config.model)

        logger.info("Model trained and saved")

In [9]:
# Run the training stage end to end: load the configuration, build the
# trainer and fit/save the model.
try:
    config = ConfigurationManager()
    model_config = config.get_model_config()
    model_trainer = ModelTrainer(model_config)
    model_trainer.train()
except Exception as e:
    # logger.exception logs at ERROR level and attaches the traceback, so the
    # failing line is recorded in the log and not only the message.
    # NOTE(review): assumes `logger` is a standard logging.Logger - confirm.
    logger.exception(e)
    # A bare raise re-raises the active exception with its original traceback.
    raise

[2024-04-24 02:31:38,015: INFO: common] File loaded: config\config.yaml
[2024-04-24 02:31:38,018: INFO: common] File loaded: params.yaml
[2024-04-24 02:31:38,023: INFO: common] File loaded: schema.yaml
[2024-04-24 02:31:38,030: INFO: common] Directory already exists: artifacts
[2024-04-24 02:31:38,031: INFO: common] Directory already exists: artifacts/model_trainer
[2024-04-24 02:31:38,031: INFO: 3315276761] Training model...
[2024-04-24 02:31:38,046: INFO: 3315276761] Model trained and saved
