In [1]:
import os 
%pwd

'/home/mrafi/Desktop/Books/Bootcamp/E2EMLOps/18-e2e/MLops_Ds1/research'

In [2]:
# Step one level up from the notebook's folder so the relative paths used by the
# later cells (e.g. "artifacts/...") resolve from the parent directory.
# The sentinel makes this cell idempotent: a plain `os.chdir("../")` climbs one
# more level every time the cell is re-run in the same kernel.
if "_PROJECT_ROOT" not in globals():
    os.chdir("../")
    _PROJECT_ROOT = os.getcwd()

In [3]:
%pwd

'/home/mrafi/Desktop/Books/Bootcamp/E2EMLOps/18-e2e/MLops_Ds1'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataIngestionConfig:
    """Typed container for the ``data_ingestion`` section of the config file.

    Field order is part of the interface, since instances can be built
    positionally. The annotations are not enforced at runtime.
    """

    # Stage directory; ConfigurationManager creates it before building this object.
    root_dir: Path
    # Presumably the remote location of the raw dataset -- confirm against config.yaml.
    source_URL: str
    # Presumably where the downloaded file is stored locally -- confirm.
    local_data_file: Path
    # Presumably the extraction target for the downloaded archive -- confirm.
    unzip_dir: Path

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataValidationConfig:
    """Typed container for the ``data_validation`` section of the config file.

    Field order is part of the interface, since instances can be built
    positionally. The annotations are not enforced at runtime.
    """

    # Stage directory; ConfigurationManager creates it before building this object.
    root_dir: Path
    # Path of the text file that records the validation outcome.
    STATUS_FILE: Path
    # Presumably the location of the unzipped data to validate -- confirm.
    unzip_data_dir: Path
    # Filled from the schema file's COLUMNS entry by ConfigurationManager.
    all_schema: dict

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataTransformationConfig:
    """Typed container for the ``data_transformation`` section of the config file.

    Field order is part of the interface, since instances can be built
    positionally. The annotations are not enforced at runtime.
    """

    # Stage directory; ConfigurationManager creates it before building this object.
    root_dir: Path
    # Presumably the input dataset for the transformation stage -- confirm.
    data_path: Path

In [7]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelTrainerConfig:
    """Typed container for everything the model-training stage reads.

    Field order is part of the interface, since instances can be built
    positionally. The annotations are not enforced at runtime.
    """

    # Stage directory; the trained model file is written inside it.
    root_dir: Path
    # CSV read by ModelTrainer to fit the model.
    train_data_path: Path
    # CSV holding the test split.
    test_data_path: Path
    # File name (joined onto root_dir) under which the model is saved.
    model_name: str
    # ElasticNet hyper-parameters, taken from the params file.
    alpha: float
    l1_ratio: float
    # Name of the column to predict, taken from the schema file.
    target_column: str

In [8]:
from src.data_science.constants import CONFIG_FILE_PATH, SCHEMA_FILE_PATH, PARAMS_FILE_PATH
from src.data_science.utils.common import read_yaml, create_dir
class ConfigurationManager:
    """Reads the config, params and schema YAML files and builds per-stage configs.

    Every ``get_*_config`` method looks up its section of the loaded config,
    creates that stage's ``root_dir`` through ``create_dir`` and returns the
    matching dataclass populated from the loaded values.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        schema_file_path=SCHEMA_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
    ):
        """Load the three YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: location of the main configuration YAML.
            schema_file_path: location of the schema YAML.
            params_file_path: location of the hyper-parameter YAML.
        """
        # Load order (config, params, schema) is kept as-is; read_yaml logs each load.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_file_path)

        # NOTE(review): the key is spelled `artifacts_toot`, which looks like a typo
        # for `artifacts_root` -- confirm against config.yaml before renaming it.
        create_dir([self.config.artifacts_toot])

    def get_dataingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion directory and return the ingestion settings."""
        section = self.config.data_ingestion
        create_dir([section.root_dir])
        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )

    def get_datavalidation_config(self) -> DataValidationConfig:
        """Create the validation directory and return the validation settings.

        ``all_schema`` is filled with the schema file's ``COLUMNS`` entry.
        """
        section = self.config.data_validation
        columns = self.schema.COLUMNS
        create_dir([section.root_dir])
        return DataValidationConfig(
            root_dir=section.root_dir,
            STATUS_FILE=section.STATUS_FILE,
            unzip_data_dir=section.unzip_data_dir,
            all_schema=columns,
        )

    def get_datatransformation_config(self) -> DataTransformationConfig:
        """Create the transformation directory and return the transformation settings."""
        section = self.config.data_transformation
        create_dir([section.root_dir])
        return DataTransformationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )

    def get_modeltrainer_config(self) -> ModelTrainerConfig:
        """Create the trainer directory and return the training settings.

        Paths and the model name come from the config file, ``alpha`` and
        ``l1_ratio`` from the params file's ``ElasticNet`` entry, and the target
        column name from the schema file's ``TARGET_COLUMN.name``.
        """
        section = self.config.model_trainer
        hyper_params = self.params.ElasticNet
        target = self.schema.TARGET_COLUMN
        create_dir([section.root_dir])
        return ModelTrainerConfig(
            root_dir=section.root_dir,
            train_data_path=section.train_data_path,
            test_data_path=section.test_data_path,
            model_name=section.model_name,
            alpha=hyper_params.alpha,
            l1_ratio=hyper_params.l1_ratio,
            target_column=target.name,
        )
        
    

In [None]:
from src.data_science import logger
import pandas as pd
from sklearn.linear_model import ElasticNet
import joblib
class ModelTrainer:
    """Fits an ElasticNet model on the training CSV and saves it with joblib."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the trainer settings.

        Args:
            config: data paths, model file name, hyper-parameters and the name
                of the target column.
        """
        self.config = config

    def model_training(self):
        """Train the model and write it to ``<root_dir>/<model_name>``.

        Reads the training CSV, splits off the target column, fits an
        ``ElasticNet`` with the configured ``alpha`` and ``l1_ratio``
        (``random_state`` fixed at 42) and dumps the fitted model.

        The test split is intentionally not loaded: training never used it, so
        reading and splitting it was dead code and wasted I/O.

        Raises:
            Whatever ``pandas.read_csv`` raises for an unreadable training file,
            and ``KeyError`` if the target column is absent from it.
        """
        cfg = self.config
        target = cfg.target_column

        train_data = pd.read_csv(cfg.train_data_path, sep=",")
        # `columns=` already selects the axis, so no `axis=1` is needed.
        train_x = train_data.drop(columns=[target])
        train_y = train_data[target]

        logger.info("Training Starting")
        model = ElasticNet(alpha=cfg.alpha, l1_ratio=cfg.l1_ratio, random_state=42)
        model.fit(train_x, train_y)

        # Build the destination once so the dump and the log message cannot diverge.
        model_path = os.path.join(cfg.root_dir, cfg.model_name)
        joblib.dump(model, model_path)
        logger.info(f"Model saved to {model_path}")
        
        

In [21]:
# Build the trainer settings and run training.
# The former `try: ... except Exception as e: raise e` wrapper re-raised every
# error unchanged, so it is dropped; exceptions propagate exactly as before.
# `config` stays a notebook-level name because a later cell calls
# `config.get_datavalidation_config()`.
config = ConfigurationManager()
model_trainer_config = config.get_modeltrainer_config()
model_trainer = ModelTrainer(model_trainer_config)
model_trainer.model_training()


2025-05-03 19:16:20,304, common.py, 19, INFO, config/config.yaml file loaded sucessfully
2025-05-03 19:16:20,307, common.py, 19, INFO, params.yaml file loaded sucessfully
2025-05-03 19:16:20,309, common.py, 19, INFO, schema.yaml file loaded sucessfully
2025-05-03 19:16:20,311, common.py, 32, INFO, Directory artifacts created
2025-05-03 19:16:20,312, common.py, 32, INFO, Directory artifacts/model_trainer created


In [None]:

# Despite its name, this holds the DataValidationConfig returned by the manager,
# not a status value. It relies on `config` created by the training cell above.
data_validation_status = config.get_datavalidation_config()


[2025-05-03 08:28:09,747: INFO: common: Directory artifacts/data_validation created]


In [None]:
# Display the status-file path stored on the validation config.
data_validation_status.STATUS_FILE

'artifacts/data_validation/status.txt'

In [None]:
# Read the validation status file and report whether its last
# whitespace-separated token contains "True".
with open(Path(data_validation_status.STATUS_FILE), "r") as f:
    content = f.read().split()
print(content)
# An empty status file splits to []; guard so `content[-1]` cannot raise IndexError.
if content and 'True' in content[-1]:
    print("found")

['Validation', 'Sucessful.', 'Validation', 'Status:True']
found
