In [2]:
import os
# Step up one directory so that the relative paths used by later cells resolve
# from the parent folder (presumably the project root — confirm against the repo layout).
# NOTE: this is not idempotent — every re-run of this cell moves one more level up.
os.chdir('..')

In [3]:
from dataclasses import dataclass
from pathlib import Path

In [4]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of settings handed to the model-training step.

    Despite the "DataTransformation" name, this notebook builds the object in
    ``ConfigurationManager.get_model_trainer_config`` and passes it to
    ``ModelTrainer``. ``frozen=True`` means fields cannot be reassigned after
    construction. The annotations are not enforced at runtime, so the values
    keep whatever type the YAML loader produced.
    """
    # Output directory; joined with ``model_name`` to form the saved model path.
    root_dir: Path
    # CSV file read by ``ModelTrainer.train``.
    train_data_path: Path
    # Filled from the ``model_trainer`` config section; not read by the visible training code.
    test_data_path: Path
    # Filled from ``TARGET_COLUMN.name`` in the schema file; not read by the visible training code.
    target_column: str
    # File name of the serialized predictor inside ``root_dir``.
    model_name: str
    # The five values below come from the ``ElasticNet`` section of the params
    # file; the visible ``ModelTrainer.train`` does not use any of them.
    alpha: float
    l1_ratio: float
    random_state: int
    n_estimators: int
    learning_rate: float

In [5]:
from MLOps.constants import *
from MLOps.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the config, params and schema YAML files and builds stage configs."""

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
        schema_file_path=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and make sure the artifacts root exists.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the hyper-parameter YAML.
            schema_file_path: Location of the schema YAML.
        """
        yaml_sources = (config_file_path, params_file_path, schema_file_path)
        self.config, self.params, self.schema = [read_yaml(source) for source in yaml_sources]
        create_directories(list_of_directories=[self.config.artifacts_root])

    def get_model_trainer_config(self) -> DataTransformationConfig:
        """Assemble the settings object for the model-trainer stage.

        Creates the trainer's ``root_dir`` as a side effect.

        Returns:
            DataTransformationConfig: Paths and model name from the
            ``model_trainer`` config section, hyper-parameters from the
            ``ElasticNet`` params section, and the target column name from
            the schema's ``TARGET_COLUMN`` entry.
        """
        trainer_section = self.config.model_trainer
        target_spec = self.schema.TARGET_COLUMN
        hyperparams = self.params.ElasticNet
        create_directories([trainer_section.root_dir])

        # Collect the fields in the same order the sections are consulted.
        settings = {
            field: getattr(trainer_section, field)
            for field in ("root_dir", "train_data_path", "test_data_path", "model_name")
        }
        settings.update(
            (field, getattr(hyperparams, field))
            for field in ("alpha", "l1_ratio", "random_state", "n_estimators", "learning_rate")
        )
        settings["target_column"] = target_spec.name
        return DataTransformationConfig(**settings)

In [6]:
import pandas as pd
import os
from MLOps import logger
from sklearn.linear_model import ElasticNet
import joblib
import xgboost as xgb

In [29]:
from prophet import Prophet
import cloudpickle 
from datetime import datetime
class Predictor:
    """Wraps a fitted forecasting model and rescales its forecasts by a fixed rate.

    The object bundles everything needed at prediction time: the fitted model,
    the multiplier applied to the forecast columns, and the date of the last
    training observation (used to bridge the gap between training and "now").
    """

    def __init__(self, model, dollar_rate, last_date):
        """Store the fitted model and the values needed to post-process forecasts.

        Args:
            model: Fitted model exposing ``make_future_dataframe`` and ``predict``.
            dollar_rate: Factor the ``yhat`` columns are multiplied by.
            last_date: Date of the last training observation (anything
                ``pd.to_datetime`` accepts).
        """
        self.model = model
        self.dollar_rate = dollar_rate
        self.last_date = last_date

    def predict(self, periods, freq='W', include_history=False, start_from=None) -> pd.DataFrame:
        """Forecast ``periods`` steps of ``freq`` beyond the current moment.

        The model can only extend its own history, so the horizon is widened by
        the number of ``freq`` steps that have already elapsed since
        ``last_date``. Counting that gap in ``freq`` units (rather than days)
        keeps the number of returned future rows equal to ``periods`` for any
        frequency, not only for daily data.

        Args:
            periods: Number of future steps wanted after "now".
            freq: Pandas frequency string of the forecast grid.
            include_history: If True, keep the historical rows in the result.
            start_from: With ``include_history=True``, drop rows dated before
                this value. Ignored otherwise.

        Returns:
            pd.DataFrame: The model's prediction frame with ``yhat``,
            ``yhat_lower`` and ``yhat_upper`` multiplied by ``dollar_rate``.
        """
        # Sample the clock once so the horizon and the filter agree.
        now = datetime.now()
        last_date = pd.to_datetime(self.last_date)

        if last_date < now:
            # Grid points strictly after the last training date and not later
            # than now: these are steps that must be skipped over.
            elapsed = pd.date_range(start=last_date, end=now, freq=freq)
            gap_periods = int((elapsed > last_date).sum())
        else:
            # Training data already reaches the present (or beyond): no gap,
            # and never shrink the requested horizon.
            gap_periods = 0

        future = self.model.make_future_dataframe(
            periods=periods + gap_periods, freq=freq, include_history=include_history
        )
        predictions = self.model.predict(future)

        if not include_history:
            predictions = predictions[predictions['ds'] > now]
        elif start_from:
            predictions = predictions[predictions['ds'] >= pd.to_datetime(start_from)]

        # Work on an explicit copy: avoids SettingWithCopyWarning on the
        # filtered slice and leaves the frame returned by the model untouched.
        predictions = predictions.copy()
        scaled_columns = ['yhat', 'yhat_lower', 'yhat_upper']
        predictions[scaled_columns] = predictions[scaled_columns] * self.dollar_rate
        return predictions

class ModelTrainer:
    """Fits a Prophet model on the training CSV and saves it wrapped in a Predictor."""

    def __init__(self, config: DataTransformationConfig):
        """Keep the stage configuration for later use by ``train``."""
        self.config = config

    def train(self):
        """Fit the forecaster and write the pickled predictor to disk.

        Reads the CSV at ``config.train_data_path``, renames ``date`` to ``ds``
        and ``today`` to ``y``, fits Prophet with yearly seasonality, and dumps
        a ``Predictor`` (built from the last CSV row's ``dollar_rate`` and
        ``date``) to ``config.root_dir/config.model_name`` with cloudpickle.
        """
        cfg = self.config
        print(f"Training model with config: {cfg.train_data_path}")
        raw_frame = pd.read_csv(cfg.train_data_path)

        # Prophet expects the time column as `ds` and the target as `y`.
        history = raw_frame.assign(date=pd.to_datetime(raw_frame['date'])).rename(
            columns={'date': 'ds', 'today': 'y'}
        )
        print(history.tail())

        forecaster = Prophet(yearly_seasonality=True)
        forecaster.fit(history)

        # The final row of the untouched CSV supplies the rate and the last date.
        latest_row = raw_frame.iloc[-1]
        predictor = Predictor(
            model=forecaster,
            dollar_rate=latest_row['dollar_rate'],
            last_date=latest_row['date'],
        )

        destination = os.path.join(cfg.root_dir, cfg.model_name)
        with open(destination, 'wb') as handle:
            cloudpickle.dump(predictor, handle)

In [30]:
# Run the model-trainer stage end to end: load the configuration, fit the
# model and persist the predictor, logging the start and end of the stage.
try:
    logger.info(">>>>>>>>> stage Model Trainer Stage started <<<<<<<<")
    config_manager = ConfigurationManager()
    model_trainer_config = config_manager.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
    logger.info(">>>>>>>>> stage Model Trainer Stage completed <<<<<<<<")
except Exception as e:
    logger.error(f"Error in Model Trainer Stage: {e}")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

2025-07-07 01:26:57,735 - MLOpsLogger - INFO - >>>>>>>>> stage Model Trainer Stage started <<<<<<<<
Directory already exists: artifacts
Directory already exists: artifacts\model_trainer
Training model with config: artifacts/data_transformation/full.csv
            ds  temp  precip  humidity  windspeed  cloudcover   dew  \
834 2025-06-30  28.7     0.0      74.0       25.9        70.0  22.6   
835 2025-07-01  28.2     0.0      75.7       30.6        90.5  23.0   
836 2025-07-02  28.1     0.1      71.2       32.0        81.6  22.0   
837 2025-07-03  28.5     0.1      69.4       30.2        83.7  21.8   
838 2025-07-04  28.9     0.0      66.6       29.5        64.0  21.3   

     yesterday         y year_month  dollar_rate  
834      490.0  1.467342    2025-06   299.861905  
835      440.0  1.466881    2025-07   299.956250  
836      440.0  1.250182    2025-07   299.956250  
837      375.0  1.333528    2025-07   299.956250  
838      400.0  1.316859    2025-07   299.956250  
2025-07-07 01:

01:26:58 - cmdstanpy - INFO - Chain [1] start processing


2025-07-07 01:26:58,217 - cmdstanpy - INFO - Chain [1] start processing


01:26:58 - cmdstanpy - INFO - Chain [1] done processing


2025-07-07 01:26:58,427 - cmdstanpy - INFO - Chain [1] done processing
2025-07-07 01:26:58,460 - MLOpsLogger - INFO - >>>>>>>>> stage Model Trainer Stage completed <<<<<<<<
