In [4]:
import os
# Step up one directory from the current working directory - presumably to the
# project root so the `src.retailDemand` imports in the next cell resolve (TODO confirm).
# NOTE: the path is relative, so re-running this cell moves up one more level each time;
# restart the kernel before re-running.
os.chdir("../")
%pwd

'c:\\Users\\rohan\\OneDrive\\Desktop\\New folder\\RetailDemandForecast'

In [5]:
import pickle
import pandas as pd
import numpy as np
from pathlib import Path
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from typing import Dict, Any
from src.retailDemand.entity.config_entity import RetailForecastingConfig
from src.retailDemand.utils.common import createDir
from src.retailDemand import logger

In [None]:
class RetailDemandForecast:
    """Train, evaluate, query and persist one Prophet model per store.

    State kept on the instance:
        data:      merged DataFrame produced by ``dataPrep`` (``None`` until loaded).
        model:     ``{store_id: fitted Prophet model}``.
        forecasts: ``{store_id: forecast DataFrame produced by predictStore}``.
        results:   ``{store_id: {"MAPE": ..., "MAE": ...}}`` produced by ``modelEval``.
    """

    # Column layout of every results table built from ``self.results``.
    _RESULT_COLUMNS = ['Store', 'MAPE', 'MAE']

    def __init__(self, config: "RetailForecastingConfig"):
        """Store the configuration and create the root, model and results directories."""
        self.config = config
        self.data = None
        self.model = {}
        self.forecasts = {}
        self.results = {}
        self.logger = logger

        createDir([self.config.root_dir, self.config.model_dir, self.config.results_dir])

    def dataPrep(self) -> pd.DataFrame:
        """Load ``train.csv``, ``features.csv`` and ``stores.csv`` and merge them.

        The merged frame gets a parsed ``Date`` column plus ``Year``, ``Month``,
        ``Week``, ``Day_Name`` and ``Month_Name`` columns, and is stored on
        ``self.data``.

        Returns:
            The merged DataFrame.

        Raises:
            Exception: any error raised while reading or merging is logged and re-raised.
        """
        try:
            dataset_dir = self.config.dataset_dir

            self.logger.info("Loading csv files for merging")
            train_df = pd.read_csv(dataset_dir / "train.csv")
            features_df = pd.read_csv(dataset_dir / "features.csv")
            stores_df = pd.read_csv(dataset_dir / "stores.csv")

            merged = train_df.merge(features_df, on=['Store', 'Date', 'IsHoliday'], how='inner')
            final_data = merged.merge(stores_df, on=['Store'], how='inner')

            final_data['Date'] = pd.to_datetime(final_data['Date'])

            dates = final_data['Date'].dt
            final_data['Year'] = dates.year
            final_data['Month'] = dates.month
            final_data['Week'] = dates.isocalendar().week
            # day_name / month_name are methods: without the call the column would
            # hold the bound method object instead of the names.
            final_data['Day_Name'] = dates.day_name()
            final_data['Month_Name'] = dates.month_name()

            if 'Weekly_Sales' in final_data.columns and 'Weekly_Sales_x' not in final_data.columns:
                final_data['Weekly_Sales_x'] = final_data['Weekly_Sales']

            self.data = final_data
            self.logger.info(f"Final merged data shape: {final_data.shape}")
            self.logger.info(f"Columns: {list(final_data.columns)}")

            return final_data
        except Exception as e:
            self.logger.error(f"Error loading and merging data: {str(e)}")
            raise

    def storeDataPerp(self, store_id: int) -> pd.DataFrame:
        """Build the Prophet input frame (``ds``, ``y``) for a single store.

        Rows of the store are summed per date, so the result has one row per
        date, ordered by date (``groupby`` sorts its keys).

        Raises:
            ValueError: if ``dataPrep`` has not been called yet.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Call dataPrep() first.")

        date_col = self.config.date_column
        target_col = self.config.target_column

        is_store = self.data[self.config.store_column] == store_id
        store_rows = self.data.loc[is_store, [date_col, target_col]]
        store_data = store_rows.groupby(date_col).sum().reset_index()
        store_data.columns = ['ds', 'y']

        self.logger.info(f"Prepared data for store {store_id}.")
        return store_data

    def baseModel(self) -> "Prophet":
        """Return a new, unfitted Prophet model using the configured seasonality settings."""
        model = Prophet(
            daily_seasonality=self.config.daily_seasonality,
            weekly_seasonality=self.config.weekly_seasonality,
            yearly_seasonality=self.config.yearly_seasonality,
            seasonality_mode=self.config.seasonality_mode
        )
        return model

    def trainStore(self, store_id: int) -> "Prophet":
        """Fit a model on the first ``config.train_split`` dates of a store.

        The fitted model is stored in ``self.model[store_id]`` and returned.

        Raises:
            Exception: any error raised during preparation or fitting is logged and re-raised.
        """
        try:
            self.logger.info(f"Training for store: {store_id}")
            store_data = self.storeDataPerp(store_id=store_id)
            train = store_data.iloc[:self.config.train_split]
            model = self.baseModel()
            model.fit(train)

            self.model[store_id] = model
            # A forecast cached from an earlier fit no longer belongs to this model.
            self.forecasts.pop(store_id, None)
            self.logger.info(f"model trained for store {store_id}")
            return model
        except Exception as e:
            self.logger.error(f"error training model for store {store_id}: {str(e)}")
            raise

    def predictStore(self, store_id: int) -> pd.DataFrame:
        """Forecast ``config.forecast_periods`` steps ahead for a trained store.

        The forecast is cached in ``self.forecasts[store_id]`` and returned.

        Raises:
            ValueError: if no model has been trained for ``store_id``.
        """
        if store_id not in self.model:
            raise ValueError(f"Model for store {store_id} not found. Train the model first.")

        model = self.model[store_id]

        future = model.make_future_dataframe(
            periods=self.config.forecast_periods,
            freq=self.config.frequency
        )
        forecast = model.predict(future)
        self.forecasts[store_id] = forecast

        self.logger.info(f"Forecasts generated for store {store_id}")
        return forecast

    def modelEval(self, store_id: int) -> Dict[str, float]:
        """Score a trained store model on the dates after ``config.train_split``.

        Predictions are made for the hold-out dates themselves, so every
        prediction is compared with the actual value of the same date regardless
        of ``forecast_periods`` / ``frequency``.

        Returns:
            ``{"MAPE": <percent>, "MAE": <value>}``, both rounded to 4 decimals;
            also stored in ``self.results[store_id]``.

        Raises:
            ValueError: if data is not loaded, the store has no trained model, or
                no rows remain after the training split.
        """
        store_data = self.storeDataPerp(store_id)
        test_data = store_data.iloc[self.config.train_split:]

        if store_id not in self.model:
            raise ValueError(f"Model for store {store_id} not found. Train the model first.")
        if test_data.empty:
            raise ValueError(
                f"No hold-out rows for store {store_id}: "
                f"train_split={self.config.train_split} covers all {len(store_data)} dates."
            )

        # Keep the forecast cache populated, as before, for callers that read it.
        if store_id not in self.forecasts:
            self.predictStore(store_id=store_id)

        # Taking the tail of the cached forecast only lines up with the hold-out
        # rows when the forecast horizon and frequency reproduce those dates
        # exactly; predicting on the hold-out dates is always aligned.
        # test_data is already ordered by date, matching the order of the output.
        holdout_forecast = self.model[store_id].predict(test_data[['ds']])
        y_pred = holdout_forecast['yhat'].values
        y_true = test_data['y'].values

        mape = mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred) * 100
        mae = mean_absolute_error(y_true, y_pred)

        metrics = {
            'MAPE': round(mape, 4),
            'MAE': round(mae, 4)
        }

        self.results[store_id] = metrics
        self.logger.info(f"Evaluation completed for store {store_id}: MAPE={mape:.2f}%")

        return metrics

    def _results_frame(self) -> pd.DataFrame:
        """Return ``self.results`` as a Store/MAPE/MAE table (empty table if no results)."""
        if not self.results:
            # from_dict on an empty mapping yields no metric columns, so the
            # three-name column assignment below would fail.
            return pd.DataFrame(columns=self._RESULT_COLUMNS)

        frame = pd.DataFrame.from_dict(self.results, orient='index').reset_index()
        frame.columns = self._RESULT_COLUMNS
        return frame

    def trainAll(self) -> pd.DataFrame:
        """Train and evaluate a model for every store present in the data.

        Loads the data first if necessary. A store that fails is logged and
        skipped so the remaining stores are still processed.

        Returns:
            A DataFrame with columns ``Store``, ``MAPE``, ``MAE``; empty when no
            store has been evaluated.
        """
        if self.data is None:
            self.dataPrep()

        stores = self.data[self.config.store_column].unique()

        self.logger.info(f"Starting training {len(stores)} stores")

        for store_id in stores:
            try:
                self.trainStore(store_id)
                self.modelEval(store_id)
                self.logger.info(f"Completed {store_id}")
            except Exception as e:
                self.logger.error(f"Failed for store {store_id}: {str(e)}")
                continue

        resDf = self._results_frame()

        self.logger.info(f"Model Training completed for {len(stores)} stores")
        return resDf

    def predSales(self, store_id: int, date: str) -> Dict[str, Any]:
        """Predict sales of one store for a single date.

        Args:
            store_id: store whose trained model is used.
            date: any value accepted by ``pd.to_datetime``.

        Returns:
            A dict with ``store_id``, ``prediction_date``, ``predicted_sales``,
            ``lower_bound`` and ``upper_bound`` (values rounded to 2 decimals).

        Raises:
            ValueError: if no model has been trained for ``store_id``.
        """
        if store_id not in self.model:
            available_stores = list(self.model.keys())
            raise ValueError(f"Model for store {store_id} not found. Available stores: {available_stores}")

        model = self.model[store_id]
        future = pd.DataFrame({'ds': [pd.to_datetime(date)]})
        forecast = model.predict(future)

        res = {
            "store_id": store_id,
            "prediction_date": date,
            "predicted_sales": round(forecast['yhat'].iloc[0], 2),
            'lower_bound': round(forecast['yhat_lower'].iloc[0], 2),
            'upper_bound': round(forecast['yhat_upper'].iloc[0], 2)
        }

        self.logger.info(f"prediction for store {store_id} on {date} is ${res['predicted_sales']:,.2f}")
        return res

    def save_model(self) -> None:
        """Pickle every trained model to ``config.model_dir``.

        Writes one ``prophet_store_<id>.pkl`` per store plus ``all_models.pkl``
        holding the whole ``{store_id: model}`` mapping.
        """
        self.logger.info("Saving models")

        # NOTE(review): the Prophet docs recommend its JSON serialization over pickle
        # for saved models - confirm before changing; pickle is kept here so existing
        # loaders of these .pkl files keep working. Only unpickle trusted files.
        for store_id, model in self.model.items():
            model_path = self.config.model_dir / f"prophet_store_{store_id}.pkl"
            with open(model_path, 'wb') as f:
                pickle.dump(model, f)

        all_models_path = self.config.model_dir / "all_models.pkl"
        with open(all_models_path, 'wb') as f:
            pickle.dump(self.model, f)

        self.logger.info(f"All {len(self.model)} models saved successfully")

    def save_res(self) -> None:
        """Write the evaluation results to ``model_evaluation_results.csv`` in ``config.results_dir``."""
        res_df = self._results_frame()

        results_path = self.config.results_dir / "model_evaluation_results.csv"
        res_df.to_csv(results_path, index=False)

        self.logger.info(f"Results saved to {results_path}")

In [None]:
from dataclasses import dataclass


@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable configuration for preparing the base model.

    ``root_dir`` and ``base_model_path`` are required; the seasonality fields
    have defaults. The seasonality field names match the keyword arguments
    passed to ``Prophet`` elsewhere in this notebook - presumably they are
    meant to be forwarded to it (TODO confirm against the consumer).
    """

    # Required locations.
    root_dir: Path
    base_model_path: Path

    # Optional seasonality settings.
    seasonality_mode: str = 'multiplicative'
    daily_seasonality: bool = False
    weekly_seasonality: bool = True
    yearly_seasonality: bool = True

In [None]:
class BaseModelCreation:
    def __init__(self,):
        pass

    def getBaseModel(self)->Prophet:
        model=Prophet(

        )