In [95]:
# import sys
import os
# sys.path.append(os.path.abspath(".."))
os.chdir("/Users/harshdhiman/Documents/Demand Forecasting/demand-forecast")


In [None]:
import demand_forecast_engine #package
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from demand_forecast_engine.preprocessing.dataset import DataSetLoader
from demand_forecast_engine.feature_engineering.ts_feats import StatisticalFeats
from prophet import Prophet

In [97]:
file_path='data/sales_data.csv'

In [98]:
data=DataSetLoader(file_path)
df=data.read_data()
df=data.cleandata(df)
df=df.sort_values(by='Date',ascending=True)

In [99]:
df.columns

Index(['Date', 'Store ID', 'Product ID', 'Category', 'Region',
       'Inventory Level', 'Units Sold', 'Units Ordered', 'Price', 'Discount',
       'Weather Condition', 'Promotion', 'Competitor Pricing', 'Seasonality',
       'Epidemic', 'Demand'],
      dtype='object')

In [100]:
feat=StatisticalFeats(df,covariates=["Price","Demand","Competitor Pricing"],time_col="Date",
                      group_col=['Region','Category','Store ID','Product ID'])

new_df=feat.rolling_window_feats(window_len=7)

In [101]:
new_df=feat.create_lag_features(lag_list=[7,14])

In [102]:
train_cut_off_date='2023-11-30'
train_df=new_df[new_df["Date"]<train_cut_off_date]
test_df=new_df[new_df["Date"]>train_cut_off_date]

In [103]:
forecast_config={"Category":"Electronics",
                 "Region":"North",
                 "Product ID":"P0001",
                 "Store ID":"S001"}

In [104]:
def create_forecast_data(train_df:pd.DataFrame,test_df:pd.DataFrame,forecast_config:dict):
    train_df=train_df.copy()
    test_df=test_df.copy()

    train_df=train_df[ (train_df["Category"]==forecast_config['Category']) &
                      (train_df["Region"]==forecast_config['Region']) &
                      (train_df["Product ID"]==forecast_config['Product ID']) &
                      (train_df["Store ID"]==forecast_config['Store ID']) ][["Date","Demand"]]
    

    test_df=test_df[ (test_df["Category"]==forecast_config['Category']) &
                      (test_df["Region"]==forecast_config['Region']) &
                      (test_df["Product ID"]==forecast_config['Product ID']) &
                      (test_df["Store ID"]==forecast_config['Store ID']) ][["Date","Demand"]]
    return train_df,test_df                   


In [105]:
#prophet- univariate
def univariate_prophet_train_and_forecast(df,var,forecast_horizon):
    forecast_horizon=int(forecast_horizon)+1
    df=df.copy()
    df=df[["Date",var]]
    df=df.rename(columns=({"Date":"ds",var:"y"}))
    model = Prophet(yearly_seasonality=True,weekly_seasonality=True,daily_seasonality=False)
    print(f'Initiated Univariate Prophet for training...')
    print(f'Starting model training')
    model.fit(df)
    print(f'Model trained')
    #create future dataframe
    future = model.make_future_dataframe(periods=forecast_horizon)
    forecast = model.predict(future)
    print(f'Forecast completed')
    forecast_df=forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]]
    return forecast_df


In [106]:
train_data,test_data=create_forecast_data(train_df,test_df,forecast_config)
forecast_df=univariate_prophet_train_and_forecast(train_data,"Demand",forecast_horizon=test_data.shape[0])

00:13:46 - cmdstanpy - INFO - Chain [1] start processing
00:13:46 - cmdstanpy - INFO - Chain [1] done processing


Initiated Univariate Prophet for training...
Starting model training
Model trained
Forecast completed


In [107]:
test_data

Unnamed: 0,Date,Demand
69900,2023-12-01,109.0
70000,2023-12-02,160.0
70100,2023-12-03,119.0
70200,2023-12-04,73.0
70300,2023-12-05,24.0
...,...,...
75500,2024-01-26,72.0
75600,2024-01-27,99.0
75700,2024-01-28,139.0
75800,2024-01-29,112.0


In [108]:
forecast_df=forecast_df[forecast_df["ds"]>train_cut_off_date]
forecast_df=forecast_df.rename(columns=({"ds":"Date"}))
comparison_df=pd.concat([test_data,forecast_df],axis=1)
comparison_df

Unnamed: 0,Date,Demand,Date.1,yhat,yhat_lower,yhat_upper
69900,2023-12-01,109.0,NaT,,,
70000,2023-12-02,160.0,NaT,,,
70100,2023-12-03,119.0,NaT,,,
70200,2023-12-04,73.0,NaT,,,
70300,2023-12-05,24.0,NaT,,,
...,...,...,...,...,...,...
755,,,2024-01-26,90.056241,48.883426,133.381830
756,,,2024-01-27,92.707643,49.601026,136.528977
757,,,2024-01-28,95.341315,52.505770,136.179483
758,,,2024-01-29,87.772879,47.943640,133.201822


In [113]:
import numpy as np

def mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)

    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

mape(test_data["Demand"].values,forecast_df["yhat"].values)

np.float64(59.208028345986854)