In [None]:
import os

# Run the notebook from the repository root so that the relative data/config
# paths used below resolve. The root can be overridden per machine through the
# DEMAND_FORECAST_ROOT environment variable; when it is unset, the original
# author's checkout location is used, so existing behaviour is unchanged.
PROJECT_ROOT = os.environ.get(
    "DEMAND_FORECAST_ROOT",
    "/Users/harshdhiman/Documents/Demand Forecasting/demand-forecast",
)
os.chdir(PROJECT_ROOT)

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from prophet import Prophet
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import demand_forecast_engine #package
from demand_forecast_engine.evaluation.metrics import ForecastingMetrics
from demand_forecast_engine.feature_engineering.ts_feats import StatisticalFeats
from demand_forecast_engine.models.ElasticNet import ElasticNetModel
from demand_forecast_engine.models.tree_models import XGBoostModel
from demand_forecast_engine.preprocessing.dataset import DataSetLoader,CreateTabularData

In [None]:
# Input locations, given relative to the notebook's working directory.
file_path = "data/sales_data.csv"
config_path = "src/demand_forecast_engine/config/base.yaml"

In [None]:
# Read the raw data, clean it, and order the rows by ascending 'Date'.
data = DataSetLoader(file_path, config_path)
df = (
    data.cleandata(data.read_data())
    .sort_values(by='Date', ascending=True)
)

In [None]:
# Inspect the columns available after loading, cleaning and sorting.
df.columns

In [None]:
# Feature builder over the price/demand covariates; 'Date' is passed as the time
# column and the region/category/store/product columns as the grouping columns.
feat = StatisticalFeats(
    df,
    covariates=["Price", "Demand", "Competitor Pricing"],
    time_col="Date",
    group_col=['Region', 'Category', 'Store ID', 'Product ID'],
)

# Rolling-window features with a window length of 7.
new_df = feat.rolling_window_feats(window_len=7)

In [None]:
# Lag features at lags 7 and 14.
# NOTE(review): this reassigns new_df, replacing the frame returned by
# rolling_window_feats in the previous cell. Presumably `feat` accumulates the
# generated features internally so this frame contains both sets — confirm
# against StatisticalFeats, otherwise the rolling-window features are lost.
new_df=feat.create_lag_features(lag_list=[7,14])

In [None]:
# train_cut_off_date='2023-11-30'
# train_df=new_df[new_df["Date"]<train_cut_off_date]
# test_df=new_df[new_df["Date"]>train_cut_off_date]

In [None]:
# Selector for a single series (category / region / product / store). It is only
# consumed by the create_forecast_data helper, which is currently commented out.
forecast_config = {
    "Category": "Electronics",
    "Region": "North",
    "Product ID": "P0001",
    "Store ID": "S001",
}

In [None]:
# def create_forecast_data(train_df:pd.DataFrame,test_df:pd.DataFrame,forecast_config:dict):
#     train_df=train_df.copy()
#     test_df=test_df.copy()

#     train_df=train_df[ (train_df["Category"]==forecast_config['Category']) &
#                       (train_df["Region"]==forecast_config['Region']) &
#                       (train_df["Product ID"]==forecast_config['Product ID']) &
#                       (train_df["Store ID"]==forecast_config['Store ID']) ][["Date","Demand"]]
    

#     test_df=test_df[ (test_df["Category"]==forecast_config['Category']) &
#                       (test_df["Region"]==forecast_config['Region']) &
#                       (test_df["Product ID"]==forecast_config['Product ID']) &
#                       (test_df["Store ID"]==forecast_config['Store ID']) ][["Date","Demand"]]
#     return train_df,test_df                   


In [None]:
#prophet- univariate
def univariate_prophet_train_and_forecast(df,var,forecast_horizon,date_col="Date"):
    """Fit a univariate Prophet model on one column and forecast beyond its end.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least ``date_col`` and ``var``. It is not modified.
    var : str
        Name of the column to forecast.
    forecast_horizon : int or int-like
        Requested number of future periods. One extra period is always added,
        so the future frame extends ``forecast_horizon + 1`` periods past the
        last training date.
        NOTE(review): the extra period presumably covers the cut-off date that
        the commented-out train/test split excludes from both sides — confirm.
    date_col : str, default "Date"
        Name of the timestamp column in ``df``.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds``, ``yhat``, ``yhat_lower`` and ``yhat_upper`` for every
        row of the frame built by ``make_future_dataframe`` (with Prophet's
        defaults this also includes the training dates).
    """
    # One period more than requested (see the docstring).
    periods = int(forecast_horizon) + 1

    # Column selection and rename both return new frames, so the caller's frame
    # is left untouched without an explicit copy. Prophet is fed the timestamp
    # column as "ds" and the target column as "y".
    train = df[[date_col, var]].rename(columns={date_col: "ds", var: "y"})

    model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
    print('Initiated Univariate Prophet for training...')
    print('Starting model training')
    model.fit(train)
    print('Model trained')

    # Extend the training dates by `periods` and predict over the whole frame.
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)
    print('Forecast completed')

    return forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]]


In [None]:
# train_data,test_data=create_forecast_data(train_df,test_df,forecast_config)
# forecast_df=univariate_prophet_train_and_forecast(train_data,"Demand",forecast_horizon=test_data.shape[0])

In [None]:
# forecast_df=forecast_df[forecast_df["ds"]>train_cut_off_date]
# forecast_df=forecast_df.rename(columns=({"ds":"Date"}))
# comparison_df=pd.concat([test_data,forecast_df],axis=1)
# comparison_df

Tree models

In [None]:
# Build the train/test feature matrices and targets (model_type="XGBoost").
tabular_data = CreateTabularData(
    new_df,
    model_type="XGBoost",
    file_path=file_path,
    config_path=config_path,
)
Xtrain, ytrain, Xtest, ytest = tabular_data.train_test_data(new_df)

In [None]:
# Preview the training feature matrix.
Xtrain

In [None]:
# Train XGBoost on the training split, then forecast the test split.
xgb = XGBoostModel(
    l1_weight=0.001,
    l2_weight=0.005,
    objective='reg:squarederror',
)
xgb_model = xgb.train_xgb(Xtrain, ytrain)
forecast_vals_xgb = xgb.forecast_func(Xtest)

In [None]:
# Fit ElasticNet on the same train split and forecast the test split.
EN = ElasticNetModel(alpha=0.1, l1_penalty=0)
elasticnet_model = EN.train(Xtrain, ytrain)
forecast_vals_EN = EN.predict(Xtest)

# Coefficient table: a single intercept row, then one row per feature column.
intercept_df = pd.DataFrame(
    {'Features': ['Intercept'], 'Coefficients': [elasticnet_model.intercept_]}
)
coeff_df = pd.DataFrame(
    {'Features': Xtest.columns, 'Coefficients': elasticnet_model.coef_}
)

# Stack with the intercept first and renumber the rows from 0.
elasticnet_model_coeff_df = pd.concat([intercept_df, coeff_df], ignore_index=True)
elasticnet_model_coeff_df

In [None]:
# Report MAPE and weighted MAPE on the test split for each model's forecasts.
for model_name, forecast_vals in (
    ("XGBoost", forecast_vals_xgb),
    ("ElasticNet", forecast_vals_EN),
):
    metric = ForecastingMetrics(y_true=ytest, y_pred=forecast_vals)
    print(f' MAPE val for {model_name}: {metric.MAPE()} % \n Weighted-MAPE val: {metric.WMAPE()} %')

In [None]:
# Pick one test observation to explain; the fixed seed keeps the choice reproducible.
sampled_row = Xtest.sample(n=1, random_state=42)
test_index = sampled_row.index[0]

# Take the row by position from the one-row sample rather than looking its label
# up in Xtest: a label lookup returns several rows (a DataFrame) when the index
# label is repeated, and the .to_frame() call below would then fail.
test_input = sampled_row.iloc[0].to_frame()
test_input.columns = ['Value']

# Prepend a constant 1 under the label 'Intercept' so the intercept can be
# treated like any other feature when inputs are multiplied by coefficients.
intercept_row = pd.DataFrame({'Value': [1]}, index=['Intercept'])
test_input_df = pd.concat([intercept_row, test_input], axis=0)
test_input_df

In [None]:
# Waterfall breakdown of the ElasticNet prediction for the sampled test row.
# `np` and `go` (plotly.graph_objects) come from the notebook's import cell.

# 1. Contribution of each component = input value * model weight.
#    The product is positional, so verify that the input rows and the
#    coefficient rows list the features in the same order before multiplying.
feature_names = elasticnet_model_coeff_df['Features'].tolist()
assert list(test_input_df.index) == feature_names, (
    "test_input_df rows and coefficient rows are not in the same feature order"
)
contribution_values = (
    test_input_df['Value'].to_numpy(dtype=float)
    * elasticnet_model_coeff_df['Coefficients'].to_numpy(dtype=float)
)
total_forecast = float(np.sum(contribution_values))

# 2. X-axis labels: every component from the coefficient table plus a final total.
steps = feature_names + ['Total Forecast']

# 3. Y-axis values: the trailing 0 is a placeholder, because Plotly computes the
#    bar for a 'total' measure itself.
y_values = contribution_values.tolist() + [0]

# 4. Measures: 'absolute' for the first bar (Intercept), 'relative' for each
#    feature, 'total' for the closing bar.
measures = ["absolute"] + ["relative"] * (len(feature_names) - 1) + ["total"]

# Create Waterfall chart
fig = go.Figure(go.Waterfall(
    name="Regression Breakdown",
    orientation="v",
    measure=measures,
    x=steps,
    y=y_values,
    connector={"line": {"color": "rgba(63, 63, 63, 0.5)"}},
    increasing={"marker": {"color": "#2ecc71"}}, # Green for positive impact
    decreasing={"marker": {"color": "#e74c3c"}}, # Red for negative impact
    totals={"marker": {"color": "#3498db"}}      # Blue for the final prediction
))

# Layout options
fig.update_layout(
    autosize=True,
    title=f"ElasticNet Prediction Breakdown<br><sup>Total Predicted Value: {total_forecast:.2f}</sup>",
    waterfallgroupgap=0.5,
    yaxis_title="Contribution Value",
    showlegend=False,
    xaxis=dict(tickangle=270, automargin=True),
    template="plotly_white"
)
fig.show()