In [None]:
from utils import *
from model_prophet import *
from tbats import TBATS
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.stats import pearsonr
import statsmodels.api as sm
from pmdarima import auto_arima
from statsmodels.tsa.statespace import exponential_smoothing
from statsmodels.tsa.seasonal import MSTL
from statsmodels.tsa.api import STLForecast

In [None]:
# Build the data container used by every later cell. `Data` is not imported by
# name in this notebook - presumably it comes from one of the star imports
# (`utils` / `model_prophet`); confirm. Later cells read `daily_sales`,
# `daily_sales_test`, `merged_data` and `sales_test` from this object.
data = Data()

In [None]:
# Fit an automatically selected (S)ARIMA model to the aggregated daily sales
# and forecast the next 28 periods.
arima_df = pd.DataFrame({'ds': data.daily_sales.index, 'y': data.daily_sales.values})

# `seasonal=True` has no effect while the seasonal period `m` keeps its default
# of 1: pmdarima then treats the series as non-seasonal. m=7 requests a weekly
# cycle (the series is daily), the same short period the TBATS cell uses.
autoarima_model = auto_arima(arima_df['y'], seasonal=True, m=7, trace=True)

# A bare `summary()` in the middle of a cell is evaluated and thrown away;
# print it so the fitted orders and coefficients are actually shown.
print(autoarima_model.summary())

autoarima_forecast = autoarima_model.predict(n_periods=28)

In [None]:
# Overlay the held-out aggregated sales with the Prophet and auto-ARIMA
# forecasts on a single axis.
# NOTE(review): `forecast` is not defined in any cell visible here - presumably
# it is exported by `model_prophet` through the star import; confirm. The
# 1913 offset looks like the number of leading rows to skip so that only the
# test window remains - TODO confirm.
fig, ax = plt.subplots(figsize=(12, 6))

test_index = data.daily_sales_test.index
comparison_series = (
    (data.daily_sales_test.values, 'Aggregated Sales (True Values)', 'blue'),
    (forecast['yhat'][1913:], 'Aggregated Sales (Prophet)', 'green'),
    (autoarima_forecast.values, 'Aggregated Sales (ARIMA)', 'red'),
)
for series_values, series_label, series_color in comparison_series:
    ax.plot(test_index, series_values, label=series_label, color=series_color)

ax.set_title('Aggregated Sales over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Sales')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# TBATS model on aggregated sales data

# Two-column frame: the index of the aggregated series as 'ds', its values as 'y'.
tbats_df = pd.DataFrame(
    dict(
        ds=data.daily_sales.index,
        y=data.daily_sales.values,
    )
)

# Seasonal periods of 7 and 365 observations; the forecast covers 28 steps.
tbats_model = TBATS(seasonal_periods=(7, 365))
tbats_fit = tbats_model.fit(tbats_df.loc[:, 'y'])
tbats_forecast = tbats_fit.forecast(steps=28)


In [None]:
# Print selected parameters of the fitted TBATS model, one value per line:
# alpha, beta, x0, then the Box-Cox flag and the seasonal harmonics stored
# under `params.components`.
fitted_params = tbats_fit.params
fitted_components = fitted_params.components
for param_value in (
    fitted_params.alpha,
    fitted_params.beta,
    fitted_params.x0,
    fitted_components.use_box_cox,
    fitted_components.seasonal_harmonics,
):
    print(param_value)

In [None]:
# Overlay the held-out aggregated sales with the Prophet, auto-ARIMA and TBATS
# forecasts on a single axis.
# NOTE(review): `forecast` is not defined in any cell visible here - presumably
# it is exported by `model_prophet` through the star import; confirm. The
# 1913 offset looks like the number of leading rows to skip so that only the
# test window remains - TODO confirm.
fig, ax = plt.subplots(figsize=(12, 6))

test_index = data.daily_sales_test.index
comparison_series = (
    (data.daily_sales_test.values, 'Aggregated Sales (True Values)', 'blue'),
    (forecast['yhat'][1913:], 'Aggregated Sales (Prophet)', 'green'),
    (autoarima_forecast.values, 'Aggregated Sales (ARIMA)', 'red'),
    (tbats_forecast, 'Aggregated Sales (TBATS)', 'yellow'),
)
for series_values, series_label, series_color in comparison_series:
    ax.plot(test_index, series_values, label=series_label, color=series_color)

ax.set_title('Aggregated Sales over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Sales')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Average share of the daily total that each product accounts for.

# Sum of 'sales' over all rows sharing the same 'day', broadcast back to every row.
data.merged_data['total_sales'] = (
    data.merged_data
    .groupby('day')['sales']
    .transform('sum')
)

# Each row's fraction of its day's total.
data.merged_data['percentage_sales'] = data.merged_data['sales'].div(
    data.merged_data['total_sales']
)

# Mean of those fractions per 'id', returned as a two-column frame.
share_by_product = data.merged_data.groupby('id')['percentage_sales']
mean_percentage_by_product = share_by_product.mean().reset_index()
print(mean_percentage_by_product)

In [None]:
# Split the aggregated TBATS forecast across products using each product's mean
# sales share, then score the result against the held-out sales.

# Convert to a NumPy array before adding the new axis: indexing a pandas Series
# with `[:, np.newaxis]` was deprecated in pandas 1.x and raises in pandas >= 2.0.
product_shares = mean_percentage_by_product['percentage_sales'].to_numpy()

# (n_products, 1) * (n_steps,) broadcasts to one forecast row per product.
estimated_sales = (product_shares[:, np.newaxis] * np.asarray(tbats_forecast)).round()

# NOTE(review): `mean_percentage_by_product` is ordered by sorted 'id' (groupby
# default), while the comparison below is purely positional. This assumes the
# rows of `data.sales_test` are in that same order and that its first column
# (skipped via iloc[:, 1:]) is the identifier - confirm both.

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(data.sales_test.iloc[:, 1:], estimated_sales))

print(f'RMSE: {rmse}')