In [None]:
# Cell 1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.data_preprocessing import load_csv, ensure_ts_format
from src.feature_engineering import create_datetime_features, create_lags, create_rollings, drop_na_for_model
from src.model_prophet import fit_prophet, forecast_prophet
from src.model_arima import fit_sarima, forecast_sarima
from src.model_lstm import train_lstm, forecast_lstm
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
# Silence every warning for the rest of the kernel session so cell output stays compact.
# NOTE(review): this is a blanket filter — any warning the model-fitting cells might emit
# is hidden too; consider narrowing it by category/module.
warnings.filterwarnings('ignore')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [None]:
# Cell 2
# Read the sales CSV and pass it straight through `ensure_ts_format`
# (project helper; date column 'date', target column 'sales'), then preview the result.
df = ensure_ts_format(
    load_csv('../data/sales.csv', date_col='date'),
    date_col='date',
    target_col='sales',
)
df.head()


In [None]:
# Cell 3
# Line plot of the raw target over time, followed by summary statistics of the target.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(df['date'], df['sales'])
ax.set_title('Sales over time')
plt.show()

df['sales'].describe()


In [None]:
# Cell 4
# Distribution of sales per calendar month.
# NOTE: the 'month' column is written onto `df` itself, so it is still present
# when `df` is used by the cells below.
month_of_year = df['date'].dt.month
df['month'] = month_of_year
sns.boxplot(data=df, x='month', y='sales')


In [None]:
# Cell 5
# Build the feature frame step by step with the project helpers:
# datetime features -> lags of 'sales' (1, 7, 14) -> rolling windows of 'sales' (7, 14)
# -> `drop_na_for_model`. Only the final frame is bound to `fe`.
dated = create_datetime_features(df, date_col='date')
lagged = create_lags(dated, 'sales', lags=[1, 7, 14])
rolled = create_rollings(lagged, 'sales', windows=[7, 14])
fe = drop_na_for_model(rolled)
fe.shape


In [None]:
# Cell 6
# Positional hold-out split: first 80% of the rows for training, the remainder for testing.
# No shuffling — row order is preserved.
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]
print(len(train), len(test))


In [None]:
# Cell 7
# Prophet baseline: fit on the training rows, forecast as many periods as the test
# set has rows, and score the last `horizon` 'yhat' values against the test actuals.
horizon = len(test)
m = fit_prophet(train)
fc = forecast_prophet(m, periods=horizon)
pred = fc['yhat'].tail(horizon).values
mae_prophet = mean_absolute_error(test['sales'], pred)
print('MAE:', mae_prophet)


In [None]:
# Cell 8
# SARIMA baseline on the date-indexed training series.
# presumably the trailing 7 in the seasonal order is a weekly period — confirm against fit_sarima.
sarima_spec = {'order': (1, 1, 1), 'seasonal_order': (0, 1, 1, 7)}
ts = train.set_index('date')['sales']
res = fit_sarima(ts, **sarima_spec)
# forecast_sarima returns a pair; only the first element (the point forecast) is used here.
mean, _ = forecast_sarima(res, steps=len(test))
mae_arima = mean_absolute_error(test['sales'], mean.values)
print('MAE ARIMA:', mae_arima)


In [None]:
# Cell 9
# LSTM baseline: train on the training window, forecast the hold-out horizon, report MAE.
LSTM_SEQ_LEN = 30  # input window length; must be the same for training and forecasting
series = train['sales'].values
model, scaler = train_lstm(series, seq_len=LSTM_SEQ_LEN, epochs=30, batch_size=16, model_path='lstm_demo.h5')
# Seed the forecast with training history only. Supplying the test actuals here would
# leak the very values being scored into the forecaster (and, if forecast_lstm starts
# from the tail of the series it is given, would place the forecast after the test
# period instead of on it).
# NOTE(review): forecast_lstm is defined in src.model_lstm — confirm it forecasts
# `steps` values following the end of the series it receives.
preds_lstm = forecast_lstm(model, scaler, series, steps=len(test), seq_len=LSTM_SEQ_LEN)
print('MAE LSTM:', mean_absolute_error(test['sales'].values, preds_lstm))


In [None]:
# Cell 10
# Blend the three model forecasts with fixed weights and score the blend on the test set.
# NOTE(review): this import sits mid-notebook; the usual convention is the top import cell.
from src.ensemble import weighted_ensemble

ensemble_weights = {'prophet': 0.4, 'arima': 0.3, 'lstm': 0.3}
preds = {
    'prophet': pred,
    'arima': mean.values,
    'lstm': preds_lstm,
}
ens = weighted_ensemble(preds, weights=ensemble_weights)
mae_ensemble = mean_absolute_error(test['sales'].values, ens)
print('Ensemble MAE:', mae_ensemble)


In [None]:
# Cell 11
# Overlay the test actuals with every model forecast on a shared date axis.
fig, ax = plt.subplots(figsize=(12, 6))
# (legend label, y-values, extra line styling) — drawn in this order.
forecast_lines = [
    ('actual', test['sales'].values, {}),
    ('prophet', pred, {}),
    ('arima', mean.values, {}),
    ('lstm', preds_lstm, {}),
    ('ensemble', ens, {'linewidth': 2, 'linestyle': '--'}),
]
for line_label, y_values, line_style in forecast_lines:
    ax.plot(test['date'], y_values, label=line_label, **line_style)
ax.legend()
plt.show()