In [None]:
import pandas as pd
import numpy as np
import calendar
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.vector_ar.var_model import VAR

In [None]:
# Raw inputs; every CSV is read with its first column as the index.
store_calendar = pd.read_csv("Calendar_with_cycled_days.csv", index_col=0)
sell_prices = pd.read_csv("sell_prices_afcs2021.csv", index_col=0)
sample_submission = pd.read_csv("sample_submission_afcs2021.csv", index_col=0)

# Lookup from each value of the calendar's `d` column to the calendar index
# label of the same row (presumably a date string; later cells parse the
# renamed labels with pd.to_datetime).
date_converter = {
    day: label for day, label in zip(store_calendar['d'], store_calendar.index)
}

# Sales tables, with their columns relabelled through the lookup above.
train_data = pd.read_csv(
    "sales_train_validation_afcs2021.csv", index_col=0
).rename(columns=date_converter)
test_data = pd.read_csv(
    "sales_test_validation_afcs2021.csv", index_col=0
).rename(columns=date_converter)

# Column-wise sum over all rows of the training table.
total_sales = train_data.sum()


# Attempt at forecasting

In [None]:
# Preprocess test data for calculating the RMSE.
# Transpose so every row is one time step and every column one series, then
# parse the row labels into a DatetimeIndex.
ts_test_data = test_data.transpose()
ts_test_data.index = pd.to_datetime(ts_test_data.index)
products = list(ts_test_data.columns.values)

# Shorten every column label to its first three "_"-separated parts.
# A single rename with a callable replaces the former per-column loop, which
# rebuilt the whole frame once for every column.
ts_test_data = ts_test_data.rename(
    columns=lambda label: "_".join(label.split("_")[:3])
)
ts_test_data.index.name = 'date'

In [None]:
# Preprocess the train data.
# Transpose so every row is one time step and every column one series, then
# parse the row labels into a DatetimeIndex.
ts_train_data = train_data.transpose()
ts_train_data.index = pd.to_datetime(ts_train_data.index)
products = list(ts_train_data.columns.values)
# Untruncated column labels, kept because the forecast rows are later keyed by them.
originals = list(ts_train_data.columns.values)

# Shorten every column label to its first three "_"-separated parts.
# A single rename with a callable replaces the former per-column loop, which
# rebuilt the whole frame once for every column.
ts_train_data = ts_train_data.rename(
    columns=lambda label: "_".join(label.split("_")[:3])
)

ts_train_data.index.name = 'date'

In [None]:
# Turn the calendar's index into an ordinary column so that it is carried
# through the merge in the next cell.
store_calendar = store_calendar.reset_index(drop=False)

In [None]:
# Join prices with the calendar on the shared `wm_yr_wk` key, then index the
# combined frame by its `date` column.
new = sell_prices.merge(store_calendar, on='wm_yr_wk').set_index('date')
# Parse the index labels into a DatetimeIndex.
new.index = pd.to_datetime(new.index)
new

In [None]:
# Add noise to the sell price so it can be added to the VAR model (variables can't be constant).
# The noise comes from a dedicated, seeded generator so that the jittered
# prices, and therefore the forecasts built on them, are reproducible after a
# kernel restart.
# NOTE: this cell overwrites new['sell_price']; running it again adds the same
# noise a second time.
NOISE_SEED = 42
noise_rng = np.random.default_rng(NOISE_SEED)

original = new['sell_price']
noise = noise_rng.normal(0, .01, len(new))
new_signal = original + noise
new['sell_price'] = new_signal
new

In [None]:
# Create the forecasts
def make_forecast(train_data, price, product_names, original_product_names, steps=28):
    """Fit one VAR model per product and forecast its first series.

    For every product, the product's column of ``train_data`` is merged on
    ``date`` with the rows of ``price`` whose ``item_id`` equals the product
    name. The columns (product, ``sin_wday``, ``cos_wday``, ``sell_price``) of
    the merged frame are the endogenous input of a VAR model fitted with its
    default settings. This VAR model supersedes an earlier Holt-Winters
    (ExponentialSmoothing) attempt.

    Parameters
    ----------
    train_data : pandas.DataFrame
        One column per entry of ``product_names``; must be mergeable on ``date``.
    price : pandas.DataFrame
        Must provide ``item_id``, ``sin_wday``, ``cos_wday`` and ``sell_price``
        columns and be mergeable on ``date``.
    product_names : sequence of str
        Column names in ``train_data`` / ``item_id`` values in ``price``.
    original_product_names : sequence
        Identifier written as the first element of each output row; entry ``k``
        belongs to ``product_names[k]``.
    steps : int, default 28
        Number of periods to forecast.

    Returns
    -------
    list of list
        One row per product: ``[identifier, forecast_1, ..., forecast_steps]``.

    Raises
    ------
    ValueError
        If ``product_names`` and ``original_product_names`` differ in length.
    """
    if len(product_names) != len(original_product_names):
        raise ValueError(
            "product_names and original_product_names must have the same length"
        )

    submission = []

    # The identifiers come from the argument, not from a notebook-level global,
    # so the function also works when called with other inputs.
    for product, original_id in zip(product_names, original_product_names):
        sales = train_data[product]
        product_prices = price[price['item_id'] == product]
        endog = pd.merge(product_prices, sales, on="date")[
            [product, 'sin_wday', 'cos_wday', 'sell_price']
        ]

        model_fit = VAR(endog=endog).fit()
        # Forecasting only needs the last `k_ar` observations as the starting
        # point; taking them from the model input avoids relying on the
        # results object's `.y` attribute.
        last_observations = endog.values[-model_fit.k_ar:]
        prediction = model_fit.forecast(last_observations, steps=steps)

        # Column 0 of the prediction is the product's own series because that
        # column is listed first in `endog`.
        submission.append([original_id] + prediction[:, 0].tolist())

    return submission
 
# Forecast every (shortened) column of the training frame; the rows are
# labelled with the untruncated names collected in `originals`.
products = ts_train_data.columns.tolist()
df = make_forecast(
    train_data=ts_train_data,
    price=new,
    product_names=products,
    original_product_names=originals,
)

In [None]:
# Convert forecasts to a dataframe: an `id` column followed by F1..F28,
# with `id` then used as the index.
forecast_columns = ['id'] + ['F' + str(day) for day in range(1, 29)]
df = pd.DataFrame(df, columns=forecast_columns).set_index('id')
df

In [None]:
# Calculate the RMSE: square root of the mean squared error between the
# forecast frame and the test frame.
forecast_mse = mean_squared_error(df, test_data)
np.sqrt(forecast_mse)

# Make CSV file

In [None]:
# Save the best RMSE for comparison.
# `best` is bound to the same object as `df`, not a copy of it.
best = df
best_mse = mean_squared_error(best, test_data)
np.sqrt(best_mse)

In [None]:
# Create the CSV file: move `id` back into a regular column and write the
# frame without the (now positional) index.
sub = df.reset_index(drop=False)
sub.to_csv(path_or_buf="submission.csv", index=False)