In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from plotnine import *
from statsmodels.tsa.arima.model import ARIMA

## Pre-process Data

In [2]:
# Load Data
calendar = pd.read_csv("Data/calendar_afcs2020.csv")
sales = pd.read_csv("Data/sales_train_evaluation_afcs2020.csv")
train = pd.read_csv("Data/sales_train_validation_afcs2020.csv")
sample_submission = pd.read_csv("Data/sample_submission_afcs2020.csv")
price = pd.read_csv("Data/sell_prices_afcs2020.csv")

In [3]:
# Convert days into dates
calendar['date'] = pd.to_datetime(calendar['date'])
datetime = calendar[["date","d"]].copy()

In [4]:
# price["id"] = price["item_id"] + "_" + price["store_id"] + "_validation"

In [75]:
# # Visualization
# # plt.figure(figsize=(12,12))
# plt.plot(ts_train["ds"],ts_train["y"],label="Train")
# plt.plot(ts_test["ds"],ts_test["y"],label="Test")
# plt.plot(ts_test["ds"],pred,label="Forecast")
# plt.title("Time Series Forecasting")
# plt.legend()

## Forecast Models

In [49]:
# Moving Average (MA)
def moving_average(train, h):
    
    # fit model
    model = ARIMA(train, order=(0, 0, 1))
    model_fit = model.fit()
    
    # make prediction
    start_index = len(train) + 1
    end_index = start_index + h
    fc = model_fit.predict(start=start_index, end=end_index)
    
    return fc

## Forecast function

In [155]:
def forecast(MA=False):
    """Forecast based on historical sales of items"""
    
    # Do nothing if no forecast is selected
    if not MA:
        print ("No forecast has been made")
        return 0
    
    # Set parameters
    h = 28
    output = {}

    # Forecast sales per product item
    for i in range(1, len(train) + 1):
        fc = []
        
        # Create train data per item and merge with datetime
        item_train = train.iloc[(i-1):i].iloc[:,1:].T.reset_index()
        item_train = pd.merge(left=item_train,right=datetime,left_on="index",right_on="d",how="left")
        item_train.drop(["d","index"],axis=1,inplace=True)
        item_train = item_train.rename({"date":"ds",item_train.columns[0]:"y"},axis=1)
        item_train = item_train[item_train.columns[::-1]]
        
        # Create test data per item and merge with datetime
        item_sales = sales.iloc[(i-1):i][sales.columns[-(h+1):]].iloc[:,1:].T.reset_index()
        item_sales = pd.merge(left=item_sales,right=datetime,left_on="index",right_on="d",how="left")
        item_sales.drop(["d","index"],axis=1,inplace=True)
        item_sales = item_sales.rename({"date":"ds",item_sales.columns[0]:"y"},axis=1)

        # Create time series of train and test data
        ts_train = item_train.iloc[-int(h/2*8):,:].copy()
        ts_test = item_sales[item_sales.columns[::-1]]
        data = ts_train["y"].tolist()

        # Forecast using Moving Average
        if MA:
            fc_str = "MA"
            pred = moving_average(data, h)[1:]
        
        # Create output
        fc.append(train.id[(i-1)])
        fc.extend(pred)
        output[(i-1)] = fc

    # Convert output to dataframe
    output_df = pd.DataFrame.from_dict(output, orient='index', columns=sample_submission.columns)
    output_df.to_csv('Output/output_{}.csv'.format(fc_str), index=False)
    
    return output_df

In [156]:
forecast(MA=True)



Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_2_001_CA_3_validation,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,...,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786,0.115786
1,HOBBIES_2_002_CA_3_validation,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,...,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493,0.178493
2,HOBBIES_2_003_CA_3_validation,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,...,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597,0.670597
3,HOBBIES_2_004_CA_3_validation,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,...,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982,0.257982
4,HOBBIES_2_005_CA_3_validation,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,...,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517,0.089517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144,HOBBIES_2_145_CA_3_validation,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,...,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722,0.437722
145,HOBBIES_2_146_CA_3_validation,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,...,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272,0.122272
146,HOBBIES_2_147_CA_3_validation,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,...,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087,0.330087
147,HOBBIES_2_148_CA_3_validation,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,...,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225,0.240225
