In [10]:
import pandas as pd
import numpy as np

from statsmodels.robust import mad
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import itertools
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

---
## 讀取資料

In [3]:
# Folder holding the input CSV files. The trailing slash matters because the
# file names are appended by plain string concatenation.
input_dir = './data/'

# Read the three input tables in the same order as the names on the left.
sales_train_validation, calendar, sales_prices = (
    pd.read_csv(input_dir + csv_name)
    for csv_name in ('sales_train_validation.csv', 'calendar.csv', 'sell_prices.csv')
)

---
## 將數值欄位調降為較低精度以節省記憶體

In [4]:
def downcast_dtypes(df):
    """Shrink the numeric columns of ``df`` in place to save memory.

    * ``float64`` columns become ``float32``.
    * ``int64`` / ``int32`` columns become ``int16`` when every value fits in
      the ``int16`` range. A column with larger values falls back to ``int32``
      when that fits and is otherwise left untouched, so values never wrap
      around silently.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to convert. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, so the call can be used in an assignment.
    """
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_cols = [c for c in df if df[c].dtype in ["int64", "int32"]]

    for c in float_cols:
        df[c] = df[c].astype(np.float32)

    for c in int_cols:
        if df[c].empty:
            # Nothing can overflow in an empty column; use the smallest type.
            df[c] = df[c].astype(np.int16)
            continue
        lowest, highest = df[c].min(), df[c].max()
        # Try the narrowest type first and stop at the first one that can
        # represent the whole value range of the column.
        for target in (np.int16, np.int32):
            bounds = np.iinfo(target)
            if bounds.min <= lowest and highest <= bounds.max:
                if df[c].dtype != target:
                    df[c] = df[c].astype(target)
                break
    return df

In [5]:
# Pass each loaded table through downcast_dtypes and rebind every name to the
# frame the function returns.
sales_train_validation, sales_prices, calendar = map(
    downcast_dtypes,
    (sales_train_validation, sales_prices, calendar),
)

---
## 對資料進行抽樣

In [6]:
# Every value of the 'id' column, in ascending order.
ids = sorted(sales_train_validation['id'])

# Column labels that contain the substring 'd_'.
d_cols = list(filter(lambda label: 'd_' in label, sales_train_validation.columns))

---
## 使用 Moving Average 進行資料的修正


In [7]:
def average_smoothing(signal, kernel_size=3, stride=1):
    """Smooth ``signal`` with a trailing moving average.

    A window of ``kernel_size`` samples starts at index 0 and slides forward
    by ``stride`` samples; one mean is emitted for every window that fits
    completely inside ``signal``. The result is left-padded with
    ``kernel_size - stride`` zeros, so with ``stride=1`` and
    ``len(signal) >= kernel_size`` the output has the same length as the
    input, and element ``i`` (for ``i >= kernel_size - 1``) is the mean of the
    ``kernel_size`` samples ending at index ``i``.

    Parameters
    ----------
    signal : sequence or numpy.ndarray
        One-dimensional data to smooth.
    kernel_size : int, default 3
        Number of samples in each averaging window. Must be >= 1.
    stride : int, default 1
        Step between consecutive windows. Must be >= 1.

    Returns
    -------
    numpy.ndarray
        The zero padding followed by one mean per window.

    Raises
    ------
    ValueError
        If ``kernel_size`` or ``stride`` is smaller than 1 (a non-positive
        stride would never terminate).
    """
    if kernel_size < 1:
        raise ValueError("kernel_size must be >= 1")
    if stride < 1:
        raise ValueError("stride must be >= 1")

    # Zero padding for the leading positions that have no complete window;
    # with stride=1 this keeps the output as long as the input.
    sample = [0] * (kernel_size - stride)
    start = 0
    end = kernel_size
    while end <= len(signal):
        # Average the current window BEFORE advancing, so the first window
        # is included and no window reaches past the end of the signal.
        sample.append(np.mean(signal[start:end]))
        start += stride
        end += stride
    return np.array(sample)

In [8]:
# Take the row whose 'id' equals ids[2], ids[6] and ids[7] respectively, keep
# only the d_cols values of the first matching row, and cut a different
# positional window out of each: [0:90], [90:180] and [1800:].
x_1, x_2, x_3 = (
    sales_train_validation.loc[sales_train_validation['id'] == ids[position]]
    .set_index('id')[d_cols]
    .values[0][window]
    for position, window in (
        (2, slice(0, 90)),
        (6, slice(90, 180)),
        (7, slice(1800, None)),
    )
)

In [9]:
# Smooth each of the three sample windows with the default kernel size and stride.
y_a1, y_a2, y_a3 = (average_smoothing(window) for window in (x_1, x_2, x_3))

---
## 建立模型

In [11]:
# Drop the first six columns by position and keep everything after them.
# NOTE(review): presumably the six leading columns are identifier fields and
# the remainder are the d_* day columns — confirm against the CSV header.
store_sum = sales_train_validation.iloc[:,6:]
# Bare last expression so the notebook renders the first five rows.
store_sum.head()

Unnamed: 0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,0,0,0,0,0,0,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,0,0,0,0,0,0,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,0,0,0,0,0,0,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [12]:
# store_sum has one row per series and one column per day (d_1 ... d_1913).
# Split along the DAY axis, then transpose so that rows are days and every
# column is one series: the model-fitting loop passes one column at a time to
# the model, so a column has to be a time series.
# A plain row slice (store_sum.iloc[0:1883]) would instead select the first
# 1883 series and keep all days.
train_datasets = store_sum.iloc[:, 0:1883].T     # first 1883 day columns
val_datasets = store_sum.iloc[:, 1883:1913].T    # remaining 30 day columns

In [13]:
def sarima_train_test(t_series, p = 2, d = 1, r = 2, NUM_TO_FORECAST = 56, do_plot_results = True):
    """Fit a SARIMAX model to ``t_series`` and return its predicted mean for
    the last ``NUM_TO_FORECAST`` positions.

    The model is built with ``order=(p, d, r)`` and ``trend='c'``; no
    ``seasonal_order`` is passed, so the library default applies. The shape of
    the prediction, the mean absolute residual and the fit summary are printed.

    Parameters
    ----------
    t_series : array-like or pandas.Series
        Observations handed to ``SARIMAX`` as the endogenous series.
    p, d, r : int
        The three entries of the ``order`` tuple, in that order.
    NUM_TO_FORECAST : int
        Passed as ``start=-NUM_TO_FORECAST`` to ``get_prediction``.
    do_plot_results : bool
        Kept for backward compatibility; it is not used and nothing is plotted.

    Returns
    -------
    The ``predicted_mean`` of the prediction results object.
    """
    model = SARIMAX(t_series, order = (p, d, r), trend = 'c')
    results = model.fit()

    # NOTE(review): get_prediction is called with a negative start and no end;
    # presumably this yields in-sample predictions for the last
    # NUM_TO_FORECAST observations of t_series rather than forecasts beyond
    # its end — confirm, and switch to results.get_forecast(steps=...) if
    # out-of-sample values are what the caller needs.
    forecast = results.get_prediction(start = - NUM_TO_FORECAST)
    mean_forecast = forecast.predicted_mean

    print(mean_forecast.shape)

    # Mean absolute value of the model residuals (all residuals, not only the
    # predicted window).
    mae_sarima = np.mean(np.abs(results.resid))
    print('Mean absolute error: ', mae_sarima)
    print(results.summary())
    return mean_forecast

In [None]:
predictions = []

# Fit one model per column of train_datasets, walking the columns in
# positional order, and collect the 28 predicted values of each fit.
for _, column_series in train_datasets.items():
    column_forecast = sarima_train_test(column_series, NUM_TO_FORECAST=28)
    predictions.append(column_forecast)

# Stack the results into a 2-D array with 28 values per row.
predictions = np.array(predictions).reshape((-1, 28))

---
## 寫出 Submission File

In [None]:
import time

# Each row of `predictions` belongs to one fitted series and holds its 28
# predicted steps, so reshape(-1, 28) keeps every series on its own row.
# Reshaping the same buffer to (28, -1) and then transposing is NOT a
# transpose of the data: it re-reads the flat values in a different order and
# mixes steps from different series into one row.
submission = pd.DataFrame(data=np.array(predictions).reshape(-1, 28))

# Append a second copy of all rows below the first one.
# NOTE(review): presumably sample_submission lists every series twice (two
# blocks of ids) — confirm that the row counts match.
submission = pd.concat((submission, submission), ignore_index=True)

sample_submission = pd.read_csv("./data" + "/sample_submission.csv")

idColumn = sample_submission[["id"]]

# The assignment aligns on the row index, so row i receives the id at index i.
submission[["id"]] = idColumn

# Move the freshly added "id" column from the last position to the first.
cols = list(submission.columns)
cols = cols[-1:] + cols[:-1]
submission = submission[cols]

# Final header: id, F1 ... F28.
colsdeneme = ["id"] + [f"F{i}" for i in range (1,29)]

submission.columns = colsdeneme

# Timestamp of this run; computed here but not used in the file name below.
currentDateTime = time.strftime("%d%m%Y_%H%M%S")

submission.to_csv("submission.csv", index=False)