In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing

## Simulate data

In [None]:
# Fixed seed so the noisy series (and every fit/metric derived from them) are
# identical on each Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Sample grid with step pi/4: one full sine cycle (2*pi) spans 8 samples.
x = np.arange(0, 50*np.pi + np.pi/4, np.pi/4)
c = 0  # constant vertical offset added to every series

# y1: clean sine
y1 = np.sin(x) + c
# y2: sine + Gaussian noise (std 0.5)
y2 = np.sin(x) + 0.5*rng.standard_normal(x.shape) + c
# y3: sine + Gaussian noise + linear trend x/10
y3 = np.sin(x) + 0.5*rng.standard_normal(x.shape) + x/10 + c
# y4: sine/cosine mix around a level of 13, with Gaussian noise
y4 = 2 * np.sin(x) - 0.2 * np.cos(x) + 13 - 0.5*rng.standard_normal(x.shape) + c

In [None]:
# One panel per simulated series, each titled so the figure can be read on its
# own. The loop replaces four copy-pasted plot calls, and because the cell no
# longer ends in an expression it does not print a stray `[<Line2D>]` repr.
fig, axs = plt.subplots(1, 4, figsize=[40, 5])
panels = (
    (y1, 'y1: sin(x)'),
    (y2, 'y2: sin(x) + noise'),
    (y3, 'y3: sin(x) + noise + x/10'),
    (y4, 'y4: 2 sin(x) - 0.2 cos(x) + 13 + noise'),
)
for ax, (series, title) in zip(axs, panels):
    ax.plot(x, series)
    ax.set(title=title, xlabel='x')

In [None]:
# Series analysed in the rest of the notebook: y3, the noisy sine with the x/10 linear trend.
y = y3

In [None]:
# Chronological hold-out: the first 80% of the samples train the models,
# the remaining tail is kept for evaluation.
split = int(0.8 * len(x))
train, test = y[:split], y[split:]

## Determine if data is stationary and decompose

In [None]:
# Augmented Dickey-Fuller stationarity test on the full series; the raw result
# is shown as the cell output (see the statsmodels docs for the field order).
adfuller(y)

In [None]:
# Autocorrelation of the full series; the trailing ';' suppresses the cell's text output.
plot_acf(y); 

In [None]:
# period=8 matches the simulated signal: x advances by pi/4 per sample, so one
# full 2*pi sine cycle spans 8 samples.
decomp = seasonal_decompose(y, period=8)

In [None]:
# Stack the four decomposition components, one labelled panel each. The loop
# replaces four copy-pasted plot calls, and because the cell no longer ends in
# an expression it does not print a stray `[<Line2D>]` repr.
fig, axs = plt.subplots(4, 1, figsize=[20, 10])
for ax, component in zip(axs, ('observed', 'trend', 'seasonal', 'resid')):
    ax.plot(getattr(decomp, component))
    ax.set_ylabel(component)

## Build models and find confidence intervals

In [None]:
# Seasonal ARIMA with a seasonal period of 8 samples.
# NOTE: despite the "model_" prefix, model_arima / model_ets hold the *fitted
# results* objects returned by .fit(), not the unfitted model specifications.
arima_spec = ARIMA(train, order=(1, 0, 1), seasonal_order=(1, 0, 1, 8))
model_arima = arima_spec.fit()

# Additive-trend, additive-seasonal exponential smoothing, same seasonal period.
ets_spec = ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=8)
model_ets = ets_spec.fit(method='ls')

# In-sample fitted values of each model on the training window.
model_arima_fit, model_ets_fit = model_arima.fittedvalues, model_ets.fittedvalues

In [None]:
# Integer time axes: training samples first, test samples continuing right after.
n_train, n_test = train.shape[0], test.shape[0]
x_train = np.arange(n_train)
x_test = np.arange(n_train, n_train + n_test)

# Out-of-sample forecasts covering the whole test window.
pred_arima = model_arima.forecast(n_test)
pred_ets = model_ets.forecast(n_test)

In [None]:
# Forecast confidence (arima)
_ = model_arima.get_forecast(x_test.shape[0])  # 95% conf
fc = _.predicted_mean
conf_arima = _.conf_int(alpha=0.05)

# Forecast confidence (ets)
sim_series = model_ets.simulate(41, repetitions=100, error="add").T
mean = np.mean(sim_series, axis=0)
std = np.std(sim_series, axis=0)
low = mean - 2 * std
high = mean + 2 * std
conf_ets = np.array([low, high]).T

In [None]:
# Observed data.
plt.plot(x_train, train, label='train')
plt.plot(x_test, test, label='test')

# Per model: in-sample fit, test-window forecast and shaded interval band,
# all drawn in that model's colour.
overlays = (
    ('ARIMA', 'C2', model_arima_fit, pred_arima, conf_arima),
    ('ETS', 'C3', model_ets_fit, pred_ets, conf_ets),
)
for name, colour, fitted, forecast, band in overlays:
    plt.plot(x_train, fitted, label=name, color=colour)
    plt.plot(x_test, forecast, color=colour)
    plt.fill_between(x_test, band[:, 0], band[:, 1], color=colour, alpha=.15)
plt.legend()

## Compare models

In [None]:
# Overlaid, density-normalised residual histograms (ARIMA in C2, ETS in C3,
# the same colours as in the forecast plot).
hist_style = dict(bins=20, alpha=0.5, density=True)
plt.hist(model_arima.resid, color='C2', **hist_style)
plt.hist(model_ets.resid, color='C3', **hist_style)

In [None]:
# Residual autocorrelation, one figure per model (ARIMA first, then ETS).
for results in (model_arima, model_ets):
    plot_acf(results.resid)

In [None]:
# Long-horizon check: forecast n steps starting right after the training
# window, which runs well past the held-out test samples.
n = 200
x_forecast = np.arange(train.shape[0], train.shape[0] + n)

plt.plot(x_train, train, label='train')
plt.plot(x_test, test, label='test')
for label, results in (('ARIMA', model_arima), ('ETS', model_ets)):
    plt.plot(x_forecast, results.forecast(n), label=label)
plt.legend()

In [None]:
def calculate_metrics(models, names=('ARIMA', 'ETS'), train_data=None, test_data=None):
    """Tabulate AIC and train/test MSE and MAE for fitted time-series models.

    Every number is derived from the objects in ``models``. The previous version
    read the notebook globals ``model_arima_fit``, ``model_ets_fit``,
    ``pred_arima`` and ``pred_ets`` for MSE/MAE, so passing different models (or
    the same ones in another order) silently produced mismatched columns.

    Parameters
    ----------
    models : sequence
        Fitted results objects exposing ``aic``, ``fittedvalues`` and
        ``forecast(steps)``.
    names : sequence of str, optional
        Column label for each model, in the same order as ``models``.
    train_data, test_data : array-like, optional
        Observed training / hold-out values. When omitted, the notebook-level
        ``train`` / ``test`` arrays are used, so ``calculate_metrics(models)``
        keeps working as before.

    Returns
    -------
    pandas.DataFrame
        Rows ``AIC``, ``Train MSE``, ``Test MSE``, ``Train MAE``, ``Test MAE``;
        one column per model.

    Raises
    ------
    ValueError
        If ``names`` and ``models`` have different lengths.
    """
    names = list(names)
    if len(names) != len(models):
        raise ValueError(
            f"got {len(models)} model(s) but {len(names)} name(s); "
            "pass one name per model"
        )

    # Fall back to the notebook globals only when the caller gave no data.
    observed_train = np.asarray(train if train_data is None else train_data, dtype=float)
    observed_test = np.asarray(test if test_data is None else test_data, dtype=float)

    rows = []
    for model in models:
        # In-sample and out-of-sample errors for this model.
        train_err = observed_train - np.asarray(model.fittedvalues, dtype=float)
        test_err = observed_test - np.asarray(model.forecast(observed_test.shape[0]), dtype=float)
        rows.append([
            model.aic,
            np.mean(train_err ** 2),
            np.mean(test_err ** 2),
            np.mean(np.abs(train_err)),
            np.mean(np.abs(test_err)),
        ])

    # rows is (n_models, 5); transpose so metrics are rows and models are columns.
    metrics = pd.DataFrame(np.array(rows, dtype=float).T, columns=names,
                           index=['AIC', 'Train MSE', 'Test MSE', 'Train MAE', 'Test MAE'])

    return metrics

In [None]:
# Fitted results to compare, listed in the same order as the metric table's columns (ARIMA, ETS).
models = [model_arima, model_ets]

In [None]:
# Build the AIC / MSE / MAE comparison table for the fitted models.
metrics = calculate_metrics(models)

In [None]:
# Display the comparison table (rows: metrics, columns: models).
metrics