In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline

import pmdarima as pm
# Report the installed pmdarima version so a run can be matched to a release.
print(f"Using pmdarima {pm.__version__}")
# Using pmdarima 1.5.2

In [None]:
from pmdarima.datasets.stocks import load_msft

# Load the MSFT stock dataset that ships with pmdarima and preview its first rows.
df = load_msft()
df.head()

In [None]:
from pandas.plotting import lag_plot

fig, axes = plt.subplots(3, 2, figsize=(8, 12))
# Figure-level title: plt.title() would only title the last subplot, and that
# title is then replaced by the per-panel "Lag=..." titles set in the loop.
fig.suptitle('MSFT Autocorrelation plot')

# The axis coordinates for the plots, in row-major order:
# (0, 0), (0, 1), (1, 0), ... derived from the grid shape.
ax_idcs = list(np.ndindex(*axes.shape))

# One panel per lag, starting at lag 1.
# NOTE(review): these panels use the 'Open' column while the model further
# down is fit on 'Close' -- confirm this is intended.
for lag, ax_coords in enumerate(ax_idcs, 1):
    axis = axes[ax_coords]
    lag_plot(df['Open'], lag=lag, ax=axis)
    axis.set_title(f"Lag={lag}")

plt.show()

In [None]:
# Show the first rows again before the train/test split in the next cell.
df.head()

In [None]:
# The first 80% of rows form the training series; the 100 rows that follow
# form the test window.
split_at = int(len(df) * 0.8)
y_train = df["Close"].iloc[:split_at]
test_data = df.iloc[split_at:split_at + 100]
y_test = test_data["Close"]
# Display the shapes and the combined train + test length as a quick check.
df.shape, y_train.shape, y_test.shape, len(y_train) + len(y_test)

In [None]:
from pmdarima.arima import ndiffs

# Run both tests (KPSS first, then ADF) with the same settings and keep the
# larger of the two suggested differencing orders.
kpss_diffs, adf_diffs = [
    ndiffs(y_train, alpha=0.05, test=test_name, max_d=6)
    for test_name in ('kpss', 'adf')
]
n_diffs = max(adf_diffs, kpss_diffs)

print(f"Estimated differencing term: {n_diffs}")
# Estimated differencing term: 1

In [None]:
# Non-seasonal stepwise search on the training series, with the differencing
# order fixed to the value estimated above.
auto = pm.auto_arima(
    y_train,
    d=n_diffs,
    seasonal=False,
    stepwise=True,
    suppress_warnings=True,
    error_action="ignore",
    max_p=6,
    max_order=None,
    trace=True,
)

In [None]:
from copy import deepcopy


In [None]:
# Show the order tuple of the model selected by the auto_arima search.
print(auto.order)
# (0, 1, 0)

In [None]:
from sklearn.metrics import mean_squared_error
from pmdarima.metrics import smape
from tqdm.notebook import trange, tqdm
model = deepcopy(auto)  # seeded from the model we've already fit
n_days = 5  # number of periods forecast (and scored) at every step


def forecast_one_step():
    """Forecast the next ``n_days`` periods from the current ``model``.

    Despite the name (kept so existing calls keep working), every call
    produces a multi-step forecast of length ``n_days``.

    Returns
    -------
    tuple
        ``(fc, conf_int)``: the point forecasts returned by
        ``model.predict`` and the matching confidence intervals converted
        with ``np.asarray``.
    """
    fc, conf_int = model.predict(n_periods=n_days, return_conf_int=True)
    return fc, np.asarray(conf_int)


forecasts = []
confidence_intervals = []
out_mse = []
# The last n_days observations cannot start a full scoring window.
n_test_days = len(y_test[:-n_days])
# enumerate() has no length, so pass the total for a usable progress bar.
for i, new_ob in tqdm(enumerate(y_test[:-n_days]), total=n_test_days):
    fc, conf = forecast_one_step()
    # Keep every window's forecast and interval; these lists were previously
    # declared but never filled, leaving the later arrays empty.
    forecasts.append(np.asarray(fc))
    confidence_intervals.append(conf)

    # Score the forecast against the n_days actual values starting at
    # position i (positional slice, independent of the index labels).
    actual = y_test.iloc[i:i + n_days]
    mse = mean_squared_error(actual, fc)
    out_mse.append(mse)
    print(f"Mean squared error: {mse}")
    print(f"SMAPE: {smape(actual, fc)}")
    # Updates the existing model with a small number of MLE steps
    model.update(new_ob)


# Mean squared error: 0.34238951346274243
# SMAPE: 0.9825490519101439

In [None]:
# Average of the per-window MSE values collected in out_mse.
np.mean(out_mse)

In [None]:
# 80th percentile of the per-window MSE values collected in out_mse.
np.percentile(out_mse, 80)

In [None]:
# Convert the forecast and confidence-interval lists to NumPy arrays.
forecasts = np.asarray(forecasts)
confidence_intervals = np.asarray(confidence_intervals)

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(12, 12))

# x positions of the first forecast window (the first n_days test rows).
horizon_index = test_data.index[:n_days]

# The figure needs exactly one n_days-long forecast, which the rolling
# `forecasts` collection is not guaranteed to be. `auto` is still the model
# fit on y_train only (the rolling loop updates a deep copy), so its forecast
# is the prediction for this first window.
first_fc, first_conf = auto.predict(n_periods=n_days, return_conf_int=True)
first_fc = np.asarray(first_fc)
conf_int = np.asarray(first_conf)  # columns: lower bound, upper bound

# --------------------- Actual vs. Predicted --------------------------
axes[0].plot(y_train, color='blue', label='Training Data')
axes[0].plot(horizon_index, first_fc, color='green', marker='o',
             label='Predicted Price')
axes[0].plot(horizon_index, y_test[:n_days], color='red', label='Actual Price')
axes[0].set_title('Microsoft Prices Prediction')
axes[0].set_xlabel('Dates')
axes[0].set_ylabel('Prices')

# ------------------ Predicted with confidence intervals ----------------
axes[1].plot(y_train, color='blue', label='Training Data')
axes[1].plot(horizon_index, first_fc, color='green',
             label='Predicted Price')
axes[1].set_title('Prices Predictions & Confidence Intervals')
axes[1].set_xlabel('Dates')
axes[1].set_ylabel('Prices')
axes[1].fill_between(horizon_index,
                     conf_int[:, 0], conf_int[:, 1],
                     alpha=0.9, color='orange',
                     label="Confidence Intervals")

# Label every 1300th row position with its date. Positions come from the
# actual frame length rather than a hard-coded row count, and labels are set
# through set_xticklabels because the second positional argument of
# set_xticks is the `minor` flag on matplotlib versions before 3.5.
tick_step = 1300
tick_positions = np.arange(0, len(df), tick_step)
tick_labels = df['Date'].iloc[tick_positions].tolist()
for ax in axes:
    ax.set_xticks(tick_positions.tolist())
    ax.set_xticklabels(tick_labels)
    ax.legend()

plt.show()