In [None]:
import pandas as pd
import numpy as np
from scipy import signal
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import pmdarima as pm
from pmdarima import utils
from pmdarima import arima
from pmdarima import model_selection
from statsmodels.tsa.statespace.sarimax import SARIMAX
import optuna
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

In [None]:
# Location of the dataset (a CSV file served over HTTP)
url = 'https://www.salesanalytics.co.jp/591h'

# Read it with the "Month" column as the index, parsed as dates
df = pd.read_csv(
    url,
    parse_dates=True,
    index_col='Month',
)

In [None]:
# First-order difference of the passenger series.
# plt.plot(df.Passengers.diff(1)) would also work here.
plt.plot(np.diff(df["Passengers"], n=1))


In [None]:
# Second-order difference of the passenger series
plt.plot(np.diff(df["Passengers"], n=2))


In [None]:
# Log-transformed passenger series
plt.plot(np.log(df["Passengers"]))


In [None]:
# ADF test on the log-transformed series (no differencing applied)
dftest = adfuller(np.log(df["Passengers"]))
adf_stat, p_value = dftest[0], dftest[1]
print('ADF Statistic: %f' % adf_stat)  # test statistic
print('p-value: %f' % p_value)


In [None]:
# First-order difference of the log-transformed series
plt.plot(np.diff(np.log(df["Passengers"]), n=1))


In [None]:
# First-order difference of the log-transformed series.
# NOTE(review): this cell draws exactly the same plot as the preceding cell;
# it looks like a leftover duplicate.
plt.plot(np.diff(np.log(df["Passengers"]), n=1))


In [None]:
# ADF test on the first difference of the log-transformed series
dftest = adfuller(np.diff(np.log(df["Passengers"]), n=1))
adf_stat, p_value = dftest[0], dftest[1]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)


In [None]:
# Second-order difference of the log-transformed series
plt.plot(np.diff(np.log(df["Passengers"]), n=2))


In [None]:
# ADF test on the second-order difference of the log-transformed series
dftest = adfuller(np.diff(np.log(df["Passengers"]), n=2))
print('ADF Statistic: %f' % dftest[0])  # test statistic
print('p-value: %f' % dftest[1])
print('Critical values :')
# dftest[4] holds the test's critical values keyed by level (1%, 5%, 10%)
for level, critical_value in dftest[4].items():
    print('\t', level, critical_value)


In [None]:
# Lag-6 difference of the log-transformed series
data = np.log(df["Passengers"])
plt.plot(data.diff(periods=6))
plt.show()


In [None]:
# Lag-12 difference of the log-transformed series
data = np.log(df["Passengers"])
plt.plot(data.diff(periods=12))
plt.show()


In [None]:
# ADF test after: log transform -> first difference -> lag-12 (seasonal) difference
log_first_diff = pd.DataFrame(np.diff(np.log(df["Passengers"])))
dftest = adfuller(log_first_diff.diff(periods=12).dropna())
print('p-value: %f' % dftest[1])


In [None]:
# Autocorrelation and partial autocorrelation of the original series (lags up to 20)
plot_acf(df["Passengers"], lags=20)
plot_pacf(df["Passengers"], lags=20)


In [None]:
# Series after: log transform -> first difference -> lag-12 (seasonal) difference
syori_dt = (
    pd.DataFrame(np.diff(np.log(df["Passengers"])))
    .diff(periods=12)
    .dropna()
)
# ACF / PACF of the processed series (lags up to 20)
acf = plot_acf(syori_dt, lags=20)
pacf = plot_pacf(syori_dt, lags=20)


In [None]:
# Split first: the last 12 observations are held out as the test set, and the
# model must be fitted on the training portion only. (The split previously came
# after the fit, which referenced an undefined name `train` and failed on a
# fresh kernel.)
df_train, df_test = model_selection.train_test_split(df, test_size=12)

# Automatic order search for a seasonal ARIMA with a seasonal period of 12
arima_model = pm.auto_arima(df_train,
                            seasonal=True,
                            m=12,
                            trace=True,  # print each candidate model as it is tried
                            n_jobs=-1,
                            maxiter=10)

# Predictions
# In-sample predictions over the training period
train_pred = arima_model.predict_in_sample()
# Forecast over the test period, together with its confidence interval
test_pred, test_pred_ci = arima_model.predict(
    n_periods=df_test.shape[0],
    return_conf_int=True
)

# Accuracy on the held-out test data.
# The metrics are computed directly with NumPy: the mean_squared_error /
# mean_absolute_error / mean_absolute_percentage_error helpers this cell used
# to call are not imported by the notebook's import cell, so they raised
# NameError.
y_true = np.asarray(df_test["Passengers"], dtype=float)
y_pred = np.asarray(test_pred, dtype=float).ravel()
abs_err = np.abs(y_true - y_pred)
# Guard the MAPE denominator so a zero actual value cannot divide by zero
mape_denominator = np.maximum(np.abs(y_true), np.finfo(np.float64).eps)

print('RMSE:')
print(np.sqrt(np.mean(abs_err ** 2)))
print('MAE:')
print(np.mean(abs_err))
print('MAPE:')
# Reported as a fraction (0.05 means 5%), not multiplied by 100
print(np.mean(abs_err / mape_denominator))

# Plot actual values against the auto-ARIMA output.
# The first 24 training rows are left out of the figure.
fig, ax = plt.subplots()

train_tail = df_train.iloc[24:]
ci_lower = test_pred_ci[:, 0]
ci_upper = test_pred_ci[:, 1]

# Actual values
ax.plot(train_tail.index, train_tail.values, label="actual(train dataset)")
ax.plot(df_test.index, df_test.values, label="actual(test dataset)", color="gray")

# Model output: in-sample fit (unlabelled) and test-period forecast
ax.plot(train_tail.index, train_pred[24:], color="c")
ax.plot(df_test.index, test_pred, label="auto ARIMA", color="c")

# Shaded band between the lower and upper bounds of the forecast interval
ax.fill_between(df_test.index, ci_lower, ci_upper, color='c', alpha=.2)
ax.legend()