In [None]:
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

%config InlineBackend.figure_formats = 'svg'

In [None]:
# Load the raw CSV into a DataFrame; the cells below read its "Close" column.
# NOTE(review): `parse_dates=True` asks pandas to parse the *index* as dates, but
# no `index_col` is given here, so presumably nothing is actually converted --
# confirm the CSV layout and consider `index_col=...` / `parse_dates=[...]`.
df = pd.read_csv("../data/raw/eem.csv", parse_dates=True)
# Show the first rows as the cell output.
df.head()

In [None]:
# Plot the "Close" series against the DataFrame index, with a title and axis
# labels so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(df["Close"])
ax.set_title("Close")
ax.set_xlabel("DataFrame index")
ax.set_ylabel("Close")
plt.show()

In [None]:
def create_sequences(data, window_size=7):
    """Slice a series into overlapping windows and their next-step targets.

    For every position ``i`` with a full window available, the sample is
    ``data[i : i + window_size]`` and the target is ``data[i + window_size]``.

    Parameters
    ----------
    data : array-like
        Ordered observations along the first axis. Lists, NumPy arrays and
        pandas Series/DataFrames are accepted; the input is converted with
        ``np.asarray`` so indexing is always positional, regardless of any
        pandas index labels.
    window_size : int, default 7
        Number of consecutive observations in each window. Must be >= 1.

    Returns
    -------
    xs : numpy.ndarray
        Windows, shape ``(n_samples, window_size, ...)`` where
        ``n_samples = max(len(data) - window_size, 0)``.
    ys : numpy.ndarray
        Targets, shape ``(n_samples, ...)``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Work on a plain array: integer keys on a pandas Series are label-based,
    # which breaks (or silently misaligns) when the index does not start at 0.
    values = np.asarray(data)
    n_samples = max(len(values) - window_size, 0)

    # Row i of `window_index` holds the positions i, i+1, ..., i+window_size-1.
    # When there is no complete window this has shape (0, window_size), so the
    # result keeps a well-formed 2-D shape instead of collapsing to (0,).
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]

    xs = values[window_index]
    # Copy so the targets never alias the caller's buffer.
    ys = values[window_size:].copy()
    return xs, ys

In [None]:
# Chronological split (no shuffling): the first 70% of the closing prices form
# the training span and the remaining 30% the test span. Each span is then cut
# into 7-step windows with the observation that follows each window as target.
close_values = df["Close"].values
Xy_train, Xy_test = train_test_split(close_values, test_size=0.3, shuffle=False)
(X_train, y_train), (X_test, y_test) = (
    create_sequences(span, window_size=7) for span in (Xy_train, Xy_test)
)

In [None]:
# Augmented Dickey-Fuller test on the closing prices in levels. The result
# tuple holds the test statistic at position 0, the p-value at position 1 and
# the critical values at position 4.
result = adfuller(df["Close"])
for label, position in (
    ("ADF Statistic:", 0),
    ("p-value:", 1),
    ("Critical Values:", 4),
):
    print(label, result[position])

In [None]:
# First-difference the closing prices (dropping the leading NaN produced by
# diff) and repeat the ADF test on the differenced series.
d1 = df["Close"].diff().dropna()
result = adfuller(d1)
print("ADF Statistic:", result[0])
print("p-value:", result[1])
print("Critical Values:", result[4])

# Plot the differenced series with a title and axis labels so the figure is
# readable on its own; plt.show() keeps the Axes repr out of the cell output.
ax = d1.plot(title="First difference of Close")
ax.set_xlabel("DataFrame index")
ax.set_ylabel("Close[t] - Close[t-1]")
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Side-by-side correlograms of the differenced series: ACF on the left,
# PACF on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

panels = (
    (plot_acf, "Autocorrelation Function (ACF)"),
    (plot_pacf, "Partial Autocorrelation Function (PACF)"),
)
for panel_ax, (draw_correlogram, panel_title) in zip(axes, panels):
    draw_correlogram(d1, ax=panel_ax)
    panel_ax.set_title(panel_title)

plt.show()

In [None]:
# Rolling one-step-ahead forecast: for every window in X_test, fit a fresh
# ARIMA model on that window alone and predict the observation that follows it.
# The predictions are then compared visually against y_test.
ARIMA_ORDER = (0, 1, 0)  # (p, d, q): first-differenced, no AR or MA terms

points = []
n_windows = len(X_test)

for i, window in enumerate(X_test):
    arima = ARIMA(window, order=ARIMA_ORDER)
    model = arima.fit()
    pred = model.forecast(steps=1)
    # forecast() yields a length-1 array-like; keep only the scalar so that
    # `points` is a flat list of floats rather than a list of 1-element arrays.
    points.append(float(np.asarray(pred)[0]))
    if i % 100 == 0:
        # Lightweight progress indicator; one model is fitted per window.
        print(f"{i} / {n_windows}")

fig, ax = plt.subplots()
ax.plot(y_test, label="True")
ax.plot(points, "--", label="Predicted")
ax.set_title(f"One-step-ahead ARIMA{ARIMA_ORDER} forecast on the test windows")
ax.set_xlabel("Test window")
ax.set_ylabel("Close")
ax.legend()
plt.show()