In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

%config InlineBackend.figure_format = 'svg'

In [None]:
# Fix the PyTorch RNG seed so that weight initialisation is repeatable.
torch.manual_seed(0)

# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    _backend = "cuda"
elif torch.backends.mps.is_available():
    _backend = "mps"
else:
    _backend = "cpu"
device = torch.device(_backend)

In [None]:
# One-off refresh of the local raw data: uncomment to download daily EEM
# prices (up to 2025-01-01) with yfinance and write them to the CSV file.
# df = yf.download("EEM", end="2025-01-01", interval="1d",multi_level_index=False)
# df.to_csv("../data/raw/eem.csv", index=True)

In [None]:
# Load the cached prices. `parse_dates=True` only parses the *index*, so the
# first CSV column has to be declared as the index for the dates to be parsed;
# without `index_col` the frame keeps a plain integer index.
# NOTE(review): assumes the first column holds the dates, as produced by
# `to_csv(..., index=True)` on the downloaded frame — confirm against the file.
df = pd.read_csv("../data/raw/eem.csv", index_col=0, parse_dates=True)

In [None]:
# Visual sanity check of the closing-price series before modelling.
close_prices = df["Close"]
close_prices.plot(figsize=(12, 8), title="EEM")

In [None]:
class GRU(nn.Module):
    """Stacked GRU followed by a linear head producing one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_layer_size: Size of the hidden state of every GRU layer.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_layer_size, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_layer_size = hidden_layer_size
        self.gru = nn.GRU(
            input_size, hidden_layer_size, num_layers=num_layers, batch_first=True
        )
        self.fc = nn.Linear(hidden_layer_size, 1)

    def forward(self, x):
        """Run the GRU over ``x`` and project the last time step to a scalar.

        Args:
            x: Tensor laid out as ``(batch, seq_len, input_size)``
                (the GRU is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # Allocate the zero initial state on the same device/dtype as the
        # input instead of a notebook-global device, so the module works
        # wherever the model and its inputs actually live.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_layer_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Only the output at the final time step feeds the linear head.
        return self.fc(out[:, -1, :])

In [None]:
def create_sequences(data, window_size=7):
    """Slice a series into fixed-length input windows and next-step targets.

    Each window holds ``window_size`` consecutive elements of ``data``; its
    target is the element immediately following the window.

    Args:
        data: Indexable, sliceable sequence (e.g. a 1-D NumPy array).
        window_size: Number of consecutive elements in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays: the stacked windows and the
        corresponding targets.
    """
    starts = range(len(data) - window_size)
    windows = [data[start : start + window_size] for start in starts]
    targets = [data[start + window_size] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Training hyperparameters.
epochs = 1000
num_inputs = 30  # length of each input window fed to the model

# Univariate input -> 4 stacked GRU layers of width 64 -> scalar prediction.
gru = GRU(input_size=1, hidden_layer_size=64, num_layers=4).to(device)

# Mean-squared-error objective optimised with Adam.
loss = nn.MSELoss()
optimizer = optim.Adam(gru.parameters(), lr=5e-3)

In [None]:
# Independent min-max scalers: one for the input windows, one for the targets.
x_scaler, y_scaler = MinMaxScaler(), MinMaxScaler()

In [None]:
# Chronological split (no shuffling): the last 30% of the closing prices is
# held out as the test set.
close_values = df["Close"].values
Xy_train, Xy_test = train_test_split(close_values, shuffle=False, test_size=0.3)

In [None]:
# Build (window, next value) pairs separately for each split.
X_train, y_train = create_sequences(Xy_train, window_size=num_inputs)
X_test, y_test = create_sequences(Xy_test, window_size=num_inputs)

# Scalers are fitted on the training split only and then reused on the test
# split. The x scaler works column-wise, i.e. per position inside the window.
X_train = x_scaler.fit_transform(X_train)
X_test = x_scaler.transform(X_test)

# The scaler API is 2-D: lift the targets to a single column, scale them,
# then flatten back to 1-D.
y_train = y_scaler.fit_transform(y_train[:, np.newaxis]).ravel()
y_test = y_scaler.transform(y_test[:, np.newaxis]).ravel()

In [None]:
def _as_model_tensor(array):
    """Convert a NumPy array to a float32 tensor with a trailing axis on `device`."""
    return torch.tensor(array, dtype=torch.float32).unsqueeze(-1).to(device)


X_train_tensor = _as_model_tensor(X_train)
y_train_tensor = _as_model_tensor(y_train)
X_test_tensor = _as_model_tensor(X_test)
y_test_tensor = _as_model_tensor(y_test)

# Full-batch training: every epoch is a single optimiser step on the whole
# training set.
for epoch in range(epochs):
    # Re-enter training mode each epoch because the periodic evaluation below
    # switches the model to eval mode.
    gru.train()
    optimizer.zero_grad()

    output = gru(X_train_tensor)
    train_loss = loss(output, y_train_tensor)

    train_loss.backward()
    optimizer.step()

    # Only report on every 100th epoch.
    if (epoch + 1) % 100 != 0:
        continue

    gru.eval()
    with torch.no_grad():
        test_output = gru(X_test_tensor)
        test_loss = loss(test_output, y_test_tensor)
    print(
        f"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss.item()}, Test Loss: {test_loss.item()}"
    )

In [None]:
# Predict the whole test set in one batched forward pass. Inference runs in
# eval mode and under `no_grad` so that no autograd graph is built.
gru.eval()
with torch.no_grad():
    test_batch = torch.tensor(X_test, dtype=torch.float32).unsqueeze(-1).to(device)
    # The model emits one column per sample; move it to the CPU as float64.
    scaled_predictions = gru(test_batch).cpu().numpy().astype(np.float64)

# Undo the target scaling so the error and the plot are in the original units.
points = y_scaler.inverse_transform(scaled_predictions)
observed = y_scaler.inverse_transform(y_test.reshape(-1, 1))

# Test-set mean squared error (arguments in the conventional true, predicted order).
print(mean_squared_error(observed, points))

plt.plot(observed, label="Observed")
plt.plot(points, "--", label="Predicted")
plt.xlabel("Test sample")
plt.ylabel("Close")
plt.legend()

In [None]:
# Uncomment to write the trained model object to disk.
# torch.save(gru, "../models/gru1.pt")