In [4]:
! pip install yfinance



In [61]:
import yfinance as yf
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


In [64]:
data = yf.download("^STOXX50E", period="max")

[*********************100%%**********************]  1 of 1 completed


In [65]:
data = data.reset_index(drop=False)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2007-03-30,4177.669922,4207.759766,4160.350098,4181.029785,4181.029785,0
1,2007-04-02,4177.310059,4191.759766,4163.77002,4189.549805,4189.549805,0
2,2007-04-03,4199.540039,4248.069824,4199.540039,4246.299805,4246.299805,0
3,2007-04-04,4252.709961,4261.830078,4242.049805,4261.830078,4261.830078,0
4,2007-04-05,4259.97998,4274.169922,4253.790039,4271.540039,4271.540039,0


In [66]:
data = data.drop('Adj Close', axis=1)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2007-03-30,4177.669922,4207.759766,4160.350098,4181.029785,0
1,2007-04-02,4177.310059,4191.759766,4163.77002,4189.549805,0
2,2007-04-03,4199.540039,4248.069824,4199.540039,4246.299805,0
3,2007-04-04,4252.709961,4261.830078,4242.049805,4261.830078,0
4,2007-04-05,4259.97998,4274.169922,4253.790039,4271.540039,0


In [17]:
data.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [84]:
X = data.drop('Close', axis=1)
y = data[['Date', 'Close']]
X, y

(           Date         Open         High          Low    Volume
 0    2007-03-30  4177.669922  4207.759766  4160.350098         0
 1    2007-04-02  4177.310059  4191.759766  4163.770020         0
 2    2007-04-03  4199.540039  4248.069824  4199.540039         0
 3    2007-04-04  4252.709961  4261.830078  4242.049805         0
 4    2007-04-05  4259.979980  4274.169922  4253.790039         0
 ...         ...          ...          ...          ...       ...
 4291 2024-05-13  5083.540039  5087.919922  5068.060059  23625800
 4292 2024-05-14  5079.370117  5083.549805  5057.939941  35579700
 4293 2024-05-15  5083.250000  5102.299805  5075.060059  28208300
 4294 2024-05-16  5094.770020  5101.410156  5068.109863  26348900
 4295 2024-05-17  5061.439941  5068.390137  5040.229980  33517100
 
 [4296 rows x 5 columns],
            Date        Close
 0    2007-03-30  4181.029785
 1    2007-04-02  4189.549805
 2    2007-04-03  4246.299805
 3    2007-04-04  4261.830078
 4    2007-04-05  4271.540039


In [85]:
window_size = 5

# Initialize an empty list to store 3D arrays
X_3d_list = []

# Iterate through each window
for i in range(len(X) - window_size + 1):
    # Extract the data for the current window
    window_data = X.iloc[i:i+window_size]
    # Convert the window data to a 2D numpy array
    window_array = window_data.drop(columns=['Date']).to_numpy()
    # Append the 2D array with the date column included as the first feature
    window_array_with_time = np.insert(window_array, 0, window_data['Date'], axis=1)
    # Append the 2D array to the list
    X_3d_list.append(window_array_with_time)

# Stack the list of 3D arrays into a single 3D numpy array
X_3d = np.stack(X_3d_list)


X_3d[1], X_3d[-1]

(array([[1.17547200e+18, 4.17731006e+03, 4.19175977e+03, 4.16377002e+03,
         0.00000000e+00],
        [1.17555840e+18, 4.19954004e+03, 4.24806982e+03, 4.19954004e+03,
         0.00000000e+00],
        [1.17564480e+18, 4.25270996e+03, 4.26183008e+03, 4.24204980e+03,
         0.00000000e+00],
        [1.17573120e+18, 4.25997998e+03, 4.27416992e+03, 4.25379004e+03,
         0.00000000e+00],
        [1.17616320e+18, 4.27568018e+03, 4.30766016e+03, 4.26700977e+03,
         0.00000000e+00]]),
 array([[1.71555840e+18, 5.08354004e+03, 5.08791992e+03, 5.06806006e+03,
         2.36258000e+07],
        [1.71564480e+18, 5.07937012e+03, 5.08354980e+03, 5.05793994e+03,
         3.55797000e+07],
        [1.71573120e+18, 5.08325000e+03, 5.10229980e+03, 5.07506006e+03,
         2.82083000e+07],
        [1.71581760e+18, 5.09477002e+03, 5.10141016e+03, 5.06810986e+03,
         2.63489000e+07],
        [1.71590400e+18, 5.06143994e+03, 5.06839014e+03, 5.04022998e+03,
         3.35171000e+07]]))

In [90]:
window_size = 5

# Initialize an empty list to store 3D arrays
y_3d_list = []

# Iterate through each window
for i in range(len(y) - window_size + 1):
    # Extract the data for the current window
    window_data = y.iloc[i:i+window_size]
    # Convert the window data to a 2D numpy array
    window_array = window_data.drop(columns=['Date']).to_numpy()
    # Append the 2D array with the date column included as the first feature
    #window_array_with_time = np.insert(window_array, 0, window_data['Date'], axis=1)
    # Append the 2D array to the list
    y_3d_list.append(window_array)

# Stack the list of 3D arrays into a single 3D numpy array
y_3d = np.stack(y_3d_list)

y_3d[1], y_3d[-1]

(array([[4189.54980469],
        [4246.29980469],
        [4261.83007812],
        [4271.54003906],
        [4301.47021484]]),
 array([[5078.95996094],
        [5080.29003906],
        [5100.89990234],
        [5072.45019531],
        [5064.14013672]]))

Unnamed: 0,Date,Close
0,2007-03-30,4181.029785
1,2007-04-02,4189.549805
2,2007-04-03,4246.299805
3,2007-04-04,4261.830078
4,2007-04-05,4271.540039
...,...,...
4291,2024-05-13,5078.959961
4292,2024-05-14,5080.290039
4293,2024-05-15,5100.899902
4294,2024-05-16,5072.450195


In [165]:
X_train, X_remainder, y_train, y_remainder = train_test_split(X_3d, y[4:].drop('Date', axis=1).reset_index(drop=True).values.reshape(-1, 1, 1), test_size=0.3, shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_remainder, y_remainder, test_size=0.333, shuffle=False)
X_test, X_forecast, y_test, y_forecast = train_test_split(X_test, y_test, test_size=0.2, shuffle=False)

In [164]:
y[4:].drop('Date', axis=1).reset_index(drop=True).values.reshape(-1, 1, 1)

array([[[4271.54003906]],

       [[4301.47021484]],

       [[4293.20019531]],

       ...,

       [[5100.89990234]],

       [[5072.45019531]],

       [[5064.14013672]]])

In [129]:
len(X_train), len(X_val), len(X_test), len(X_forecast), len(y_val)

(3004, 859, 343, 86, 859)

In [130]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [97]:
batch_size = 32

In [172]:
train_dataset = CustomDataset(X_train, y_train)
validation_dataset = CustomDataset(X_val, y_val)
test_dataset = CustomDataset(X_test, y_test)
forecast_dataset = CustomDataset(X_forecast, y_forecast)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
forecast_dataloader = DataLoader(forecast_dataset, batch_size=batch_size, shuffle=False)

In [167]:
for batch in train_dataloader:
    # batch is a tensor of shape (batch_size, window_size, num_features)
    # Your training/validation loop goes here
    pass

In [168]:
class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout_prob):
        super(GRUModel, self).__init__()

        self.layer_dim = num_layers
        self.hidden_dim = hidden_dim

        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initializing hidden state for first input with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()

        # Forward propagation by passing in the input and hidden state into the model
        out, _ = self.gru(x, h0.detach())

        # Reshaping the outputs in the shape of (batch_size, seq_length, hidden_size)
        # so that it can fit into the fully connected layer
        out = out[:, -1, :]

        # Convert the final state to our desired output shape (batch_size, output_dim)
        out = self.fc(out)
        return out

In [133]:
input_dim = len(X_train[0])
hidden_dim = 64
num_layers = 1
output_dim = 1
dropout_prob = 0.3

In [173]:
model = GRUModel(input_dim, hidden_dim, num_layers, output_dim, dropout_prob)

In [174]:
num_epochs = 10
lr = 0.001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [176]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_dataloader:
        optimizer.zero_grad()
        inputs = inputs.float()
        targets = targets.float()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        validation_loss = 0
        for inputs, targets in validation_dataloader:
            inputs = inputs.float()
            targets = targets.float()
            outputs = model(inputs)
            validation_loss += criterion(outputs, targets).item()

        validation_loss /= len(validation_dataloader)

    print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {validation_loss}')


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/10, Validation Loss: 13425643.74074074
Epoch 2/10, Validation Loss: 13402750.370370371
Epoch 3/10, Validation Loss: 13379879.537037037
Epoch 4/10, Validation Loss: 13357037.685185185
Epoch 5/10, Validation Loss: 13334222.407407407
Epoch 6/10, Validation Loss: 13311436.944444444
Epoch 7/10, Validation Loss: 13288680.129629629
Epoch 8/10, Validation Loss: 13265951.092592593
Epoch 9/10, Validation Loss: 13243250.333333334
Epoch 10/10, Validation Loss: 13220578.092592593


In [177]:
model.eval()
with torch.no_grad():
    test_loss = 0
    for inputs, targets in test_dataloader:
        inputs = inputs.float()
        targets = targets.float()
        outputs = model(inputs)
        test_loss += criterion(outputs, targets).item()

    test_loss /= len(test_dataloader)

print(f'Test Loss: {test_loss}')

Test Loss: 16936410.545454547


  return F.mse_loss(input, target, reduction=self.reduction)


In [178]:
model.eval()
with torch.no_grad():
    forecast_loss = 0
    for inputs, targets in forecast_dataloader:
        inputs = inputs.float()
        targets = targets.float()
        outputs = model(inputs)
        forecast_loss += criterion(outputs, targets).item()

    forecast_loss /= len(forecast_dataloader)

print(f'Test Loss: {forecast_loss}')

Test Loss: 23508572.666666668


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
