In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from utils import create_sequences

# Read the DJIA table, keep only the closing price, and put the rows in
# ascending date order so the label-based split below is well defined.
prices_raw = pd.read_csv('data/upload_DJIA_table.csv', parse_dates=['Date'], index_col='Date')
df = prices_raw[['Close']].sort_index()

# Chronological hold-out: everything up to the end of 2014 is used for
# training, 2015 onwards for testing (partial-string slicing on the date index).
train_data = df.loc[:'2014']
test_data = df.loc['2015':]

# Pre-computed embeddings, flattened to rows of width 768.
# NOTE(review): the positional split assumes row i of the .npy file belongs to
# row i of the *sorted* price table — confirm against the script that wrote it.
bert_embeddings = np.load('bert_embeddings.npy')
n_train_rows = len(train_data)

bert_embeddings_train = bert_embeddings[:n_train_rows].reshape(-1, 768)
bert_embeddings_test = bert_embeddings[n_train_rows:].reshape(-1, 768)

# Build (embedding window, price window, target) triples for each split.
# 60 is handed to create_sequences as its second argument; presumably the
# look-back window length (it shows up as the second axis in the printed shapes).
X_train_emb, X_train_pr, y_train = create_sequences(train_data, 60, bert_embeddings_train)
X_test_emb, X_test_pr, y_test = create_sequences(test_data, 60, bert_embeddings_test)

# Sanity-print the array shapes of both splits.
for label, arrays in (
    ("Train shapes: ", (X_train_emb, X_train_pr, y_train)),
    ("Test shapes: ", (X_test_emb, X_test_pr, y_test)),
):
    print(label, *(arr.shape for arr in arrays))

# Fix the global torch RNG so the training shuffle order (and any weight
# initialisation performed in later cells) is repeatable on Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

BATCH_SIZE = 32

# Wrap the NumPy arrays as float32 tensors. Each dataset item is a
# (embedding window, price window, target) triple, in that order.
dataset_train = TensorDataset(
    *(torch.from_numpy(arr).float() for arr in (X_train_emb, X_train_pr, y_train))
)
dataset_test = TensorDataset(
    *(torch.from_numpy(arr).float() for arr in (X_test_emb, X_test_pr, y_test))
)

# Shuffle only the training batches. The evaluation loader keeps its original
# order so test batches are deterministic and predictions can be lined up with
# the source rows.
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

Train shapes:  (1551, 60, 768) (1551, 60) (1551,)
Test shapes:  (318, 60, 768) (318, 60) (318,)


In [2]:
from models import StockPredictor

# Hyperparameters for the predictor.
#   embedding_dim - width of one BERT embedding vector
#   price_dim     - one scalar price per time step
#   hidden_dim / num_layers - forwarded to StockPredictor as its third and
#                             fourth positional arguments
embedding_dim, price_dim = 768, 1
hidden_dim, num_layers = 128, 2

model = StockPredictor(
    embedding_dim,
    price_dim,
    hidden_dim,
    num_layers,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from models import BertRnnTrainer

# Wire the model and both loaders (built in the cells above) into the trainer,
# then run the training loop for 10 epochs at a learning rate of 1e-3.
# NOTE(review): the recorded losses are on the order of 1e7-1e8, which suggests
# the targets are unscaled index levels — confirm whether create_sequences or
# the trainer normalises prices.
trainer = BertRnnTrainer(
    model,
    dataloader_train,
    dataloader_test,
    num_epochs=10,
    learning_rate=1e-3,
)
trainer.train()

Epoch 1, Loss: 160156768.0, MAE: 12655.30592281356
Epoch 2, Loss: 164208880.0, MAE: 12814.401273567173
Epoch 3, Loss: 126178272.0, MAE: 11232.910219529042
Epoch 4, Loss: 46271344.0, MAE: 6802.304315450758
Epoch 5, Loss: 17988900.0, MAE: 4241.332337839137
Epoch 6, Loss: 6537196.5, MAE: 2556.794184129806
Epoch 7, Loss: 7154466.0, MAE: 2674.783355713131
Epoch 8, Loss: 11508705.0, MAE: 3392.448230997785
Epoch 9, Loss: 6419246.0, MAE: 2533.6230974633936
Epoch 10, Loss: 12088799.0, MAE: 3476.8950228616336


In [4]:
# Run the trainer's evaluation pass; the recorded output reports test MSE and
# MAE. Presumably this uses the test loader handed to BertRnnTrainer — verify
# in models.py.
trainer.test()

Test MSE:  tensor(23333504.)
Test MAE:  tensor(4830.4766)
