In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from utils import create_sequences_sent

# import all data
df = pd.read_csv('data/upload_DJIA_table.csv', parse_dates=['Date'], index_col='Date')
df = df[['Close']]
df = df.sort_index()

train_data = df[:'2014']
test_data = df['2015':]

bert_embeddings = np.load('bert_embeddings.npy')
fingpt_sentiments = np.load('fingpt_sentiment.npy').astype('float32') 

bert_embeddings_train = bert_embeddings[:train_data.shape[0]].reshape(-1, 768)
bert_embeddings_test = bert_embeddings[train_data.shape[0]:].reshape(-1, 768)

X_train_emb, X_train_pr, X_train_sent, y_train = create_sequences_sent(train_data, 60, bert_embeddings_train, fingpt_sentiments)
X_test_emb, X_test_pr, X_test_sent, y_test = create_sequences_sent(test_data, 60, bert_embeddings_test, fingpt_sentiments)

In [2]:
X_train_sent.shape, X_test_sent.shape

((1551, 60), (318, 60))

In [3]:
print("Train shapes: ", X_train_emb.shape, X_train_pr.shape, X_train_sent.shape, y_train.shape)
print("Test shapes: ", X_test_emb.shape, X_test_pr.shape, X_test_sent.shape, y_test.shape)

# convert to torch dataset
dataset_train = TensorDataset(
    torch.from_numpy(X_train_emb).float(),
    torch.from_numpy(X_train_pr).float(),
    torch.from_numpy(X_train_sent).float(),
    torch.from_numpy(y_train).float()
)
dataset_test = TensorDataset(
    torch.from_numpy(X_test_emb).float(),
    torch.from_numpy(X_test_pr).float(),
    torch.from_numpy(X_test_sent).float(),
    torch.from_numpy(y_test).float()
)

dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=True)

Train shapes:  (1551, 60, 768) (1551, 60) (1551, 60) (1551,)
Test shapes:  (318, 60, 768) (318, 60) (318, 60) (318,)


In [10]:
from models import StockPredictorSent

# Model parameters
embedding_dim = 768  # Size of BERT embeddings
price_dim = 1        # Each stock price is a single number
sent_dim = 1        # Each sentiment is a single number
hidden_dim = 128
num_layers = 2

model = StockPredictorSent(embedding_dim, price_dim, sent_dim, hidden_dim, num_layers)

In [11]:
from models import BertSentimentTrainer

# Assuming you have a model instance 'model' and DataLoaders 'dataloader_train' and 'dataloader_test'
trainer = BertSentimentTrainer(model, dataloader_train, dataloader_test, num_epochs=10, learning_rate=0.001)
trainer.train()

Epoch 1, Loss: 126368232.0, MAE: 11241.362550865442
Epoch 2, Loss: 141721200.0, MAE: 11904.671352036561
Epoch 3, Loss: 139982544.0, MAE: 11831.421892570648
Epoch 4, Loss: 82866856.0, MAE: 9103.123420013595
Epoch 5, Loss: 35907120.0, MAE: 5992.255001249529
Epoch 6, Loss: 16482364.0, MAE: 4059.847780397684
Epoch 7, Loss: 7530426.0, MAE: 2744.1621672197143
Epoch 8, Loss: 7355697.0, MAE: 2712.138823880518
Epoch 9, Loss: 8914146.0, MAE: 2985.6567116800284
Epoch 10, Loss: 8588848.0, MAE: 2930.673642697187


In [12]:
trainer.test()

Test MSE:  tensor(23712514.)
Test MAE:  tensor(4869.5498)
