In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [2]:
# Set device to cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load and normalize the data
df = pd.read_csv('../data/NVDA_processed.csv', usecols=['Close/Last', 'Open', 'High', 'Low'])

scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_data = scaler.fit_transform(df)

In [4]:
# Function to create sequences
def create_sequences(data, look_back_days):
    xs, ys = [], []
    for i in range(len(data) - look_back_days - 1, -1, -1):
        x = data[i : (i + look_back_days)]
        y = data[i + look_back_days][0]  # Predict next close value
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [5]:
# Separate data
look_back_days = 10  # Number of time steps to look back
X, y = create_sequences(scaled_data, look_back_days)
X_train, y_train = torch.FloatTensor(X), torch.FloatTensor(y)

In [6]:
# DataLoader
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, shuffle=True, batch_size=1, drop_last=True)

In [7]:
class StockLSTM(nn.Module):
    def __init__(self, input_size=40, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size, device=device),
                            torch.zeros(1,1,self.hidden_layer_size, device=device))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

In [8]:
model = StockLSTM().to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [9]:
epochs = 150

for i in range(epochs):
    for seq, labels in train_loader:
        seq, labels = seq.to(device), labels.to(device)
        
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size, device=device),
                             torch.zeros(1, 1, model.hidden_layer_size, device=device))

        y_pred = model(seq)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i % 25 == 0:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

epoch:   0 loss: 0.00026953
epoch:  25 loss: 0.00007520
epoch:  50 loss: 0.00001673
epoch:  75 loss: 0.00040388
epoch: 100 loss: 0.00014171
epoch: 125 loss: 0.00000086


In [47]:
# create the sequence for prediction, the assumption here is that the 0th index has the most recent data
def create_sequences_for_eval(data, look_back_days):
    xs = []
    for i in range(look_back_days - 1, -1, -1):
        x = data[i : (i + 10)][::-1]
        xs.append(x)
    return np.array(xs)


In [83]:
model.eval()

# this variable modified which date we are predicting, 25 means we are predicting the price for the 24th date, 0 is looking for future
for days_ago in range(10,30):
    #days_ago = 1
    
    recent_seq_scaled = create_sequences_for_eval(scaled_data[days_ago:], look_back_days)
    recent_seq_tensor  = torch.FloatTensor(recent_seq_scaled)
    
    if torch.cuda.is_available():
        recent_seq_tensor  = recent_seq_tensor.to('cuda')
    
    
    with torch.no_grad():
        predict = model(recent_seq_tensor)
    
    dummy_input = np.zeros((1, 4))  # 4 features for 'Close/Last', 'Open', 'High', 'Low'
    dummy_input[0, 0] = predict  # Replace the first value with your prediction
    
    # Perform inverse transformation
    predicted_stock_price_scaled = scaler.inverse_transform(dummy_input)[0, 0]
    
    print(f"Predicted stock price: {predicted_stock_price_scaled}, Actual price: {df.iloc[days_ago, 0]}")

    # print(recent_seq_scaled[9][::-1])
    # print(scaled_data[days_ago:days_ago + 10])

Predicted stock price: 762.7772293452916, Actual price: 696.41
Predicted stock price: 752.7639474947749, Actual price: 700.99
Predicted stock price: 744.6661768485604, Actual price: 682.23
Predicted stock price: 730.272874239534, Actual price: 693.32
Predicted stock price: 703.5584408906101, Actual price: 661.6
Predicted stock price: 687.771296969503, Actual price: 630.27
Predicted stock price: 682.994372536689, Actual price: 615.27
Predicted stock price: 690.6761208344994, Actual price: 627.74
Predicted stock price: 681.0805257816611, Actual price: 624.65
Predicted stock price: 675.9541794639824, Actual price: 610.31
Predicted stock price: 675.1963827003538, Actual price: 616.17
Predicted stock price: 660.9373156760632, Actual price: 613.62
Predicted stock price: 645.6944241751729, Actual price: 598.73
Predicted stock price: 643.3601690229773, Actual price: 596.54
Predicted stock price: 627.418703663051, Actual price: 594.91
Predicted stock price: 616.2962463115155, Actual price: 571.