In [6]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [7]:
# Set device to cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
# Load and normalize the data
df = pd.read_csv('../data/Reversed_S&P500_5_years.csv', usecols=['Close/Last', 'Open', 'High', 'Low'])

scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_data = scaler.fit_transform(df)

In [9]:
def create_test(data, days):
    xs = []
    if len(data) < days:
        raise "Warning: Not enough data for the ${days}"
    for i in range(len(data) - days):
        xs.append(data[i : i + days])
        
    xs = np.array(xs, dtype=np.float32)
    return torch.tensor(xs)

In [10]:
def create_result(data, days):
    ys = []
    if len(data) < days:
        raise "Warning: Not enough data for the ${days}"
    for i in range(days, len(data)):
        y = data[i][0]
        ys.append(y)
    
    ys = np.array(ys, dtype=np.float32)
    return torch.tensor(ys)

In [11]:
days = 10

In [12]:
xs = create_test(scaled_data, days)

print(xs.ndim)
print(xs.size())
print(xs[0:2])

3
torch.Size([1249, 10, 4])
tensor([[[-0.6115, -0.6507, -0.6458, -0.5926],
         [-0.6080, -0.6436, -0.6440, -0.5878],
         [-0.6150, -0.6427, -0.6500, -0.5946],
         [-0.6023, -0.6424, -0.6408, -0.5842],
         [-0.5998, -0.6251, -0.6268, -0.5730],
         [-0.6014, -0.6339, -0.6343, -0.5769],
         [-0.6025, -0.6374, -0.6397, -0.5871],
         [-0.6081, -0.6370, -0.6411, -0.5818],
         [-0.5944, -0.6296, -0.6307, -0.5784],
         [-0.6022, -0.6178, -0.6243, -0.5923]],

        [[-0.6080, -0.6436, -0.6440, -0.5878],
         [-0.6150, -0.6427, -0.6500, -0.5946],
         [-0.6023, -0.6424, -0.6408, -0.5842],
         [-0.5998, -0.6251, -0.6268, -0.5730],
         [-0.6014, -0.6339, -0.6343, -0.5769],
         [-0.6025, -0.6374, -0.6397, -0.5871],
         [-0.6081, -0.6370, -0.6411, -0.5818],
         [-0.5944, -0.6296, -0.6307, -0.5784],
         [-0.6022, -0.6178, -0.6243, -0.5923],
         [-0.6045, -0.6324, -0.6392, -0.5815]]])


In [13]:
ys = create_result(scaled_data, days)

# print(ys.ndim)
print(ys.size())
print(ys[0:10])

torch.Size([1249])
tensor([-0.6045, -0.6175, -0.6336, -0.6378, -0.6090, -0.6031, -0.5892, -0.5910,
        -0.5809, -0.5734])


In [14]:
#Spliting the data

split_ratio = 0.8
split_index = int(xs.size()[0] * split_ratio)

train_x = xs[:split_index]
train_y = ys[:split_index]

print(len(train_x), len(train_y))

999 999


In [15]:
# DataLoader
train_data = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_data, shuffle=False, batch_size=1, drop_last=True)

In [16]:
class StockLSTM(nn.Module):
    def __init__(self, input_size=4*days, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size, device=device),
                            torch.zeros(1,1,self.hidden_layer_size, device=device))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

In [17]:
model = StockLSTM().to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [18]:
epochs = 50

for i in range(epochs):
    for seq, labels in train_loader:
        seq, labels = seq.to(device), labels.to(device)
        
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size, device=device),
                             torch.zeros(1, 1, model.hidden_layer_size, device=device))

        y_pred = model(seq)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i % 25 == 0:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

epoch:   0 loss: 0.00031425
epoch:  25 loss: 0.00014801


In [47]:
# create the sequence for prediction, the assumption here is that the 0th index has the most recent data
def create_sequences_for_eval(data, days, days_ago):
    xs = []
    if len(data) < days :
        raise "Warning: Not enough data for the ${days}"
    if days_ago == 0 :
        xs.append(data[-days:])
    else :
        xs.append(data[-days_ago - days : -days_ago])
        
    print(xs[-1][-1])
        
    xs = np.array(xs, dtype=np.float32)
    return torch.tensor(xs)


In [50]:
# this block for prediction testing
model.eval()

# this variable modified which date we are predicting, 25 means we are predicting the price for the 24th date, 0 is looking for future
for days_ago in range(10, 20):
    #days_ago = 1
    
    recent_seq_scaled = create_sequences_for_eval(scaled_data, days, days_ago)
    
    if torch.cuda.is_available():
        recent_seq_scaled  = recent_seq_scaled.to('cuda')
    
    
    with torch.no_grad():
        predict = model(recent_seq_scaled)
    
    dummy_input = np.zeros((1, 4))  # 4 features for 'Close/Last', 'Open', 'High', 'Low'
    dummy_input[0, 0] = predict  # Replace the first value with your prediction
    
    # Perform inverse transformation
    predicted_stock_price_scaled = scaler.inverse_transform(dummy_input)[0, 0]
    if days_ago == 0 :
        print(f"Predicted price: {predicted_stock_price_scaled:6.2f}")
    else :
        print(f"Predicted price: {predicted_stock_price_scaled:6.2f} ---- Actual price: {df.iloc[-days_ago, 0]:6} ---- ", end="")
        Diff = (predicted_stock_price_scaled - df.iloc[-days_ago, 0]) / predicted_stock_price_scaled
        Diff_percentage = "{:6.2f}%".format(Diff * 100)
        print(f"Diff: {Diff_percentage}")

    # print(recent_seq_scaled[9][::-1])
    # print(scaled_data[days_ago:days_ago + 10])

[0.94906046 0.91602017 0.94679109 0.92294431]
[0.94906046 0.91602017 0.94679109 0.92294431]
Predicted price: 4495.14 ---- Actual price: 4942.81 ---- Diff:  -9.96%
[0.91151476 0.87591683 0.8970615  0.88438107]
[0.91151476 0.87591683 0.8970615  0.88438107]
Predicted price: 4472.21 ---- Actual price: 4958.61 ---- Diff: -10.88%
[0.86815312 0.90370819 0.89690136 0.87845535]
[0.86815312 0.90370819 0.89690136 0.87845535]
Predicted price: 4477.96 ---- Actual price: 4906.19 ---- Diff:  -9.56%
[0.92496589 0.92319425 0.91461826 0.92880632]
[0.92496589 0.92319425 0.91461826 0.92880632]
Predicted price: 4524.45 ---- Actual price: 4845.65 ---- Diff:  -7.10%
[0.92708598 0.89915414 0.91332261 0.90836717]
[0.92708598 0.89915414 0.91332261 0.90836717]
Predicted price: 4544.59 ---- Actual price: 4924.97 ---- Diff:  -8.37%
[0.90061347 0.89620569 0.89685769 0.90416889]
[0.90061347 0.89620569 0.89685769 0.90416889]
Predicted price: 4541.71 ---- Actual price: 4927.93 ---- Diff:  -8.50%
[0.9028983  0.89456361