In [14]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import torch
from sklearn.preprocessing import MinMaxScaler
import tensorflow
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import mean_squared_error

In [3]:
ticker = 'CVX'
start_date = '2000-01-01'
end_date = '2024-01-01'

In [4]:
df = yf.download(ticker, start=start_date, end=end_date)
df.columns = df.columns.droplevel('Ticker')
print(df.head())

[*********************100%***********************]  1 of 1 completed

Price       Adj Close     Close      High       Low      Open   Volume
Date                                                                  
2000-01-03  16.611206  41.81250  42.93750  41.28125  42.93750  4387600
2000-01-04  16.611206  41.81250  42.06250  41.25000  41.46875  3702400
2000-01-05  16.909164  42.56250  43.28125  41.53125  41.53125  5567600
2000-01-06  17.629240  44.37500  44.59375  42.65625  42.65625  4353400
2000-01-07  17.939602  45.15625  45.43750  44.50000  45.00000  4487400





In [5]:
window_size = 60
guess_size = 7
train_end_year = 2018
validation_end_year = 2022

In [6]:
def process_df(df: pd.DataFrame, window_size, guess_size):
    df = df.drop(columns=['Volume'])

    # fill in NaNs
    df = df.copy()
    df = df.bfill()

    # difference data
    first = df.iloc[0]
    differenced = df.diff().dropna()

    # split into windows
    close = differenced['Close'].to_numpy()
    differenced = differenced.drop(columns=['Close']).to_numpy()

    X = []
    y = []
    for index in range(len(differenced) - window_size - guess_size):
        X.append(differenced[index: index + window_size])
        y.append(close[index + window_size: index + window_size + guess_size])

    X = torch.tensor(np.array(X), dtype=torch.float32)
    y = torch.tensor(np.array(y), dtype=torch.float32)
    return X, y, first



train = df[df.index.year <= train_end_year]
validation = df[(df.index.year > train_end_year) & (df.index.year <= validation_end_year)]
test = df[df.index.year > validation_end_year]

X_train, y_train, train_first = process_df(train, window_size, guess_size)
X_validation, y_validation, validation_first = process_df(validation, window_size, guess_size)
X_test, y_test, test_first = process_df(test, window_size, guess_size)

# X_combined = pd.concat([X_train, X_validation])
# y_combined = pd.concat([y_train, y_validation])
print(X_train.shape)
print(y_train.shape)

torch.Size([4711, 60, 4])
torch.Size([4711, 7])


In [8]:
class LSTM(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = torch.nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        last_time_step = out[:, -1, :]
        predictions = self.fc(last_time_step)
        return predictions

In [27]:
hidden_dim = 128
num_layers = 3
dropout = 0.3
lr = 0.001
epochs = 100
loss = torch.nn.MSELoss()
batch_size = 32

model = LSTM(
    input_dim=4,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=guess_size,
    dropout=dropout
)

In [28]:
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [31]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

optimiser = torch.optim.Adam(model.parameters(), lr=lr)
training_loss = []
model = model.to(device)

for t in tqdm(range(epochs)):
    
    for X_batch, y_batch in train_loader:
        # zero out gradients
        optimiser.zero_grad()

        # train model
        y_train_pred = model(X_batch)
        loss_val = loss(y_train_pred, y_batch)
        loss_val.backward()
        optimiser.step()

    training_loss.append(loss_val.item())
    if t % 10 == 0 and t !=0:
        print("Epoch ", t, "MSE: ", loss.item())

  0%|          | 0/100 [00:02<?, ?it/s]


KeyboardInterrupt: 