In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [2]:
# 1. Download Stock Data
def download_stock_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    return data['Close'].values, data.index

In [3]:
# 2. Preprocess Data
def preprocess_data(data, sequence_length):
    scaler = MinMaxScaler()
    data = scaler.fit_transform(data.reshape(-1, 1)).reshape(-1)

    sequences = []
    targets = []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i+sequence_length])
        targets.append(data[i+sequence_length])

    return np.array(sequences), np.array(targets), scaler

In [4]:
# 3. Create Dataset Class
class StockDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return torch.tensor(self.sequences[idx], dtype=torch.float32), torch.tensor(self.targets[idx], dtype=torch.float32)

In [5]:
# 4a. Build the Transformer Model
class CustomTransformerModel(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, dim_feedforward, dropout):
        super(CustomTransformerModel, self).__init__()
        self.d_model = d_model
        self.embedding = nn.Linear(input_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.fc_out = nn.Linear(d_model, 1)

    def forward(self, src):
        src = self.embedding(src) * np.sqrt(self.d_model)
        output = self.transformer_encoder(src)
        output = self.fc_out(output[:, -1, :])
        return output

In [6]:
# 4b. Build the LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [7]:
# 5. Train the Model
def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(-1))  # Add extra dimension for feature
            loss = criterion(outputs.squeeze(), targets)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

In [11]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

# 6. Main Function
if __name__ == "__main__":
    # Parameters
    START_DATE = '2022-01-01'
    END_DATE = '2024-01-01'
    SEQUENCE_LENGTH = 30
    BATCH_SIZE = 32
    EPOCHS = 20
    INPUT_SIZE = 1
    D_MODEL = 64
    NHEAD = 4
    NUM_LAYERS = 2
    DIM_FEEDFORWARD = 128
    DROPOUT = 0.1
    HIDDEN_SIZE = 64
    OUTPUT_SIZE = 1
    tickers = ['GOOG', 'BTC', 'NVDA']
    all_rmse = []
    all_acc  = []

    for ticker in tickers:
        data, dates = download_stock_data(ticker, START_DATE, END_DATE)
        sequences, targets, scaler = preprocess_data(data, SEQUENCE_LENGTH)

        # Create DataLoader
        dataset = StockDataset(sequences, targets)
        train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

        # Initialize models, criterion, and optimizer
        transformer_model = CustomTransformerModel(INPUT_SIZE, D_MODEL, NHEAD, NUM_LAYERS, DIM_FEEDFORWARD, DROPOUT)
        lstm_model = LSTMModel(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE, DROPOUT)

        criterion = nn.MSELoss()
        transformer_optimizer = torch.optim.Adam(transformer_model.parameters(), lr=0.001)
        lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.001)

        # Train the models
        print(f"\nTraining Transformer Model for {ticker}")
        train_model(transformer_model, train_loader, criterion, transformer_optimizer, EPOCHS)
        print(f"Training LSTM Model for {ticker}")
        train_model(lstm_model, train_loader, criterion, lstm_optimizer, EPOCHS)

        # Evaluate the models
        transformer_predictions = []
        lstm_predictions = []

        with torch.no_grad():
            for i in range(len(sequences)):
                input_seq = torch.tensor(sequences[i], dtype=torch.float32).unsqueeze(0).unsqueeze(-1)
                transformer_pred = transformer_model(input_seq).item()
                lstm_pred = lstm_model(input_seq).item()
                transformer_predictions.append(transformer_pred)
                lstm_predictions.append(lstm_pred)

        # Inverse transform the predictions and targets
        transformer_predictions = scaler.inverse_transform(np.array(transformer_predictions).reshape(-1, 1)).reshape(-1)
        lstm_predictions = scaler.inverse_transform(np.array(lstm_predictions).reshape(-1, 1)).reshape(-1)
        targets = scaler.inverse_transform(targets.reshape(-1, 1)).reshape(-1)

        # Calculate RMSE
        transformer_rmse = np.sqrt(mean_squared_error(targets, transformer_predictions))
        lstm_rmse = np.sqrt(mean_squared_error(targets, lstm_predictions))

        # Append RMSE to list for overall evaluation
        all_rmse.append((ticker, transformer_rmse, lstm_rmse))

    # Overall evaluation
    print("Overall Evaluation:")
    evaluation_data = []
    range_targets = max(targets) - min(targets)
    for ticker, transformer_rmse, lstm_rmse in all_rmse:
        transformer_acc = (1 - (transformer_rmse / range_targets)) * 100
        lstm_acc = (1 - (lstm_rmse / range_targets)) * 100
        evaluation_data.append({
            'Ticker': ticker,
            'Transformer Model RMSE': transformer_rmse,
            'LSTM Model RMSE': lstm_rmse,
            'Transformer Model ACC': transformer_acc,
            'LSTM Model ACC': lstm_acc
        })
    evaluation_df = pd.DataFrame(evaluation_data)

[*********************100%%**********************]  1 of 1 completed



Training Transformer Model for GOOG
Epoch 1/20, Loss: 0.0452
Epoch 2/20, Loss: 0.0184
Epoch 3/20, Loss: 0.0191
Epoch 4/20, Loss: 0.0064
Epoch 5/20, Loss: 0.0100
Epoch 6/20, Loss: 0.0052
Epoch 7/20, Loss: 0.0041
Epoch 8/20, Loss: 0.0100
Epoch 9/20, Loss: 0.0052
Epoch 10/20, Loss: 0.0036
Epoch 11/20, Loss: 0.0075
Epoch 12/20, Loss: 0.0036
Epoch 13/20, Loss: 0.0039
Epoch 14/20, Loss: 0.0044
Epoch 15/20, Loss: 0.0019
Epoch 16/20, Loss: 0.0030
Epoch 17/20, Loss: 0.0058
Epoch 18/20, Loss: 0.0036
Epoch 19/20, Loss: 0.0044
Epoch 20/20, Loss: 0.0028
Training LSTM Model for GOOG
Epoch 1/20, Loss: 0.0468
Epoch 2/20, Loss: 0.0560
Epoch 3/20, Loss: 0.0428
Epoch 4/20, Loss: 0.0154
Epoch 5/20, Loss: 0.0162
Epoch 6/20, Loss: 0.0086
Epoch 7/20, Loss: 0.0092
Epoch 8/20, Loss: 0.0062
Epoch 9/20, Loss: 0.0069
Epoch 10/20, Loss: 0.0046
Epoch 11/20, Loss: 0.0066
Epoch 12/20, Loss: 0.0059
Epoch 13/20, Loss: 0.0045
Epoch 14/20, Loss: 0.0075
Epoch 15/20, Loss: 0.0037
Epoch 16/20, Loss: 0.0043
Epoch 17/20, Los

[*********************100%%**********************]  1 of 1 completed



Training Transformer Model for BTC
Epoch 1/20, Loss: 0.0180
Epoch 2/20, Loss: 0.0103
Epoch 3/20, Loss: 0.0092
Epoch 4/20, Loss: 0.0073
Epoch 5/20, Loss: 0.0059
Epoch 6/20, Loss: 0.0064
Epoch 7/20, Loss: 0.0043
Epoch 8/20, Loss: 0.0046
Epoch 9/20, Loss: 0.0038
Epoch 10/20, Loss: 0.0017
Epoch 11/20, Loss: 0.0054
Epoch 12/20, Loss: 0.0027
Epoch 13/20, Loss: 0.0024
Epoch 14/20, Loss: 0.0020
Epoch 15/20, Loss: 0.0030
Epoch 16/20, Loss: 0.0019
Epoch 17/20, Loss: 0.0023
Epoch 18/20, Loss: 0.0011
Epoch 19/20, Loss: 0.0028
Epoch 20/20, Loss: 0.0024
Training LSTM Model for BTC
Epoch 1/20, Loss: 0.0201
Epoch 2/20, Loss: 0.0231
Epoch 3/20, Loss: 0.0184
Epoch 4/20, Loss: 0.0190
Epoch 5/20, Loss: 0.0035
Epoch 6/20, Loss: 0.0039
Epoch 7/20, Loss: 0.0050
Epoch 8/20, Loss: 0.0056
Epoch 9/20, Loss: 0.0028
Epoch 10/20, Loss: 0.0057
Epoch 11/20, Loss: 0.0040
Epoch 12/20, Loss: 0.0039
Epoch 13/20, Loss: 0.0030
Epoch 14/20, Loss: 0.0026
Epoch 15/20, Loss: 0.0032
Epoch 16/20, Loss: 0.0022
Epoch 17/20, Loss:

[*********************100%%**********************]  1 of 1 completed



Training Transformer Model for NVDA
Epoch 1/20, Loss: 0.0554
Epoch 2/20, Loss: 0.0124
Epoch 3/20, Loss: 0.0036
Epoch 4/20, Loss: 0.0072
Epoch 5/20, Loss: 0.0043
Epoch 6/20, Loss: 0.0026
Epoch 7/20, Loss: 0.0049
Epoch 8/20, Loss: 0.0049
Epoch 9/20, Loss: 0.0042
Epoch 10/20, Loss: 0.0031
Epoch 11/20, Loss: 0.0059
Epoch 12/20, Loss: 0.0041
Epoch 13/20, Loss: 0.0025
Epoch 14/20, Loss: 0.0019
Epoch 15/20, Loss: 0.0017
Epoch 16/20, Loss: 0.0031
Epoch 17/20, Loss: 0.0020
Epoch 18/20, Loss: 0.0012
Epoch 19/20, Loss: 0.0016
Epoch 20/20, Loss: 0.0014
Training LSTM Model for NVDA
Epoch 1/20, Loss: 0.1019
Epoch 2/20, Loss: 0.0460
Epoch 3/20, Loss: 0.0067
Epoch 4/20, Loss: 0.0058
Epoch 5/20, Loss: 0.0063
Epoch 6/20, Loss: 0.0064
Epoch 7/20, Loss: 0.0033
Epoch 8/20, Loss: 0.0041
Epoch 9/20, Loss: 0.0017
Epoch 10/20, Loss: 0.0050
Epoch 11/20, Loss: 0.0047
Epoch 12/20, Loss: 0.0023
Epoch 13/20, Loss: 0.0019
Epoch 14/20, Loss: 0.0027
Epoch 15/20, Loss: 0.0028
Epoch 16/20, Loss: 0.0023
Epoch 17/20, Los

In [12]:
evaluation_df.head()

Unnamed: 0,Ticker,Transformer Model RMSE,LSTM Model RMSE,Transformer Model ACC,LSTM Model ACC
0,GOOG,3.793199,4.279115,99.031903,98.907887
1,BTC,0.503902,0.49089,99.871395,99.874715
2,NVDA,17.868783,18.390999,95.439543,95.306263
