## Importing Dependencies

In [None]:
import numpy as np
import pandas as pd
import gc
from IPython.display import clear_output

## Loading Dataset

In [None]:
# Location of the input CSV; the path assumes Google Drive is mounted at /content/drive (Colab) — TODO confirm.
scaled_csv_path = '/content/drive/MyDrive/10_Year_Historical_Scaled.csv'

# Later cells read the 'Ticker' and 'Date' columns of this frame and treat the
# first remaining column as the prediction target.
scaled_df = pd.read_csv(scaled_csv_path)

## Dataset Creation and Train-Test Split

In [None]:
def create_dataset(data, lookback, prediction_horizon):
    """Slice one 2-D array into supervised (input window, target) pairs.

    Column 0 of ``data`` is the value to predict; every other column is an
    input feature. Sample ``i`` pairs the feature rows ``i .. i + lookback - 1``
    with the column-0 values of the ``prediction_horizon`` rows that follow.

    Args:
        data: 2-D NumPy array of shape (n_rows, 1 + n_features). Rows are
            assumed to be in time order; this function does not sort them.
        lookback: number of consecutive rows in each input window.
        prediction_horizon: number of future column-0 values in each target.

    Returns:
        Tuple ``(X, Y)`` of float32 arrays with shapes
        ``(n_samples, lookback, n_features)`` and
        ``(n_samples, prediction_horizon)``. ``n_samples`` is 0 when ``data``
        has fewer than ``lookback + prediction_horizon`` rows.
    """
    n_features = data.shape[1] - 1

    # The last valid window starts at row len(data) - lookback - prediction_horizon,
    # so the number of windows is that index plus one (clamped at zero).
    n_samples = max(len(data) - lookback - prediction_horizon + 1, 0)

    # Always float32, including the empty case, so concatenating the result with
    # other windows never silently upcasts everything to float64.
    X = np.empty((n_samples, lookback, n_features), dtype=np.float32)
    Y = np.empty((n_samples, prediction_horizon), dtype=np.float32)

    for i in range(n_samples):
        target_start = i + lookback
        X[i] = data[i:target_start, 1:]
        Y[i] = data[target_start:target_start + prediction_horizon, 0]

    return X, Y

In [None]:
def _merge_chunks(chunks, empty_shape):
    """Concatenate per-ticker sample arrays along axis 0 and empty the input list."""
    non_empty = [chunk for chunk in chunks if chunk.shape[0] > 0]
    # Drop the list's references now so the chunks can be freed as soon as the merge is done
    chunks.clear()
    if not non_empty:
        # np.concatenate raises on an empty list; hand back a correctly shaped empty array instead
        return np.empty(empty_shape, dtype=np.float32)
    return np.concatenate(non_empty, axis=0)


def process_stock_data(df, lookback, prediction_horizon):
    """Build windowed train/test arrays for every ticker in ``df``.

    For each ticker the rows are split positionally: the first 80 % form the
    training segment and the remaining 20 % the test segment. Each segment is
    windowed independently with ``create_dataset`` so no window spans the
    split, and the per-ticker results are concatenated.

    Args:
        df: DataFrame with 'Ticker' and 'Date' columns. After those two are
            dropped, the first remaining column is the prediction target and
            the others are input features. Rows of each ticker are assumed to
            already be in chronological order (the split is by position, not
            by date). Rows whose 'Ticker' is missing are skipped.
        lookback: number of rows in each input window.
        prediction_horizon: number of future target values per sample.

    Returns:
        ``(train_x, train_y, test_x, test_y)``. Tickers too short to yield a
        window contribute nothing; if no ticker yields a window the
        corresponding arrays are empty float32 arrays rather than an error.
    """
    train_x_list, train_y_list, test_x_list, test_y_list = [], [], [], []

    total_train_samples, total_test_samples = 0, 0

    # One grouping pass instead of a full boolean-mask scan of df per ticker.
    # sort=False visits tickers in order of first appearance; row order inside
    # each group is the order in df.
    for ticker, ticker_rows in df.groupby('Ticker', sort=False):
        stock_values = ticker_rows.drop(columns=['Ticker', 'Date']).to_numpy()

        train_split_len = int(len(stock_values) * 0.8)
        train_data = stock_values[:train_split_len]
        test_data = stock_values[train_split_len:]

        print(f"Ticker: {ticker}, Train rows: {len(train_data)}, Test rows: {len(test_data)}")

        train_x, train_y = create_dataset(train_data, lookback, prediction_horizon)
        test_x, test_y = create_dataset(test_data, lookback, prediction_horizon)

        # Report what was actually built, so the diagnostic can never be
        # negative or disagree with the arrays.
        usable_train_samples = train_x.shape[0]
        usable_test_samples = test_x.shape[0]
        print(f"Ticker: {ticker}, Usable train samples: {usable_train_samples}, Usable test samples: {usable_test_samples}")

        total_train_samples += usable_train_samples
        total_test_samples += usable_test_samples

        train_x_list.append(train_x)
        train_y_list.append(train_y)
        test_x_list.append(test_x)
        test_y_list.append(test_y)
        # No per-ticker gc.collect(): the NumPy buffers are released by
        # reference counting when the loop variables are rebound.

    # Feature count for the empty fallback: all columns minus Ticker, Date and the target
    n_features = max(df.shape[1] - 3, 0)
    train_x = _merge_chunks(train_x_list, (0, lookback, n_features))
    train_y = _merge_chunks(train_y_list, (0, prediction_horizon))
    test_x = _merge_chunks(test_x_list, (0, lookback, n_features))
    test_y = _merge_chunks(test_y_list, (0, prediction_horizon))

    # Replace the per-ticker log with the one-line summary
    clear_output(wait=True)
    print(f"Total train samples: {total_train_samples}, Total test samples: {total_test_samples}")

    return train_x, train_y, test_x, test_y


In [None]:
# Window geometry shared with later cells: rows per input window and
# number of future target values per sample.
lookback, prediction_horizon = 60, 7

train_x, train_y, test_x, test_y = process_stock_data(
    df=scaled_df,
    lookback=lookback,
    prediction_horizon=prediction_horizon,
)

# Run a collection pass now that the windowed arrays have been built.
gc.collect()

print(f"Training data shape: X={train_x.shape}, Y={train_y.shape}")
print(f"Testing data shape: X={test_x.shape}, Y={test_y.shape}")

Total train samples: 2490880, Total test samples: 559360
Training data shape: X=(2490880, 60, 5), Y=(2490880, 7)
Testing data shape: X=(559360, 60, 5), Y=(559360, 7)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import time

# Define LSTM Model
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear head that emits a multi-step forecast.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        output_size: number of values predicted per sequence. When omitted, the
            notebook-level ``prediction_horizon`` variable is used, which keeps
            existing ``LSTMModel(input_size=...)`` calls working.
    """

    def __init__(self, input_size, hidden_size=32, output_size=None):
        super().__init__()
        if output_size is None:
            # Backward-compatible fallback; pass output_size explicitly to
            # avoid depending on notebook global state.
            output_size = prediction_horizon
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, output_size)."""
        out, _ = self.lstm(x)
        # Only the LSTM output at the final time step feeds the linear head
        return self.fc(out[:, -1, :])

# Train on the GPU when one is available. The full data set stays in host
# memory; only the current mini-batch is moved to the device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fix the RNG so weight initialisation and batch order are repeatable across runs
torch.manual_seed(42)

# Convert data to PyTorch tensors
train_x_tensor = torch.as_tensor(train_x, dtype=torch.float32)
train_y_tensor = torch.as_tensor(train_y, dtype=torch.float32)
test_x_tensor = torch.as_tensor(test_x, dtype=torch.float32)
test_y_tensor = torch.as_tensor(test_y, dtype=torch.float32)

# Initialize model, loss function, and optimizer
input_size = train_x.shape[2]  # Number of features
model = LSTMModel(input_size=input_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training configuration
num_epochs = 200
batch_size = 32
eval_batch_size = 4096  # evaluation needs no gradients, so larger batches are fine

num_train_samples = len(train_x_tensor)
# Ceiling division: a trailing partial batch is still a batch, so it must be
# counted in both the progress-bar total and the loss average.
num_batches = (num_train_samples + batch_size - 1) // batch_size

# NOTE(review): the recorded run of this cell reports an average MSE around
# 3.45e12 after the first epoch, which suggests the target column is not on a
# normalised scale — verify the scaling of the input CSV before a long run.

# Start timing
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    # Fresh random order every epoch. Without it each mini-batch would hold
    # consecutive, heavily overlapping windows from the same part of the data.
    permutation = torch.randperm(num_train_samples)

    # Progress bar
    with tqdm(total=num_batches, desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch') as pbar:
        for i in range(0, num_train_samples, batch_size):
            batch_idx = permutation[i:i + batch_size]
            x_batch = train_x_tensor[batch_idx].to(device)
            y_batch = train_y_tensor[batch_idx].to(device)

            optimizer.zero_grad()

            # Forward pass
            y_pred = model(x_batch)

            # Calculate loss
            loss = criterion(y_pred, y_batch)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Read the scalar once; it is used for both the running sum and the bar
            batch_loss = loss.item()
            epoch_loss += batch_loss

            # Update progress bar
            pbar.update(1)
            pbar.set_postfix(loss=batch_loss)

    # max(..., 1) keeps the division defined when there is no training data
    avg_epoch_loss = epoch_loss / max(num_batches, 1)
    elapsed_time = time.time() - start_time
    time_left = (elapsed_time / (epoch + 1)) * (num_epochs - (epoch + 1))

    print(f'Epoch [{epoch + 1}/{num_epochs}], Avg Loss: {avg_epoch_loss:.4f}, Time Elapsed: {elapsed_time:.2f}s, Estimated Time Left: {time_left:.2f}s')

# Evaluation: run the test set through the model in chunks so the LSTM
# activations for the whole set never have to be held in memory at once.
model.eval()
prediction_chunks = []
with torch.no_grad():
    for i in range(0, len(test_x_tensor), eval_batch_size):
        x_batch = test_x_tensor[i:i + eval_batch_size].to(device)
        prediction_chunks.append(model(x_batch).cpu())

    if prediction_chunks:
        test_predictions = torch.cat(prediction_chunks, dim=0)
    else:
        test_predictions = torch.empty_like(test_y_tensor)
    test_loss = criterion(test_predictions, test_y_tensor)

# Put the model back on the CPU in case later code feeds it CPU tensors directly
model.to('cpu')

print(f'Test Loss: {test_loss.item():.4f}')


Epoch 1/200: 100%|██████████| 77840/77840 [10:14<00:00, 126.69batch/s, loss=1.04e+13]


Epoch [1/200], Avg Loss: 3453230395128.1934, Time Elapsed: 614.40s, Estimated Time Left: 122265.75s


Epoch 2/200:   4%|▍         | 3261/77840 [00:26<13:18, 93.40batch/s, loss=1.74e+10]