In [7]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib

# ------------------------------
# Device
# ------------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# ------------------------------
# Load data
# ------------------------------
# Rows are indexed by the parsed "Date" column; only "Close" is used below.
df = pd.read_csv("SPX.csv", index_col="Date", parse_dates=["Date"])

# Reshape the closing prices into a single-column 2-D array for the scaler.
data = df["Close"].to_numpy().reshape(-1, 1)

# Fit the min-max scaler on the whole series and keep it around: it is
# written to disk next to the model at the end of the script.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

# ------------------------------
# Create sequences
# ------------------------------
def create_sequences(data, seq_len):
    """Build sliding-window samples and next-step targets from a series.

    For every index ``i`` in ``range(seq_len, len(data))`` one sample is
    produced: the window ``data[i - seq_len:i, 0]`` as input and
    ``data[i, 0]`` as target.

    Args:
        data: 2-D array-like of shape ``[n, features]``; only column 0 is
            used.
        seq_len: Number of consecutive steps in each input window. Must be
            at least 1.

    Returns:
        A tuple ``(X, y)`` of freshly allocated arrays with the dtype of
        ``data``: ``X`` has shape ``[n - seq_len, seq_len]`` and ``y`` has
        shape ``[n - seq_len]``. When ``data`` has no more than ``seq_len``
        rows, both are empty but ``X`` still has shape ``[0, seq_len]`` so
        downstream reshaping keeps working.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    series = np.asarray(data)[:, 0]
    n_samples = len(series) - seq_len
    if n_samples <= 0:
        # Not enough rows for even one (window, target) pair.
        empty_X = np.empty((0, seq_len), dtype=series.dtype)
        empty_y = np.empty((0,), dtype=series.dtype)
        return empty_X, empty_y

    # sliding_window_view yields len(series) - seq_len + 1 windows; the last
    # one has no following target, so only the first n_samples are kept.
    windows = np.lib.stride_tricks.sliding_window_view(series, seq_len)
    # The view is read-only and aliases `data`; copy into owned arrays.
    X = np.array(windows[:n_samples])
    y = series[seq_len:].copy()
    return X, y

# ------------------------------
# Hyperparameters
# ------------------------------
# Network shape
hidden_size = 256
num_layers = 3
dropout = 0.2

# Optimisation
epochs = 50
lr = 1e-3
batch_size = 32  # safe for 6GB GPU

# Look-back window: at most 512 steps, capped at one less than the number
# of rows so that at least one target remains.
seq_len = min(512, len(data_scaled) - 1)  # safe for GPU

# ------------------------------
# Prepare sequences
# ------------------------------
X, y = create_sequences(data_scaled, seq_len)

# Convert to float32 tensors; the inputs get a trailing feature axis.
X = torch.tensor(X, dtype=torch.float32).unsqueeze(dim=-1)  # [samples, seq_len, 1]
y = torch.tensor(y, dtype=torch.float32)  # [samples]

# Mini-batches are reshuffled every epoch.
dataset = torch.utils.data.TensorDataset(X, y)
loader = torch.utils.data.DataLoader(
    dataset,
    shuffle=True,
    batch_size=batch_size,
)

# ------------------------------
# Define LSTM model
# ------------------------------
class StockLSTM(nn.Module):
    """Stacked LSTM regressor that maps a sequence to a single value.

    The input is run through a multi-layer ``nn.LSTM`` (batch-first) and the
    LSTM output at the final time step is projected to one scalar by a
    linear layer.

    Args:
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked LSTM layers. Forced to
            0 when ``num_layers`` is 1, because ``nn.LSTM`` applies dropout
            only between layers and warns about a non-zero value otherwise.
        input_size: Number of features per time step. Defaults to 1.
    """

    def __init__(self, hidden_size: int = 256, num_layers: int = 3,
                 dropout: float = 0.2, input_size: int = 1) -> None:
        super().__init__()
        # With a single layer there is no "between layers" position, so a
        # non-zero dropout would have no effect and only trigger a warning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Tensor of shape ``[batch, seq_len, input_size]``.

        Returns:
            Tensor of shape ``[batch, 1]``.
        """
        out, _ = self.lstm(x)
        # Only the last time step's output feeds the regression head.
        return self.fc(out[:, -1, :])

# ------------------------------
# Initialize model
# ------------------------------
model = StockLSTM(hidden_size=hidden_size, num_layers=num_layers, dropout=dropout).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.MSELoss()

# Learning rate scheduler: halve the learning rate every 10 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# ------------------------------
# Training loop
# ------------------------------
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    for batch_X, batch_y in loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()

        # Drop only the trailing feature axis ([batch, 1] -> [batch]).
        # A bare .squeeze() would also remove the batch axis whenever the
        # final batch holds a single sample, yielding a 0-d prediction whose
        # shape no longer matches batch_y (MSELoss then warns and relies on
        # broadcasting).
        pred = model(batch_X).squeeze(-1)
        loss = loss_fn(pred, batch_y)
        loss.backward()

        # Gradient clipping: cap the global gradient norm at 1.0.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        epoch_loss += loss.item()

    # The scheduler is stepped once per epoch, after the optimizer updates.
    scheduler.step()

    # Report the mean per-batch loss every 5th epoch and on the last one.
    if epoch % 5 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(loader):.6f}")

# ------------------------------
# Save model and scaler
# ------------------------------
# Only the weights (state dict) are stored; the scaler is saved alongside.
torch.save(model.state_dict(), "lstm_spx_gpu_safe.pth")
joblib.dump(scaler, "scaler_spx_gpu_safe.save")
print("Model and scaler saved successfully!")


Using device: cuda
Epoch 1/50, Loss: 0.001027
Epoch 6/50, Loss: 0.000094
Epoch 11/50, Loss: 0.000061
Epoch 16/50, Loss: 0.000047
Epoch 21/50, Loss: 0.000032
Epoch 26/50, Loss: 0.000031
Epoch 31/50, Loss: 0.000024
Epoch 36/50, Loss: 0.000023
Epoch 41/50, Loss: 0.000019
Epoch 46/50, Loss: 0.000018
Epoch 50/50, Loss: 0.000018
Model and scaler saved successfully!
