# In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.preprocessing import MinMaxScaler

# 1) Load & slice first 500 days
df = pd.read_csv("prices.txt", sep=r"\s+", header=None)
data = df.to_numpy()[:500]        # (n_days, n_inst): one row per day, one column per instrument
n_days, n_inst = data.shape

# 2) Build sliding windows
#    Window i covers rows [i, i + lookback) and its target is row i + lookback,
#    so the last usable start index is n_days - lookback - 1 and there are
#    exactly n_days - lookback windows.
lookback = 20
n_windows = n_days - lookback
if n_windows < 2:
    # Need at least one training window and one hold-out window.
    raise ValueError(
        f"need at least {lookback + 2} rows to build train/test windows, "
        f"got {n_days}"
    )
X = np.stack([data[i : i + lookback] for i in range(n_windows)])  # (N, lookback, n_inst)
y = data[lookback:].copy()                                        # (N, n_inst)

# 3) Scale each feature (instrument) to [0,1] using train portion
#    The split is chronological: the first `split` windows are training data,
#    everything after is the hold-out set. The scalers only ever see the
#    training part, so no statistics from the hold-out period leak in.
split = int(0.8 * len(X))
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# fit on train
X_flat = X.reshape(-1, n_inst)              # one row per (window, timestep)
X_train_flat = X_flat[: split * lookback]   # rows belonging to the first `split` windows
scaler_X.fit(X_train_flat)
X_scaled = scaler_X.transform(X_flat).reshape(X.shape)

y_flat = y
y_train_flat = y_flat[:split]
scaler_y.fit(y_train_flat)
y_scaled = scaler_y.transform(y_flat)

# convert to torch tensors
X_t = torch.tensor(X_scaled, dtype=torch.float32)
y_t = torch.tensor(y_scaled, dtype=torch.float32)

# 4) Create datasets & loaders
#    Consecutive windows overlap in all but one row, so a random split would
#    put near-duplicates of training samples into the test set and would also
#    disagree with the chronological slice the scalers were fitted on.
#    Slice by time instead, matching `split` above.
dataset = TensorDataset(X_t, y_t)           # all windows; kept so code referring to `dataset` still works
train_len = split
test_len = len(dataset) - split
train_ds = TensorDataset(X_t[:train_len], y_t[:train_len])
test_ds = TensorDataset(X_t[train_len:], y_t[train_len:])

batch_size = 32
# Shuffle only the training batches (seeded for reproducibility); the
# hold-out set is evaluated in chronological order.
train_loader = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# 5) Define LSTM model
class PriceLSTM(nn.Module):
    """LSTM followed by a linear head applied to the final timestep.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced per sample.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # The attribute names are also the state_dict key prefixes, so they
        # stay `lstm` and `fc`.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, lookback, input_size) to (batch, output_size)."""
        # Only the per-timestep outputs are needed; the (h_n, c_n) pair is dropped.
        hidden_seq = self.lstm(x)[0]        # (batch, lookback, hidden_size)
        final_step = hidden_seq[:, -1]      # (batch, hidden_size)
        return self.fc(final_step)

# Use the GPU when one is visible, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# One input feature and one output value per instrument column.
model = PriceLSTM(input_size=n_inst, hidden_size=64, output_size=n_inst)
model = model.to(device)

# 6) Training setup
epochs = 20
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 7) Train loop
for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0.0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        # MSELoss returns the batch mean; weight it by the batch size so the
        # final division by train_len gives a per-sample average even when the
        # last batch is short.
        n_in_batch = xb.size(0)
        total_loss += loss.item() * n_in_batch

    avg_train = total_loss / train_len
    print(f"Epoch {epoch:2d}  Train MSE: {avg_train:.6f}")

# 8) Evaluate on test set
model.eval()
total_loss = 0.0
with torch.no_grad():   # no gradients are needed for scoring
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        preds = model(xb)
        batch_mse = criterion(preds, yb).item()
        total_loss += batch_mse * xb.size(0)

avg_test = total_loss / test_len
print(f"Hold-out Test MSE: {avg_test:.6f}")
