In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [None]:
# File locations; both CSVs live under the data directory.
DATA    = Path("data/")
CSV_IN  = DATA.joinpath("post_optimized_workloads_daily.csv")   # input: one row per day, one workloads_tagX column per tag
CSV_OUT = DATA.joinpath("lstm_forecast_next7.csv")              # output: the forecast table

In [None]:
# Hyper-parameters and run configuration.
SEQ_LEN   = 14        # past-days window
HORIZON   = 7         # predict next 7 days
EPOCHS    = 20        # passes over the training windows
BATCH     = 256       # DataLoader batch size
LR        = 1e-3      # Adam learning rate
DEVICE    = "cuda" if torch.cuda.is_available() else "cpu"

# Seed the RNGs so weight initialisation and batch shuffling repeat exactly on
# Restart & Run All; without this every run yields a different forecast.
SEED      = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# 1.  Load post-optimised workload series
df   = pd.read_csv(CSV_IN).set_index("day")
tags = df.columns
# Explicit copy: the scaling loop below overwrites `data` column by column and
# must never write through to `df`.
data = df.to_numpy(dtype=np.float32, copy=True)

# One independent min-max scaler per tag, kept by name so the forecast can be
# mapped back to original units later.
scalers = {}
for col_idx, tag in enumerate(tags):
    scalers[tag] = MinMaxScaler()
    column = data[:, col_idx].reshape(-1, 1)          # sklearn expects a 2-D (n_samples, 1) input
    data[:, col_idx] = scalers[tag].fit_transform(column)[:, 0]

In [None]:
# 2.  Build (samples, seq_len, n_tags) tensors
# Each sample pairs SEQ_LEN consecutive days (input) with the HORIZON days that
# immediately follow them (multivariate target).
n_samples = len(data) - SEQ_LEN - HORIZON + 1
if n_samples <= 0:
    # Fail here with a clear message instead of building an empty dataset that
    # only blows up later, inside the training loop.
    raise ValueError(
        f"Need at least {SEQ_LEN + HORIZON} days of history to build one "
        f"training window, got {len(data)}."
    )

window_ends = range(SEQ_LEN, len(data) - HORIZON + 1)
# Stack into one ndarray before handing over to torch: torch.tensor() on a
# Python list of ndarrays is very slow and emits a UserWarning.
X = torch.from_numpy(np.stack([data[t - SEQ_LEN:t] for t in window_ends]))   # (samples, SEQ_LEN, n_tags)
y = torch.from_numpy(np.stack([data[t:t + HORIZON] for t in window_ends]))   # (samples, HORIZON, n_tags)

loader = DataLoader(TensorDataset(X, y), batch_size=BATCH, shuffle=True)

In [None]:
# 3.  multivariate LSTM
class LSTMForecast(nn.Module):
    """Multivariate LSTM mapping a window of past steps to a multi-step forecast.

    Args:
        n_in: number of features per time step; the same number of features is
            predicted for every forecast step.
        hidden: hidden size of each LSTM layer.
        layers: number of stacked LSTM layers.
        horizon: number of future steps to predict. ``None`` (the default)
            falls back to the notebook-level ``HORIZON`` constant, read once at
            construction time.
    """

    def __init__(self, n_in, hidden=64, layers=2, horizon=None):
        super().__init__()
        # Freeze the output geometry on the instance so forward() cannot drift
        # out of sync with the size of `fc` if the global is edited later.
        self.n_in    = n_in
        self.horizon = HORIZON if horizon is None else horizon
        self.lstm = nn.LSTM(n_in, hidden, layers, batch_first=True)
        self.fc   = nn.Linear(hidden, self.horizon * n_in)

    def forward(self, x):
        """Forecast from ``x`` of shape (batch, seq_len, n_in).

        Returns:
            Tensor of shape (batch, horizon, n_in).
        """
        h, _ = self.lstm(x)                # (batch, seq_len, hidden) because batch_first=True
        out  = self.fc(h[:, -1])           # last step hidden
        return out.view(-1, self.horizon, self.n_in)

# Build the network on the selected device, then its optimiser and loss.
model = LSTMForecast(n_in=len(tags))
model = model.to(DEVICE)
opt   = torch.optim.Adam(params=model.parameters(), lr=LR)
lossf = nn.MSELoss(reduction="mean")       # "mean" is the default reduction, spelled out

In [None]:
# 4.  Train
# The forecast cell switches the model to eval mode; switch back explicitly so
# re-running this cell afterwards still trains in training mode.
model.train()
for ep in range(EPOCHS):
    tot = 0.0
    for xb, yb in loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        opt.zero_grad()
        pred = model(xb)
        loss = lossf(pred, yb)
        loss.backward()
        opt.step()
        # `loss` is a batch mean; weight it by the batch size so the epoch
        # figure below is a true per-sample mean even with a short last batch.
        tot += loss.item() * len(xb)
    print(f"Epoch {ep+1:02}/{EPOCHS}  loss={tot/len(loader.dataset):.4f}")

In [None]:
# 5.  Forecast next 7 days from last 14 days
model.eval()
seq_in = torch.tensor(data[-SEQ_LEN:]).unsqueeze(0).to(DEVICE)   # add a batch dimension of 1
with torch.no_grad():
    pred_scaled = model(seq_in).cpu().numpy()[0]           # (7, n_tags)

# inverse-scale: undo each tag's min-max scaling with the scaler fitted on it
pred = np.zeros_like(pred_scaled)
for i, t in enumerate(tags):
    pred[:, i:i+1] = scalers[t].inverse_transform(pred_scaled[:, i:i+1])

# NOTE(review): `df.index.max() + 1` only works for a numeric "day" index —
# confirm the CSV's day column is an integer counter, not a date string.
forecast_days = np.arange(df.index.max() + 1, df.index.max() + 1 + HORIZON)
# Round to the nearest integer before casting: a bare astype(int) truncates
# toward zero, so e.g. 99.9999 would be written as 99.
pred_df = pd.DataFrame(np.rint(pred).astype(int), index=forecast_days, columns=tags)
pred_df.to_csv(CSV_OUT)

print("✔ 7-day workload forecast written to", CSV_OUT)