In [3]:
import pandas as pd
import numpy as np
import torch, torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [4]:
# Read the cleaned dataset from the working directory, then print a text
# preview of its first five rows as a quick sanity check of the columns.
df1 = pd.read_excel("./cleaned.xlsx")
print(df1.head(n=5))

       TIMESTAMP  TARGETVAR       U10       V10      U100      V100  \
0  20120101 1:00   0.000000  2.124600 -2.681966  2.864280 -3.666076   
1  20120101 2:00   0.054879  2.521695 -1.796960  3.344859 -2.464761   
2  20120101 3:00   0.110234  2.672210 -0.822516  3.508448 -1.214093   
3  20120101 4:00   0.165116  2.457504 -0.143642  3.215233 -0.355546   
4  20120101 5:00   0.156940  2.245898  0.389576  2.957678  0.332701   

             DATETIME  
0 2012-01-01 01:00:00  
1 2012-01-01 02:00:00  
2 2012-01-01 03:00:00  
3 2012-01-01 04:00:00  
4 2012-01-01 05:00:00  


In [5]:
# --- Feature engineering (drop-in) ---
# Builds the modelling frame `df` from the raw frame `df1` and defines
# `feat_cols` / `target_col`. `df1` is only copied, never modified, so this
# cell can be re-run on its own and always produces the same result.
df_fe = df1.copy()

# Ensure time is usable
if "TIMESTAMP" in df_fe.columns:
    df_fe["TIMESTAMP"] = pd.to_datetime(df_fe["TIMESTAMP"])
    # Stable sort so rows sharing a timestamp keep their original relative order.
    df_fe = df_fe.sort_values("TIMESTAMP", kind="mergesort")
    df_fe["hour"] = df_fe["TIMESTAMP"].dt.hour
    df_fe["dow"]  = df_fe["TIMESTAMP"].dt.dayofweek

    # shift(k) / diff() below move by ROWS, not by time. They only mean
    # "k hours ago" if consecutive rows are exactly one hour apart, so report
    # (without failing) when that assumption does not hold.
    time_step = df_fe["TIMESTAMP"].diff().dropna()
    n_irregular = int((time_step != pd.Timedelta(hours=1)).sum())
    if n_irregular:
        print(f"WARNING: {n_irregular} consecutive row pairs are not exactly 1 hour apart; "
              "lag/diff features are misaligned around those rows.")
else:
    # if you only have sequential rows, fake hour/dow (optional)
    df_fe["hour"] = np.arange(len(df_fe)) % 24
    df_fe["dow"]  = (np.arange(len(df_fe)) // 24) % 7

# Wind speed (magnitude) & direction (radians, from arctan2 so in [-pi, pi])
# NOTE(review): the raw angle jumps between -pi and +pi for almost identical
# directions; consider sin/cos encoding if direction turns out to matter.
df_fe["WS10"]  = np.sqrt(df_fe["U10"]**2  + df_fe["V10"]**2)
df_fe["WS100"] = np.sqrt(df_fe["U100"]**2 + df_fe["V100"]**2)
df_fe["DIR10"]  = np.arctan2(df_fe["V10"],  df_fe["U10"])
df_fe["DIR100"] = np.arctan2(df_fe["V100"], df_fe["U100"])

# Harmonics of time to capture diurnal/weekly cycles
df_fe["sin_h"] = np.sin(2*np.pi*df_fe["hour"]/24)
df_fe["cos_h"] = np.cos(2*np.pi*df_fe["hour"]/24)
df_fe["sin_d"] = np.sin(2*np.pi*df_fe["dow"]/7)
df_fe["cos_d"] = np.cos(2*np.pi*df_fe["dow"]/7)

# AR lags of the target (power is very autocorrelated)
for k in [1, 24, 168]:  # last hour, yesterday, last week (assuming hourly data)
    df_fe[f"TARGETVAR_lag{k}"] = df_fe["TARGETVAR"].shift(k)

# OPTIONAL: a simple nonlinearity hint (turbulence-ish): row-to-row change in wind speed
df_fe["dWS10"]  = df_fe["WS10"].diff()
df_fe["dWS100"] = df_fe["WS100"].diff()

# Feature list and target used by the model
feat_cols = [
    "U10","V10","U100","V100",
    "WS10","WS100","DIR10","DIR100",
    "sin_h","cos_h","sin_d","cos_d",
    "TARGETVAR_lag1","TARGETVAR_lag24","TARGETVAR_lag168",
    "dWS10","dWS100"
]
target_col = "TARGETVAR"

# Drop rows made NaN by lags/diffs at the start. Only the model inputs and the
# target are checked: a missing value in a column the model never reads must
# not remove an otherwise valid row (every removed row breaks the time series).
df_fe = df_fe.dropna(subset=feat_cols + [target_col]).reset_index(drop=True)

# Modelling frame: TIMESTAMP is kept, the redundant DATETIME column (if any) is dropped.
df = df_fe.drop(columns=["DATETIME"], errors="ignore")


In [6]:
# Sequence-window settings.
# L: presumably the look-back window length in rows (TODO confirm against the
#    windowing cell); try 72 first, you can test 48/72/96/168 later.
# H: presumably the forecast horizon in rows (TODO confirm).
L, H = 72, 1


In [8]:
# --- Model: deeper LSTM with dropout
# Requires `feat_cols`, `train_loader` and `val_loader` to be defined by
# earlier cells. Produces `lstm`, `head`, `best_val` and `best_state`; when the
# cell finishes (or is interrupted) the modules hold the best weights seen.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

n_features = len(feat_cols)
hidden = 128
layers = 2
dropout = 0.2

lstm = nn.LSTM(
    input_size=n_features,
    hidden_size=hidden,
    num_layers=layers,
    batch_first=True,   # inputs/outputs are (batch, seq, feature)
    dropout=dropout     # only active when num_layers > 1
).to(DEVICE)
head = nn.Linear(hidden, 1).to(DEVICE)

loss_fn = nn.MSELoss()
model_params = list(lstm.parameters()) + list(head.parameters())
opt = torch.optim.Adam(model_params, lr=3e-4, weight_decay=1e-5)
# No `verbose=True`: the argument is deprecated in recent PyTorch releases and
# the current learning rate is already printed every epoch below.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=3)


def _snapshot(module):
    """Return a copy of ``module.state_dict()`` that is independent of the live weights.

    ``state_dict()`` hands back references to the parameter tensors, which the
    optimizer keeps updating in place; cloning freezes the values as they are now.
    """
    return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}


def _predict(batch):
    """Run the LSTM on ``batch`` and map the last time step to one value per sample."""
    seq_out, _ = lstm(batch)
    return head(seq_out[:, -1, :]).squeeze(-1)


# --- Train with early stopping
epochs = 80
patience = 8
no_improve = 0
best_val = float("inf")
best_state = None   # (lstm_state_dict, head_state_dict) of the best epoch so far

try:
    for ep in range(1, epochs+1):
        lstm.train(); head.train()
        tr_loss_sum, n_items = 0.0, 0
        for xb, yb in train_loader:
            xb = xb.to(DEVICE, non_blocking=True)
            yb = yb.to(DEVICE, non_blocking=True)
            opt.zero_grad(set_to_none=True)
            pred = _predict(xb)
            # assumes yb has shape (batch,) like pred; a (batch, 1) target would
            # silently broadcast inside MSELoss — TODO confirm in the dataset cell
            loss = loss_fn(pred, yb)
            loss.backward()
            nn.utils.clip_grad_norm_(model_params, 1.0)
            opt.step()
            # loss is a batch mean; weight by batch size to get a per-sample average
            tr_loss_sum += loss.item() * xb.size(0)
            n_items += xb.size(0)
        train_mse = tr_loss_sum / max(n_items, 1)

        # validate on loader (averaged)
        lstm.eval(); head.eval()
        with torch.no_grad():
            v_loss_sum, v_items = 0.0, 0
            for xv, yv in val_loader:
                xv = xv.to(DEVICE, non_blocking=True)
                yv = yv.to(DEVICE, non_blocking=True)
                p = _predict(xv)
                v_loss_sum += loss_fn(p, yv).item() * xv.size(0)
                v_items += xv.size(0)
            val_mse = v_loss_sum / max(v_items, 1)

        scheduler.step(val_mse)
        print(f"epoch {ep:02d} | train_mse={train_mse:.6f} val_mse={val_mse:.6f} | lr={opt.param_groups[0]['lr']:.2e}")

        # Require an improvement of more than 1e-6 to count as a new best.
        if val_mse + 1e-6 < best_val:
            best_val = val_mse
            best_state = (_snapshot(lstm), _snapshot(head))
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print("Early stopping.")
                break
finally:
    # Runs on normal completion, early stopping, and when the cell is
    # interrupted: leave the modules holding the best validated weights
    # instead of whatever the last (possibly partial) epoch produced.
    if best_state is not None:
        lstm.load_state_dict(best_state[0])
        head.load_state_dict(best_state[1])


KeyboardInterrupt: 