In [3]:
# one_step_from_24.py
import json, numpy as np, pandas as pd, torch, torch.nn as nn, joblib
from pathlib import Path

# Name of the timestamp column; parsed with pd.to_datetime for the hour/day features.
TIME_COL   = "TIMESTAMP"
# Name of the column being predicted; also the source of the y_lag*/y_roll* features.
TARGET_COL = "TARGETVAR"
# Raw exogenous input columns. Each (U, V) pair is turned into a magnitude and an
# angle by add_engineered_features (10 / 100 are presumably measurement heights).
BASE_FEATS = ["U10","V10","U100","V100"]
# Row offsets used for the lagged-target features y_lag{L}.
LAGS_Y     = [1,3,6,12,24]
# Row offsets used for the lagged speed10 / speed100 features.
LAGS_SPEED = [1,3,6]
# Window lengths (in rows) of the rolling means y_roll{W}, taken over the target
# shifted by one row so the current row's own target is never included.
ROLLS_Y    = [6,12,24]
# Run on the GPU when CUDA is available, otherwise on the CPU.
DEVICE     = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* extended with the engineered feature columns.

    The input must contain the TIME_COL, TARGET_COL and U10/V10/U100/V100
    columns and is expected to be in chronological row order, because every
    lag / rolling feature is computed with row-wise ``shift``.

    Added columns, in order:
      * speed10 / speed100: magnitude of each (U, V) pair
      * dir10_* / dir100_*: sine and cosine of each pair's angle
      * shear_speed: speed100 - speed10
      * veer_sin / veer_cos: sine and cosine of the angle difference
      * hour, day and their sine/cosine encodings (periods 24 and 366)
      * y_lag{L} for L in LAGS_Y, y_roll{W} for W in ROLLS_Y
      * speed10_lag{L} / speed100_lag{L} for L in LAGS_SPEED

    Rows without enough preceding history hold NaN in the lag / rolling
    columns; the caller decides how to handle them.
    """
    feats = df.copy()
    two_pi = 2 * np.pi

    u_low, v_low = feats["U10"], feats["V10"]
    u_high, v_high = feats["U100"], feats["V100"]

    # Magnitudes of the two (U, V) pairs.
    feats["speed10"] = np.sqrt(u_low**2 + v_low**2)
    feats["speed100"] = np.sqrt(u_high**2 + v_high**2)

    # Angles are encoded as sin/cos so the wrap-around at +/-pi is continuous.
    angle_low = np.arctan2(v_low, u_low)
    angle_high = np.arctan2(v_high, u_high)
    feats["dir10_sin"] = np.sin(angle_low)
    feats["dir10_cos"] = np.cos(angle_low)
    feats["dir100_sin"] = np.sin(angle_high)
    feats["dir100_cos"] = np.cos(angle_high)

    # Differences between the two levels: magnitude and angle.
    feats["shear_speed"] = feats["speed100"] - feats["speed10"]
    angle_diff = angle_high - angle_low
    feats["veer_sin"] = np.sin(angle_diff)
    feats["veer_cos"] = np.cos(angle_diff)

    # Calendar features; the raw hour/day helper columns stay in the frame.
    stamps = pd.to_datetime(feats[TIME_COL])
    feats["hour"] = stamps.dt.hour
    feats["day"] = stamps.dt.dayofyear
    feats["hour_sin"] = np.sin(two_pi * feats["hour"] / 24.0)
    feats["hour_cos"] = np.cos(two_pi * feats["hour"] / 24.0)
    feats["day_sin"] = np.sin(two_pi * feats["day"] / 366.0)
    feats["day_cos"] = np.cos(two_pi * feats["day"] / 366.0)

    # Lagged copies of the target.
    target = feats[TARGET_COL]
    for lag in LAGS_Y:
        feats[f"y_lag{lag}"] = target.shift(lag)

    # Rolling means over strictly earlier rows (shift by one first), and only
    # once a full window of values is available.
    earlier_target = target.shift(1)
    for window in ROLLS_Y:
        feats[f"y_roll{window}"] = earlier_target.rolling(window, min_periods=window).mean()

    # Lagged speeds, interleaved per lag to keep the original column order.
    for lag in LAGS_SPEED:
        feats[f"speed10_lag{lag}"] = feats["speed10"].shift(lag)
        feats[f"speed100_lag{lag}"] = feats["speed100"].shift(lag)

    return feats

def build_feat_list():
    """Return the ordered list of feature column names fed to the scaler/model.

    The order is: raw inputs, derived speed/direction/calendar features,
    target lags, target rolling means, speed10 lags, speed100 lags.
    Presumably this must match the column order used when the scaler and
    model were fitted -- verify against the training code before changing it.
    """
    derived = [
        "speed10", "speed100",
        "dir10_sin", "dir10_cos", "dir100_sin", "dir100_cos",
        "shear_speed", "veer_sin", "veer_cos",
        "hour_sin", "hour_cos", "day_sin", "day_cos",
    ]

    names = list(BASE_FEATS)
    names.extend(derived)
    names.extend(f"y_lag{lag}" for lag in LAGS_Y)
    names.extend(f"y_roll{window}" for window in ROLLS_Y)
    names.extend(f"speed10_lag{lag}" for lag in LAGS_SPEED)
    names.extend(f"speed100_lag{lag}" for lag in LAGS_SPEED)
    return names

class BiLSTMRegressor(nn.Module):
    """(Bi)LSTM encoder + LayerNorm + small MLP head producing one value per sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden width per direction.
        num_layers: number of stacked LSTM layers.
        dropout: dropout rate used between stacked LSTM layers and in the head.
        bidirectional: run the LSTM in both directions (doubles the output width).

    The attribute names (``lstm``, ``norm``, ``head``) and the layout of the
    ``head`` Sequential define the state-dict keys of saved checkpoints.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional=True):
        super().__init__()

        # Inter-layer dropout is only meaningful for stacked LSTMs; a single
        # layer gets 0.0 instead of the configured rate.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout,
            bidirectional=bidirectional,
        )

        directions = 2 if bidirectional else 1
        width = hidden_size * directions
        self.norm = nn.LayerNorm(width)
        self.head = nn.Sequential(
            nn.Linear(width, width),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(width, 1),
        )

    def forward(self, x):
        """Map a batch-first input sequence to a (batch, 1) prediction."""
        seq_out, _ = self.lstm(x)
        # Only the output at the final time step feeds the regression head.
        final_step = seq_out[:, -1, :]
        return self.head(self.norm(final_step))

def load_artifacts(model_root: str):
    """Load everything needed for inference from disk.

    All four files are resolved relative to ``model_root`` as
    ``<model_root>/../biLSTM/<file>``, i.e. from a ``biLSTM`` directory that
    sits next to ``model_root``, not inside it.

    Returns:
        (best_params, xsc, ysc, state): the parsed ``best_params.json``, the
        feature scaler, the target scaler, and the model state loaded onto
        ``DEVICE``.

    NOTE: ``joblib.load`` and ``torch.load`` deserialize pickled objects, so
    only point this at artifact files you trust.
    """
    root = Path(model_root)

    params_file = root / "../biLSTM/best_params.json"
    x_scaler_file = root / "../biLSTM/x_scaler_optuna.pkl"
    y_scaler_file = root / "../biLSTM/y_scaler_optuna.pkl"
    weights_file = root / "../biLSTM/bilstm_optuna_best.pt"

    with params_file.open("r") as fh:
        best_params = json.load(fh)

    xsc = joblib.load(x_scaler_file)
    ysc = joblib.load(y_scaler_file)
    state = torch.load(weights_file, map_location=DEVICE)

    return best_params, xsc, ysc, state

def predict_next_from_last24(model_root: str, last24_df: pd.DataFrame, future_weather: dict|None=None):
    """
    Predict TARGETVAR for the hour following the newest row of `last24_df`.

    model_root: directory handed to `load_artifacts` to locate the tuned
        hyper-parameters, the two fitted scalers and the model weights.
    last24_df: DataFrame of recent history with columns
        TIMESTAMP, TARGETVAR, U10, V10, U100, V100 (same units/schema as training).
        Rows may be given in any order; they are sorted chronologically here.
        Despite the name, 24 rows are not enough: the lag / rolling features
        leave the first max(LAGS_Y + ROLLS_Y + LAGS_SPEED) rows incomplete, so
        at least `lookback` + that many gap-free rows are required.
    future_weather (optional): dict with keys 'U10','V10','U100','V100' for the *next* hour.
        If None, the last known exogenous values are held for the placeholder row.
        NOTE(review): the placeholder next-hour row is built and run through
        feature engineering, but the model window is cut from history rows only,
        so these values currently do NOT influence the prediction. Whether the
        window should instead end on the t+1 row depends on how the training
        windows were aligned, which is not visible here -- confirm against the
        training code before changing it.

    Returns: (next_ts, yhat) -- the pandas Timestamp one hour after the newest
        history row and the prediction as a plain float in original target units.

    Raises: ValueError if there are too few (complete) history rows to fill one
        `lookback`-step window.
    """
    best, xsc, ysc, state = load_artifacts(model_root)
    lookback = int(best["lookback"])

    # Parse timestamps BEFORE sorting: string timestamps sort lexicographically
    # (e.g. "... 10:00" before "... 9:00"), which would scramble the series and
    # every lag / rolling feature derived from it.
    df = last24_df.copy()
    df[TIME_COL] = pd.to_datetime(df[TIME_COL])
    df = df.sort_values(TIME_COL).reset_index(drop=True)

    # The first `warmup` rows can never have complete lag / rolling features,
    # so they cannot be part of the model window.
    warmup = max(LAGS_Y + ROLLS_Y + LAGS_SPEED, default=0)
    min_rows = lookback + warmup
    if len(df) < min_rows:
        raise ValueError(
            f"Need at least {min_rows} rows of history ({lookback}-step window + "
            f"{warmup} warm-up rows for lag/rolling features); got {len(df)}."
        )

    next_ts = df[TIME_COL].iloc[-1] + pd.Timedelta(hours=1)

    # Placeholder row for the next hour: either the supplied exogenous values
    # or the last known ones held constant. Its target is unknown, hence NaN.
    if future_weather is None:
        fut = df.iloc[[-1]][BASE_FEATS].copy()
    else:
        fut = pd.DataFrame([{col: future_weather[col] for col in BASE_FEATS}])
    fut[TIME_COL] = next_ts
    fut[TARGET_COL] = np.nan

    # Working frame = history + placeholder next hour, then feature engineering.
    work = pd.concat([df[[TIME_COL, TARGET_COL] + BASE_FEATS], fut], ignore_index=True)
    dfe = add_engineered_features(work)

    # Keep only fully populated HISTORY rows. The placeholder row is excluded
    # here, which is why `future_weather` has no effect (see NOTE in docstring).
    dfe_hist = dfe.iloc[:-1].dropna().copy()
    if len(dfe_hist) < lookback:
        raise ValueError(
            f"After feature lags/rolls, only {len(dfe_hist)} complete rows remain but a "
            f"{lookback}-step window is required. Provide at least {min_rows} rows of "
            f"history without missing values."
        )

    # Scale with the training scaler and take the most recent `lookback` rows.
    feat_cols = build_feat_list()
    X_hist = dfe_hist[feat_cols].to_numpy(np.float32)
    X_hist_s = xsc.transform(X_hist)
    X_window = X_hist_s[-lookback:, :]                       # shape (lookback, n_feats)
    xb = torch.from_numpy(X_window[None, ...]).float().to(DEVICE)

    # Rebuild the network with the tuned hyper-parameters and load its weights.
    model = BiLSTMRegressor(
        input_size=X_window.shape[-1],
        hidden_size=int(best["hidden"]),
        num_layers=int(best["layers"]),
        dropout=float(best["dropout"]),
        bidirectional=bool(best["bidir"]),
    ).to(DEVICE)
    model.load_state_dict(state)
    model.eval()

    with torch.no_grad():
        yhat_s = model(xb).cpu().numpy()                    # scaled

    # Undo the target scaling; when `log_target` is set, also undo the log
    # transform (expm1 -- presumably the target was log1p-transformed in training).
    yhat = ysc.inverse_transform(yhat_s).ravel()[0]
    if bool(best["log_target"]):
        yhat = np.expm1(yhat)

    return next_ts, float(yhat)


In [4]:
import pandas as pd

# 1) Put your recent history into a DataFrame:
# Must have columns: TIMESTAMP, TARGETVAR, U10, V10, U100, V100
# NOTE: despite the file name, 24 rows are not enough. The longest lag / rolling
# window is 24 rows, so the first 24 rows never have complete features; supply at
# least (lookback + 24) gap-free rows, where lookback comes from best_params.json.
last24 = pd.read_csv("my_last_24.csv")  # or build manually

# 2) If you already know the next hour’s weather, you can pass it (optional):
# NOTE(review): predict_next_from_last24 builds its model window from history rows
# only, so these values currently do not change the returned prediction.
future_weather = {
    "U10": 3.5, "V10": -1.2,
    "U100": 5.1, "V100": -1.8
}
# If you don’t know it, set future_weather=None (the code will hold the last known values)

# 3) Predict the next hour:
next_ts, y_pred = predict_next_from_last24(
    model_root=".",            # artifacts are read from <model_root>/../biLSTM/ (see load_artifacts)
    last24_df=last24,
    future_weather=None        # or the future_weather dict defined above
)
# Prints the timestamp one hour after the newest history row and the predicted value.
print(next_ts, y_pred)


2025-08-01 10:00:00 0.1537948101758957
