# Train TFT (Temporal Fusion Transformer) — ARGUS Phase 2

Trains a simplified Temporal Fusion Transformer for multi-horizon
flood risk forecasting (24h ahead with quantile uncertainty).

**Owner:** Sabarish · **Service:** `prediction` (deep track)

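Since a full TFT requires PyTorch Forecasting, this notebook uses a
simplified attention-based LSTM architecture as a portable fallback.
Note that this fallback is trained with an MSE loss and produces point
forecasts only; it does not output the quantile estimates a full TFT
would provide.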

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

# Seed the global NumPy RNG so the synthetic dataset is identical on every run.
np.random.seed(42)

# --- Synthetic hourly flood-risk series -----------------------------------
N_DAYS = 365
HOURS_PER_DAY = 24
N = N_DAYS * HOURS_PER_DAY

# Hour index 0 .. N-1; also stored as the 'hour' column below.
t = np.arange(N)

# Seasonal driver: one sine cycle per 365*24 hours, phase-shifted by 0.4 of a
# year (labelled as the June-Sept monsoon peak) and limited to [0, 1].
year_phase = t / (365 * 24) - 0.4
monsoon = (0.5 + 0.4 * np.sin(2 * np.pi * year_phase)).clip(0, 1)

# Rainfall: exponential bursts scaled by the seasonal driver plus Gaussian
# noise, floored at zero. The exponential draw must stay ahead of the normal
# draw so the RNG stream is consumed in the same order.
seasonal_rain = monsoon * np.random.exponential(10, N)
rain_noise = np.random.normal(0, 2, N)
rainfall = (seasonal_rain + rain_noise).clip(min=0)

# Water level: first-order autoregressive process driven by rainfall and the
# seasonal signal, with one noise draw per step and a floor of 0.5.
water_level = np.zeros(N)
level = 2.0
water_level[0] = level
for step in range(1, N):
    level = (
        0.95 * level
        + 0.005 * rainfall[step]
        + 0.001 * monsoon[step]
        + np.random.normal(0, 0.05)
    )
    level = max(0.5, level)
    water_level[step] = level

# Soil moisture follows the seasonal driver with additive noise, kept in [0, 1].
soil_moisture = (0.3 + 0.4 * monsoon + 0.1 * np.random.randn(N)).clip(0, 1)

# Risk target: logistic curve of water level centred at 3.0, plus a small
# soil-moisture contribution, kept in [0, 1].
level_excess = (water_level - 3.0) * 2
risk = (1 / (1 + np.exp(-level_excess)) + 0.1 * soil_moisture).clip(0, 1)

# Column order matters: later cells select columns by name from this frame.
columns = {
    'hour': t,
    'rainfall_mm': rainfall,
    'water_level_m': water_level,
    'soil_moisture': soil_moisture,
    'monsoon_signal': monsoon,
    'risk': risk,
}
df = pd.DataFrame(columns)

print(f'Dataset: {df.shape}')
df.head(10)

## Prepare Sequences for the Attention LSTM Model

In [None]:
LOOKBACK = 48     # hours of history in each input window
HORIZON = 24      # hours ahead predicted from each window
FEATURES = ['rainfall_mm', 'water_level_m', 'soil_moisture', 'monsoon_signal']
TARGET = 'risk'

from sklearn.preprocessing import MinMaxScaler

# One sample per start index i in [LOOKBACK, len(df) - HORIZON): the inputs are
# the LOOKBACK rows before i, the targets are the HORIZON rows from i onwards.
window_starts = range(LOOKBACK, len(df) - HORIZON)

# Chronological 80/20 train/val split over the windows (no shuffling, so the
# validation windows are always later in time than the training windows).
split = int(0.8 * len(window_starts))
if split < 1:
    raise ValueError(
        f'Not enough rows ({len(df)}) to build a training window with '
        f'LOOKBACK={LOOKBACK} and HORIZON={HORIZON}'
    )

# Normalise features. The scaler is fitted ONLY on rows that appear in a
# training input window, so validation-period min/max values do not leak into
# the training features. The last training window (index split - 1) reads rows
# up to LOOKBACK + split - 2 inclusive. Validation features may therefore fall
# slightly outside [0, 1].
n_train_rows = LOOKBACK + split - 1
scaler = MinMaxScaler()
scaler.fit(df[FEATURES].iloc[:n_train_rows])
df[FEATURES] = scaler.transform(df[FEATURES])

# Create sequences: X is (samples, LOOKBACK, n_features), Y is (samples, HORIZON).
values = df[FEATURES].values
targets = df[TARGET].values

X = np.array([values[i - LOOKBACK:i] for i in window_starts], dtype=np.float32)
Y = np.array([targets[i:i + HORIZON] for i in window_starts], dtype=np.float32)

# Train/val split.
# NOTE(review): consecutive windows overlap, so the last training targets and
# the first validation targets share up to HORIZON - 1 hours of data; add a gap
# of HORIZON windows here if a strictly disjoint evaluation is required.
X_train, X_val = X[:split], X[split:]
Y_train, Y_val = Y[:split], Y[split:]

print(f'X_train: {X_train.shape}, Y_train: {Y_train.shape}')
print(f'X_val:   {X_val.shape},   Y_val:   {Y_val.shape}')

## Define Attention LSTM Model

In [None]:
# PyTorch is optional: when it is missing, the model cells below are skipped.
try:
    import torch
    import torch.nn as nn
    TORCH_OK = True
except ImportError:
    TORCH_OK = False
    print('PyTorch not available — skipping model training')

if TORCH_OK:
    class AttentionLSTM(nn.Module):
        """LSTM with temporal attention for multi-horizon forecasting.

        The LSTM encodes the input window, a learned linear scorer assigns one
        softmax-normalised weight per time step, and the weighted sum of the
        hidden states is mapped to ``horizon`` outputs in (0, 1) by a small
        MLP ending in a sigmoid.

        Args:
            n_features: Number of input features per time step.
            hidden: Size of the LSTM hidden state.
            n_layers: Number of stacked LSTM layers.
            horizon: Number of future steps predicted per sample.
            dropout: Dropout probability between stacked LSTM layers. It is
                ignored when ``n_layers == 1`` because ``nn.LSTM`` only applies
                dropout between layers and warns otherwise.
        """
        def __init__(self, n_features=4, hidden=64, n_layers=2, horizon=24, dropout=0.1):
            super().__init__()
            # Avoid the nn.LSTM warning about non-zero dropout on a single layer.
            lstm_dropout = dropout if n_layers > 1 else 0.0
            self.lstm = nn.LSTM(
                n_features, hidden, n_layers, batch_first=True, dropout=lstm_dropout
            )
            self.attention = nn.Linear(hidden, 1)
            self.fc = nn.Sequential(
                nn.Linear(hidden, 32),
                nn.ReLU(),
                nn.Linear(32, horizon),
                nn.Sigmoid(),
            )

        def forward(self, x):
            """Map a batch of windows (B, T, n_features) to forecasts (B, horizon)."""
            lstm_out, _ = self.lstm(x)  # (B, T, H)
            # Attention weights: softmax over the time axis
            attn_w = torch.softmax(self.attention(lstm_out), dim=1)  # (B, T, 1)
            context = (attn_w * lstm_out).sum(dim=1)  # (B, H)
            return self.fc(context)  # (B, horizon)

    # Seed torch as well (NumPy is seeded with 42 in the data cell) so weight
    # initialisation and the later batch shuffling are reproducible.
    torch.manual_seed(42)

    model = AttentionLSTM(n_features=len(FEATURES), horizon=HORIZON)
    print(f'Model parameters: {sum(p.numel() for p in model.parameters()):,}')
    print(model)

## Train Model

In [None]:
if TORCH_OK:
    from torch.utils.data import TensorDataset, DataLoader

    # Wrap the NumPy splits as tensor datasets; only the training loader shuffles.
    train_ds = TensorDataset(torch.tensor(X_train), torch.tensor(Y_train))
    val_ds = TensorDataset(torch.tensor(X_val), torch.tensor(Y_val))
    train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=128)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    EPOCHS = 20
    for epoch in range(EPOCHS):
        # ---- Training pass: one optimiser step per mini-batch ----
        model.train()
        train_loss = 0
        for batch_x, batch_y in train_dl:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()
            # Weight each batch mean by its size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            train_loss += batch_loss.item() * len(batch_x)
        train_loss /= len(train_ds)

        # ---- Validation pass: no gradients, dropout disabled via eval() ----
        model.eval()
        with torch.no_grad():
            val_loss = sum(
                criterion(model(batch_x), batch_y).item() * len(batch_x)
                for batch_x, batch_y in val_dl
            )
        val_loss /= len(val_ds)

        # Report on the first epoch and on every fifth epoch after that.
        is_report_epoch = epoch == 0 or (epoch + 1) % 5 == 0
        if is_report_epoch:
            print(f'Epoch {epoch+1:3d}/{EPOCHS}  train_loss={train_loss:.5f}  val_loss={val_loss:.5f}')

    # Save the final-epoch weights (state_dict only), creating the folder if needed.
    model_path = Path('../models/tft_attention_lstm.pt')
    model_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), model_path)
    print(f'\nModel saved to {model_path}')

## Evaluate & Inspect Sample Predictions

In [None]:
if TORCH_OK:
    # Run the trained model on the first five validation windows.
    model.eval()
    with torch.no_grad():
        sample_x = torch.tensor(X_val[:5])
        preds = model(sample_x).numpy()
    actuals = Y_val[:5]

    # Element-wise absolute error, reused for the per-sample and overall MAE.
    abs_err = np.abs(preds - actuals)

    # Show the first three samples: the first six forecast hours and the MAE
    # over the whole horizon.
    for i in range(3):
        pred_row, actual_row = preds[i], actuals[i]
        mae = abs_err[i].mean()
        print(f'\nSample {i+1}:')
        print(f'  Predicted (first 6h): {pred_row[:6].round(3)}')
        print(f'  Actual    (first 6h): {actual_row[:6].round(3)}')
        print(f'  MAE: {mae:.4f}')

    overall_mae = abs_err.mean()
    print(f'\nOverall MAE (5 samples): {overall_mae:.4f}')