1.IMPORT LIBRARIES

In [1]:
!pip install -r requirements.txt

Collecting pytorch-lightning>=2.0.0 (from -r requirements.txt (line 7))
  Downloading pytorch_lightning-2.5.5-py3-none-any.whl.metadata (20 kB)
Collecting pytorch-forecasting>=1.0.0 (from -r requirements.txt (line 10))
  Downloading pytorch_forecasting-1.4.0-py3-none-any.whl.metadata (14 kB)
Collecting pyarrow>=12.0.0 (from -r requirements.txt (line 24))
  Using cached pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl.metadata (3.3 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting>=1.0.0->-r requirements.txt (line 10))
  Downloading lightning-2.5.5-py3-none-any.whl.metadata (39 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]>=2022.5.0->pytorch-lightning>=2.0.0->-r requirements.txt (line 7))
  Using cached aiohttp-3.12.15-cp313-cp313-macosx_11_0_arm64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning>=2.0.0->-r requirements.txt (line 7))
  Using cached aiohappyeyeballs-2.6.1-py3

In [14]:
# Core
import torch
import torch.nn as nn

# PyTorch Lightning for training
import pytorch_lightning as pl

# PyTorch Forecasting (TFT and dataset tools)
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

# Data handling
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Metrics & utilities
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Progress & debugging
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

device = "mps" if torch.backends.mps.is_available() else "cpu"


2.DATA LOADER AND CREATE SEQUENCES

In [31]:
df = pd.read_csv("data/preprocessed_energy_data_AT_2019_2025.csv")
df = df[['DateUTC','Value']]
# Convert to datetime
df['DateUTC'] = pd.to_datetime(df['DateUTC'])

# Add time index (required by PyTorch Forecasting)
df = df.sort_values("DateUTC").reset_index(drop=True)
df['time_idx'] = np.arange(len(df))

# Add group id (needed even for single time series)
df['series_id'] = "AT"

# Add year/month for possible covariates
df['year'] = df['DateUTC'].dt.year
df['month'] = df['DateUTC'].dt.month

train_cutoff = df[df['year'] < 2025]['time_idx'].max()
max_encoder_length = 60   # how many past days to use
max_prediction_length = 31 # forecast horizon = 31 days (Jan 2025)

print(f"Dataset length: {len(df)}")
print(f"Train cutoff time_idx: {train_cutoff}")
print(f"Max encoder length: {max_encoder_length}")

# Training dataset
training = TimeSeriesDataSet(
    df[lambda x: x.time_idx <= train_cutoff],
    time_idx="time_idx",
    target="Value",
    group_ids=["series_id"],
    min_encoder_length=max_encoder_length//2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=["time_idx", "month", "year"],
    time_varying_unknown_reals=["Value"],
    target_normalizer=None,
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Create validation dataset using the CORRECT method
validation = TimeSeriesDataSet.from_dataset(
    training, 
    df, 
    min_prediction_idx=train_cutoff + 1
)

batch_size = 64

# Use PyTorch Forecasting's to_dataloader() method - THIS IS CRITICAL
train_loader = training.to_dataloader(
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=0  # Set to 0 for debugging, then increase later
)

val_loader = validation.to_dataloader(
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=0
)

Dataset length: 54768
Train cutoff time_idx: 52607
Max encoder length: 60


3.MODEL

In [29]:
class TemporalFusionTransformer(pl.LightningModule):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2):
        super().__init__()
        self.save_hyperparameters()
        
        # Example architecture - replace with your actual TFT architecture
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        self.criterion = nn.MSELoss()
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        output = self.linear(lstm_out[:, -1, :])  # Take last timestep
        return output
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, prog_bar=True)
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

# Create model instance
tft = TemporalFusionTransformer(
    input_size=10,   # Adjust based on your data
    hidden_size=64,  # Adjust based on your needs
    output_size=1    # Adjust based on your task
)

# Verify the model type
print(f"Model type: {type(tft)}")
print(f"Is LightningModule: {isinstance(tft, pl.LightningModule)}")

Model type: <class '__main__.TemporalFusionTransformer'>
Is LightningModule: True


4.TRAINING PIPELINE

In [30]:
trainer = pl.Trainer(
    max_epochs=20,
    accelerator="mps",
    gradient_clip_val=0.1,
    enable_checkpointing=True,
    enable_progress_bar=True,
    logger=False,
    enable_model_summary=True,
)

# Now this should work
trainer.fit(tft, train_loader, val_loader)


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type    | Params | Mode 
----------------------------------------------
0 | lstm      | LSTM    | 52.7 K | train
1 | linear    | Linear  | 65     | train
2 | criterion | MSELoss | 0      | train
----------------------------------------------
52.8 K    Trainable params
0         Non-trainable params
52.8 K    Total params
0.211     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

AttributeError: 'dict' object has no attribute 'dim'

In [None]:
raw_predictions, x = tft.predict(val_loader, mode="raw", return_x=True)

# Plot first sample prediction
tft.plot_prediction(x, raw_predictions, idx=0, add_loss_to_title=True)
