In [None]:
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting import TemporalFusionTransformer, QuantileLoss
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor

/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/fabric/__init__.py:40: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.


In [2]:
# --- Input files -----------------------------------------------------------
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs unchanged on a machine where these files exist.
DATA_PATH = "/Users/maxi/Desktop/train_data.csv"
TEST_PATH = "/Users/maxi/Desktop/test_data.csv"

# --- Column names used throughout the notebook -----------------------------
DATE_COL = "Zeitstempel"    # parsed as datetime when the CSVs are read
TARGET_COL = "Messwert"     # column passed to the model as the target
ID_COL = "location_id"      # copied into `meter_id`, the series group id

In [3]:
# Read both splits, parsing the timestamp column as datetime.
df_train = pd.read_csv(DATA_PATH, parse_dates=[DATE_COL])
df_test = pd.read_csv(TEST_PATH, parse_dates=[DATE_COL])

# Stack the splits into one frame so feature engineering is applied uniformly;
# the `split` column records where each row came from so the frame can be
# separated again later.
# ignore_index=True gives the combined frame a fresh, unique RangeIndex.
# Without it both CSVs contribute their own 0..n labels, and any later
# label-based alignment on `df` would hit duplicate index labels.
df = pd.concat(
    [df_train.assign(split="train"), df_test.assign(split="test")],
    ignore_index=True,
)

# Drop the stray index column written by a previous `to_csv`, if present.
# errors="ignore" keeps the cell working for CSVs saved without that column.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

  df_train = pd.read_csv(DATA_PATH, parse_dates=[DATE_COL])
  df_test  = pd.read_csv(TEST_PATH,  parse_dates=[DATE_COL])


In [4]:
# Base columns
# `meter_id` is the series identifier used as the dataset's group id.
df["meter_id"] = df[ID_COL]

# Integer time index: whole hours elapsed since the earliest timestamp in the
# combined (train + test) frame.
origin = df[DATE_COL].min()
elapsed_seconds = (df[DATE_COL] - origin).dt.total_seconds()
df["time_idx"] = (elapsed_seconds // 3600).astype(int)

In [5]:
# Calendar features derived from the timestamp column.
# Each name is both the `.dt` accessor attribute and the new column name.
for calendar_part in ("month", "weekday", "hour"):
    df[calendar_part] = getattr(df[DATE_COL].dt, calendar_part)

In [6]:
# Holidays
# The flag must be computed BEFORE the missing values are replaced below:
# re-running this cell on an already-filled frame would flag every row as 1.
has_holiday_type = df["Ferientyp"].notna()
df["is_holiday"] = has_holiday_type.astype(int)

# Replace missing entries with the literal string "None" so the column has no
# NaN values when it is later used as a categorical feature.
df["Ferientyp"] = df["Ferientyp"].fillna("None")

In [7]:
# Weather interpolation (only for columns that are present)
# NOTE(review): interpolation follows the row order of the combined frame, so
# gaps may be filled from neighbouring rows that belong to another meter or to
# the other split - confirm the frame is ordered as intended.
present_weather_cols = [
    name for name in ("cloud_cover_total", "humidity") if name in df.columns
]
for weather_col in present_weather_cols:
    # limit_direction="both" also fills leading and trailing gaps.
    df[weather_col] = df[weather_col].interpolate(limit_direction="both")

In [8]:
# Static real-valued features (passed to the dataset as `static_reals`).
static_reals = [
    "area",
    "Bruttoverdienst",
    "Durchschnittsalter der Bevölkerung",
    "Erwerbstätige",
    "Kaufkraft",
    "Medianeinkommen",
    "Neubauwohnungen in Ein- und Zweifamilienhäusern",
    "Neubauwohnungen je Einwohner",
    "Schuldnerquote",
    "Siedlungsdichte in km²",
]

# Impute missing values with the per-column median, computed over every row
# of the combined (train + test) frame.
static_medians = df[static_reals].median()
df[static_reals] = df[static_reals].fillna(static_medians)

In [9]:
# Categorical features must be strings; `post_code` is included so it is not
# treated as a number.
categorical_columns = [
    "city", "post_code", "Ferientyp", "month", "weekday", "hour", "is_holiday",
]
for cat_col in categorical_columns:
    df[cat_col] = df[cat_col].astype(str)

# Separate the combined frame back into its two splits; the helper `split`
# column is dropped from both results.
is_train_row = df["split"] == "train"
is_test_row = df["split"] == "test"
train = df.loc[is_train_row].drop(columns=["split"])
test = df.loc[is_test_row].drop(columns=["split"])

In [10]:
# Feature lists handed to the TimeSeriesDataSet constructor.

# Constant per series.
static_categoricals = [
    "city",
    "post_code",
]

# Known for future time steps as well (calendar and holiday information).
time_varying_known_categoricals = [
    "month",
    "weekday",
    "hour",
    "is_holiday",
    "Ferientyp",
]

# Real-valued inputs declared as known in advance.
time_varying_known_reals = [
    "time_idx",
    "cloud_cover_total",
    "humidity",
]

In [11]:
# TFT dataset: window sizes, expressed in hourly steps of `time_idx`.
HOURS_PER_DAY = 24
max_encoder_length = 7 * HOURS_PER_DAY      # 7 days of history
max_prediction_length = HOURS_PER_DAY       # 24 h forecast horizon

# Reserve the last `max_prediction_length` time steps for validation.
# The validation dataset is derived from `train` with
# `min_prediction_idx=training_cutoff + 1`; if the training dataset were built
# from the full `train` frame, that window would also be part of the training
# samples and `val_loss` / early stopping would be measured on seen data.
training_cutoff = train["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    # Only rows up to the cutoff take part in training.
    train[train["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target=TARGET_COL,
    group_ids=["meter_id"],
    static_categoricals=static_categoricals,
    static_reals=static_reals,
    time_varying_known_categoricals=time_varying_known_categoricals,
    time_varying_known_reals=time_varying_known_reals,
    # The target itself is the only input that is unknown in the future.
    time_varying_unknown_reals=[TARGET_COL],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # Normalise the target separately for every meter.
    target_normalizer=GroupNormalizer(groups=["meter_id"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    # Tolerate gaps in `time_idx` within a series.
    allow_missing_timesteps=True,
)

In [None]:
# Dataloaders share one batch size and worker count.
batch_size = 128
loader_options = dict(batch_size=batch_size, num_workers=4)

train_loader = training.to_dataloader(train=True, **loader_options)

# Validation: reuse the training dataset's configuration, but only keep
# samples whose first predicted step lies after the cutoff.
# NOTE(review): this is a true hold-out only if `training` was built from rows
# with time_idx <= training_cutoff - verify against the dataset cell.
training_cutoff = train["time_idx"].max() - max_prediction_length
first_validation_idx = training_cutoff + 1
val_dataset = TimeSeriesDataSet.from_dataset(
    training,
    train,
    min_prediction_idx=first_validation_idx,
    stop_randomization=True,
)
val_loader = val_dataset.to_dataloader(train=False, **loader_options)

# --------------------------------------------------
# 4) Model, trainer, training
# --------------------------------------------------
# Fix the random seeds (including dataloader workers) before the model is
# instantiated.
seed_everything(42, workers=True)

# Hyperparameters of the Temporal Fusion Transformer; the input and output
# layout is taken from the `training` dataset.
tft_hparams = dict(
    learning_rate=1e-3,
    hidden_size=32,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=16,
    loss=QuantileLoss(),
)
tft = TemporalFusionTransformer.from_dataset(training, **tft_hparams)

# Stop once `val_loss` has not improved for 6 consecutive validation checks.
early_stopping = EarlyStopping(monitor="val_loss", patience=6, mode="min")
# Log the learning rate once per epoch.
lr_monitor = LearningRateMonitor("epoch")

trainer = Trainer(
    max_epochs=30,
    accelerator="auto",
    devices="auto",
    gradient_clip_val=0.1,
    callbacks=[early_stopping, lr_monitor],
)

# Dataloaders are passed by keyword rather than positionally.
trainer.fit(tft, train_dataloaders=train_loader, val_dataloaders=val_loader)

# --------------------------------------------------
# 5) Test evaluation
# --------------------------------------------------
# Build the test dataset with the same encoders and normalisers as `training`.
test_dataset = TimeSeriesDataSet.from_dataset(
    training, test, stop_randomization=True
)
test_loader = test_dataset.to_dataloader(train=False, batch_size=batch_size, num_workers=4)

# `predict` returns a single Prediction object, not a tuple. Requesting
# `return_y=True` yields the actual values aligned sample-by-sample with the
# forecasts, so no positional lookup into `test` is needed (the returned
# index is a DataFrame and cannot be fed to `.iloc`).
predictions = tft.predict(
    test_loader, mode="prediction", return_index=True, return_y=True
)
pred = predictions.output.detach().cpu()
idx = predictions.index                      # one row per predicted window
y_true = predictions.y[0].detach().cpu()     # `y` is a (target, weight) pair

# Compute the metrics directly on the tensors: the scikit-learn helpers used
# before were never imported, and their `squared=False` argument is no longer
# available in recent scikit-learn releases.
errors = pred - y_true
print("MAE :", errors.abs().mean().item())
print("RMSE:", errors.pow(2).mean().sqrt().item())

# --------------------------------------------------
# 6) Save checkpoint
# --------------------------------------------------
# Relative path: the file is written to the kernel's current working directory.
checkpoint_path = "tft_stromverbrauch.ckpt"
trainer.save_checkpoint(checkpoint_path)

Seed set to 42
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/fabric/__init__.py:40: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/fabric/__init__.py:40: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
/Users/maxi/Documents/GitHu

Training: |          | 0/? [00:00<?, ?it/s]