In [None]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import CSVLogger
from lightning.pytorch.tuner import Tuner

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_lightning.tuner.lr_finder import _lr_find
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Locations of the training and test CSV files (both live in the same folder)
downloads_dir = r"C:\Users\maximilian.vanliende\Downloads"
data_path = downloads_dir + "\\" + "train_data.csv"
test_path = downloads_dir + "\\" + "test_data.csv"

In [None]:
# Load both CSV files; the "Zeitstempel" column is parsed as datetime on read
csv_options = {"parse_dates": ["Zeitstempel"]}
train_df, test_df = (
    pd.read_csv(csv_file, **csv_options) for csv_file in (data_path, test_path)
)

  train_df = pd.read_csv(data_path, parse_dates=["Zeitstempel"])
  test_df  = pd.read_csv(test_path,  parse_dates=["Zeitstempel"])


In [None]:
# Stack train and test rows into a single frame; the added "split" column
# records which file each row came from so the split can be restored later.
labelled_parts = [
    part.assign(split=label)
    for label, part in (("train", train_df), ("test", test_df))
]
df = pd.concat(labelled_parts, ignore_index=True)

In [None]:
# Feature engineering: series identifier, calendar features and holiday flags
df["meter_id"] = df["location_id"]

# Calendar features taken from the timestamp; cast to str so they can be
# converted to categoricals further down.
for part in ("month", "weekday", "hour"):
    df[part] = getattr(df["Zeitstempel"].dt, part).astype(str)

# Holiday information: a row is flagged when it carries a "Ferientyp" value.
# The flag has to be derived BEFORE the missing values are replaced below.
df["is_holiday"] = df["Ferientyp"].notna().astype(str)
# Assign the filled column back instead of calling fillna(..., inplace=True)
# on the column selection: pandas warns that the chained in-place form acts on
# a temporary copy and will stop modifying df in pandas 3.0.
df["Ferientyp"] = df["Ferientyp"].fillna("None")

# Mark the calendar and holiday columns as categorical; columns that are not
# present in the frame (e.g. an optional "Feiertag") are skipped.
for col in ["is_holiday", "month", "weekday", "hour", "Ferientyp", "Feiertag"]:
    if col in df.columns:
        df[col] = pd.Categorical(df[col])

# Feature lists handed to the TimeSeriesDataSet (no geo features are used)
static_categoricals             = []  # no geo categories
static_reals                    = []  # no geo numerics
time_varying_known_categoricals = ["is_holiday", "month", "weekday", "hour"]
time_varying_known_reals        = ["time_idx", "cloud_cover_total", "humidity"]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Ferientyp'].fillna('None', inplace=True)


In [None]:
# Fill gaps in the weather columns.
# The frame stacks many locations (train rows followed by test rows), so a plain
# ffill()/bfill() over the whole column would copy values from one location into
# the neighbouring one and would follow row order instead of time. The fill is
# therefore done per location on a time-ordered view; anything still missing
# afterwards (e.g. a location without any reading) gets the column median.
real_fill_cols = ["cloud_cover_total", "humidity"]
present_fill_cols = [col for col in real_fill_cols if col in df.columns]
if present_fill_cols:
    # Results are written back through index alignment, so the row order of df
    # itself is left untouched. This relies on df having a unique index.
    ordered = df.sort_values(["location_id", "Zeitstempel"])
    location_key = ordered["location_id"]
    for col in present_fill_cols:
        filled = ordered[col].groupby(location_key).ffill()
        filled = filled.groupby(location_key).bfill()
        # The median is taken from the not-yet-filled column, as before.
        df[col] = filled.fillna(df[col].median())

# Remove unneeded columns (missing ones are ignored) and drop exact duplicates.
# Plain reassignment replaces the former inplace=True calls.
df = df.drop(columns=["Unnamed: 0", "Kreis code", "Kreis name"], errors="ignore")
df = df.drop_duplicates()

In [None]:
# Collapse the frame to one row per (location, timestamp).
# Identifier / categorical columns keep their first value within a group.
cat_cols_group = ["location_id", "split", "meter_id"] + [
    c
    for c in ["is_holiday", "month", "weekday", "hour", "Ferientyp", "Feiertag"]
    if c in df.columns
]

# All remaining numeric columns are averaged. The identifier columns are
# excluded explicitly: "meter_id" is a copy of "location_id" and, when numeric,
# used to be picked up here as well, which overwrote its "first" rule with
# "mean" and turned the series identifier into an averaged float.
num_cols_group = df.select_dtypes(include="number").columns.difference(
    cat_cols_group + ["Zeitstempel"]
)

agg_dict = {col: "first" for col in cat_cols_group if col in df.columns}
agg_dict.update({col: "mean" for col in num_cols_group})

df = df.groupby(["location_id", "Zeitstempel"], as_index=False).agg(agg_dict)

In [None]:
# Build the integer time index: every distinct timestamp gets its rank
# (0, 1, 2, ...) in chronological order, shared across all locations.
# NOTE(review): this is a rank, not an elapsed-time step count, so a timestamp
# that is missing for every location leaves no gap in time_idx - confirm that
# this is intended.
unique_times = (
    pd.Series(df["Zeitstempel"].unique()).sort_values().reset_index(drop=True)
)
time_map = dict(zip(unique_times, range(len(unique_times))))
df["time_idx"] = df["Zeitstempel"].map(time_map)

In [None]:
# Restore the original train/test partition from the "split" marker column
# and drop the marker from both resulting frames.
is_train_row = df["split"] == "train"
is_test_row = df["split"] == "test"
train_df = df.loc[is_train_row].drop(columns="split")
test_df = df.loc[is_test_row].drop(columns="split")

In [None]:
# Build the training / validation TimeSeriesDataSets and their dataloaders
max_prediction_length = 96   # forecast horizon in time steps (~1 day per the original note)
max_encoder_length    = 288  # encoder history in time steps (~3 days per the original note)
# Everything up to this index is used for training; the last
# max_prediction_length steps are held back.
training_cutoff       = df["time_idx"].max() - max_prediction_length

# The target is normalised per meter with a softplus transformation.
target_normalizer = GroupNormalizer(groups=["meter_id"], transformation="softplus")

dataset_options = dict(
    time_idx="time_idx",
    target="Messwert",
    group_ids=["meter_id"],
    static_categoricals=static_categoricals,
    static_reals=static_reals,
    time_varying_known_categoricals=time_varying_known_categoricals,
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Messwert"],
    max_encoder_length=max_encoder_length,
    min_encoder_length=max_encoder_length // 2,
    max_prediction_length=max_prediction_length,
    min_prediction_length=1,
    target_normalizer=target_normalizer,
    allow_missing_timesteps=True,
    add_relative_time_idx=True,
    add_encoder_length=True,
    add_target_scales=True,
)

# NOTE(review): the dataset is built from the combined frame df (train and
# test rows), not from train_df - confirm this is intended.
training = TimeSeriesDataSet(df[df["time_idx"] <= training_cutoff], **dataset_options)

# The validation set reuses the training set's configuration on the full
# frame, in prediction mode and without randomisation.
validation = TimeSeriesDataSet.from_dataset(
    training, df, predict=True, stop_randomization=True
)

batch_size = 128
train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=6,
    persistent_workers=True,
)
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=10 * batch_size,
    num_workers=0,
)

In [None]:
# Reference score: MAE of the Baseline model (last value used as the forecast,
# per the original note) on the validation dataloader.
target_batches = [targets for _, (targets, _) in val_dataloader]
actuals = torch.cat(target_batches)
baseline_preds = Baseline().predict(val_dataloader)
baseline_mae = torch.mean(torch.abs(actuals - baseline_preds)).item()
print(f"Baseline MAE: {baseline_mae:.4f}")

c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts wi

Baseline MAE: 0.1110


c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


In [None]:
# Configure and run the model training
pl.seed_everything(42)

# Stop when val_loss has not improved by at least 1e-5 for 10 validation runs.
early_stop = EarlyStopping(monitor="val_loss", min_delta=1e-5, patience=10, mode="min")
lr_monitor = LearningRateMonitor(logging_interval="epoch")
logger     = CSVLogger("logs_csv")

# Only this many batches are drawn per training epoch.
train_batches_per_epoch = 30

trainer = pl.Trainer(
    max_epochs=50,
    precision="64",
    devices="auto",
    gradient_clip_val=0.1,
    limit_train_batches=train_batches_per_epoch,
    # Lightning's default logging interval is 50 steps, which is more than the
    # 30 batches of an epoch, so training-step metrics never reached the
    # logger. Log every 10 steps instead.
    log_every_n_steps=10,
    callbacks=[lr_monitor, early_stop],
    logger=logger,
)

# Temporal Fusion Transformer derived from the training dataset definition.
# output_size=7 matches the seven default quantiles of QuantileLoss().
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=1e-4,
    hidden_size=32,
    attention_head_size=4,
    lstm_layers=2,
    dropout=0.2,
    hidden_continuous_size=16,
    optimizer="adam",
    output_size=7,
    loss=QuantileLoss(),
    log_interval=1,
    reduce_on_plateau_patience=4,
)
print(f"Anzahl Parameter im Modell: {tft.size()/1e3:.1f}k")

trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0    

Anzahl Parameter im Modell: 118.6k


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\maximilian.vanliende\AppData\Local\anaconda3\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (30) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Learning-rate range test.
# Uses the public Tuner API of lightning.pytorch instead of the private
# `_lr_find` helper from the separate `pytorch_lightning` namespace: the helper
# receives no dataloaders and is not meant to be called directly.
# A dedicated trainer is created because the training trainer has already
# finished (or early-stopped) its fit loop and could not run further steps.
lr_trainer = pl.Trainer(
    precision="64",
    devices="auto",
    gradient_clip_val=0.1,
    logger=False,
    enable_checkpointing=False,
)

# update_attr=False: only report the suggestion, do not overwrite the model's
# learning rate.
# NOTE(review): Lightning documents that the model weights are restored after
# the search - verify for the installed version, since tft is already trained.
lr_find_result = Tuner(lr_trainer).lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
    num_training=100,
    update_attr=False,
)

# suggestion() can come back empty when no usable loss curve was recorded;
# formatting None with ":.2e" would raise.
suggested_lr = lr_find_result.suggestion()
if suggested_lr is None:
    print("Empfohlene Lernrate: keine Empfehlung gefunden")
else:
    print(f"Empfohlene Lernrate: {suggested_lr:.2e}")

# Draw the loss-vs-learning-rate curve once (previously it was shown twice).
fig = lr_find_result.plot(suggest=True)
plt.show()

In [None]:
# Evaluation: compute point-forecast and quantile metrics on the validation set
tft = tft.float()
tft.eval()

with torch.no_grad():
    # Point forecasts, used for MSE / RMSE / MAE / SMAPE
    preds = tft.predict(val_dataloader)
    # Quantile forecasts: QuantileLoss needs one value per quantile in the last
    # dimension, so it cannot be evaluated on the point forecasts.
    quantile_preds = tft.predict(val_dataloader, mode="quantiles")

# predict() may hand back numpy arrays; convert those to float tensors.
preds = torch.from_numpy(preds).float() if not isinstance(preds, torch.Tensor) else preds
quantile_preds = (
    torch.from_numpy(quantile_preds).float()
    if not isinstance(quantile_preds, torch.Tensor)
    else quantile_preds
)

# Collect the targets batch by batch. The loop variable must be bound to the
# name that is collected (the previous `(_, _)` pattern left `y` undefined).
actuals = torch.cat([y for _, (y, _) in val_dataloader]).float()

preds_flat   = preds.numpy().flatten()
actuals_flat = actuals.numpy().flatten()

mse               = mean_squared_error(actuals_flat, preds_flat)
rmse              = mse**0.5
mae               = mean_absolute_error(actuals_flat, preds_flat)
# SMAPE is returned as a fraction, not as a percentage.
smape_val         = SMAPE()(preds, actuals).item()
# Score the quantile forecasts against the same quantiles the model was trained on.
quantile_loss_val = QuantileLoss(quantiles=list(tft.loss.quantiles))(quantile_preds, actuals).item()

print(f"MSE:           {mse:.4f}")
print(f"RMSE:          {rmse:.4f}")
print(f"MAE:           {mae:.4f}")
# Scale the fraction to percent so the printed unit is correct.
print(f"SMAPE:         {smape_val * 100:.2f}%")
print(f"Quantile Loss: {quantile_loss_val:.4f}")


In [None]:
# Plot: forecast vs. actual values for one example series
tft.eval()
with torch.no_grad():
    preds_all = tft.predict(val_dataloader)
actuals_all = torch.cat([targets for _, (targets, _) in val_dataloader])

# Pick the series to visualise; later cells reuse these three names.
series_idx = 0
true_series = actuals_all[series_idx].numpy()
pred_series = preds_all[series_idx].numpy()

plt.figure()
for curve, curve_label in (
    (true_series, "Tatsächlich"),
    (pred_series, "Vorhersage (Median)"),
):
    plt.plot(curve, label=curve_label)
plt.legend()
plt.title("Ist vs. Vorhersage für eine Serie")
plt.xlabel("Zeitschritt")
plt.ylabel("Messwert")
plt.show()

In [None]:
# Residuals (actual minus forecast) of the example series over the horizon
residuals = true_series - pred_series

plt.figure()
resid_ax = plt.gca()
resid_ax.plot(residuals)
resid_ax.axhline(0, linestyle="--")  # zero line as visual reference
resid_ax.set_title("Residuen über die Vorhersageperiode")
resid_ax.set_xlabel("Zeitschritt")
resid_ax.set_ylabel("Residuum")
plt.show()

In [None]:
# Distribution of the forecast errors over all series and horizon steps
flat_actuals = actuals_all.numpy().reshape(-1)
flat_preds = preds_all.numpy().reshape(-1)
all_errors = flat_actuals - flat_preds

plt.figure()
hist_ax = plt.gca()
hist_ax.hist(all_errors, bins=50)
hist_ax.set_title("Verteilung der Vorhersagefehler")
hist_ax.set_xlabel("Fehler (Ist – Vorh.)")
hist_ax.set_ylabel("Häufigkeit")
plt.show()

In [None]:
# Quantile bands (10%, 50%, 90%) for the example series
quantiles = [0.1, 0.5, 0.9]
tft.eval()
with torch.no_grad():
    # Request every quantile the model was trained on; the last dimension then
    # has one column per entry of tft.loss.quantiles.
    preds_q = tft.predict(val_dataloader, mode="quantiles")

# Look the wanted quantiles up by value instead of assuming they sit in
# columns 0/1/2: the model emits seven quantiles, so the first three columns
# are not the 10/50/90 % levels. A quantile the model does not provide raises
# ValueError here instead of silently plotting the wrong band.
model_quantiles = list(tft.loss.quantiles)
lower_idx, median_idx, upper_idx = (model_quantiles.index(q) for q in quantiles)

actuals_all = torch.cat([y for x, (y, _) in iter(val_dataloader)])
q_series    = preds_q[series_idx]
lower       = q_series[:, lower_idx].numpy()
median      = q_series[:, median_idx].numpy()
upper       = q_series[:, upper_idx].numpy()

plt.figure()
plt.plot(median, label="Median")
plt.fill_between(range(len(median)), lower, upper, alpha=0.2)
plt.plot(true_series, label="Tatsächlich")
plt.legend()
plt.title("Quantile‑Bänder (10%–90%) vs. Median")
plt.xlabel("Zeitschritt")
plt.ylabel("Messwert")
plt.show()

In [None]:
# Forecast vs. actual values on a real date axis.
# The x-axis previously used a made-up monthly range starting 2018-01-01, which
# does not match a horizon of time_idx steps (noted elsewhere as ~1 day).
# time_idx is the rank of a timestamp within unique_times, so the first
# predicted time_idx of the plotted sample maps straight back to its timestamp.
first_pred_idx = int(
    validation.decoded_index["time_idx_first_prediction"].iloc[series_idx]
)
dates = pd.DatetimeIndex(
    unique_times.iloc[first_pred_idx : first_pred_idx + len(true_series)]
)
start_date = dates.min()
# NOTE(review): if the sample's real horizon is shorter than the tensor length,
# fewer timestamps than values are available - the series are cut to match.
n_points = len(dates)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(dates, true_series[:n_points], label="Tatsächlich")
ax.plot(dates, pred_series[:n_points], label="Vorhersage (Median)")

# Let matplotlib choose tick spacing and labels that fit the plotted time span.
date_locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(date_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))

fig.autofmt_xdate()
ax.set_title("Ist vs. Vorhersage für eine Serie")
ax.set_xlabel("Zeit")
ax.set_ylabel("Messwert")
ax.legend()
plt.tight_layout()
plt.show()