In [1]:
import pandas as pd, torch, os
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import CSVLogger
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, QuantileLoss
from pytorch_forecasting.data import GroupNormalizer
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Input CSV locations. The original absolute paths stay as defaults; set the
# environment variables OPENMETER_TRAIN_CSV / OPENMETER_TEST_CSV to run the
# notebook on a machine where those paths do not exist.
DATA_PATH = os.environ.get("OPENMETER_TRAIN_CSV", "/Users/maxi/Desktop/train_data.csv")
TEST_PATH = os.environ.get("OPENMETER_TEST_CSV", "/Users/maxi/Desktop/test_data.csv")

# Only the first rows of each file are read (nrows), with "Zeitstempel" parsed
# as datetime.
# NOTE(review): the recorded cell output shows a warning attached to the training
# read_csv call (message truncated) - check the inferred column dtypes.
train = pd.read_csv(DATA_PATH, nrows=80_000, parse_dates=["Zeitstempel"])
test  = pd.read_csv(TEST_PATH,  nrows=20_000, parse_dates=["Zeitstempel"])

# Kept as the last expression so the cell still displays the seed value.
seed_everything(42, workers=True)

/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/fabric/__init__.py:40: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
  train = pd.read_csv(DATA_PATH, nrows=80_000, parse_dates=["Zeitstempel"])
Seed set to 42


42

In [2]:
# Stack both splits so categorical columns share one category pool and the time
# index has a common origin; "split" records where each row came from.
df = pd.concat([train.assign(split="train"), test.assign(split="test")], ignore_index=True)

df["meter_id"] = df["location_id"]

# Integer time index: whole hours elapsed since the earliest timestamp in df.
origin = df["Zeitstempel"].min()
df["time_idx"] = ((df["Zeitstempel"] - origin).dt.total_seconds() // 3600).astype(int)

# Calendar features, stored as strings.
for part in ("month", "weekday", "hour"):
    df[part] = getattr(df["Zeitstempel"].dt, part).astype(str)

# A row is a holiday when Ferientyp is present. The "None" placeholder written
# two lines below is excluded too, so re-running this cell on frames that were
# already processed does not flag every row as a holiday.
has_ferientyp = df["Ferientyp"].notna() & (df["Ferientyp"].astype(str) != "None")
df["is_holiday"] = has_ferientyp.astype(str)
df["Ferientyp"]  = df["Ferientyp"].fillna("None").astype(str)
df["post_code"]  = df["post_code"].astype(str)
df["city"]       = df["city"].astype(str)

cat_cols = ["city", "post_code", "Ferientyp", "month", "weekday", "hour", "is_holiday"]
for c in cat_cols:
    df[c] = pd.Categorical(df[c])               # shared category pool for train and test

static_reals = [
    "area", "Bruttoverdienst", "Durchschnittsalter der Bevölkerung",
    "Erwerbstätige", "Kaufkraft", "Medianeinkommen",
    "Neubauwohnungen in Ein- und Zweifamilienhäusern",
    "Neubauwohnungen je Einwohner", "Schuldnerquote",
    "Siedlungsdichte in km²"
]

# Fill missing numeric values.
# Gaps are filled per meter in chronological order (forward, then backward), so
# a value is never carried over from a different meter's rows. Whatever is still
# missing afterwards (a meter without any value) falls back to the median of the
# training rows. .ffill()/.bfill() replace the deprecated fillna(method=...).
real_fill = ["cloud_cover_total", "humidity"] + static_reals
is_train = df["split"] == "train"
chronological = df.sort_values(["meter_id", "Zeitstempel"])
for col in real_fill:
    if col not in df.columns:
        continue
    filled = chronological.groupby("meter_id")[col].ffill()
    filled = filled.groupby(chronological["meter_id"]).bfill()
    # Assignment aligns on the (unique) row index, so df keeps its row order.
    df[col] = filled.fillna(df.loc[is_train, col].median())

# Back to separate frames, without the helper column.
train = df[df.split == "train"].drop(columns="split")
test  = df[df.split == "test"].drop(columns="split")

In [3]:
# Encoder (history) and prediction (horizon) lengths, in time_idx steps.
ENC_LEN, PRED_LEN = 72, 24

static_categoricals            = ["city", "post_code"]
time_varying_known_categoricals = ["is_holiday"]          # Ferientyp deliberately left out
time_varying_known_reals        = ["time_idx", "month", "weekday", "hour",
                                   "cloud_cover_total", "humidity"]

# month / weekday / hour are declared as real-valued inputs above, so store them
# as numbers rather than as categorical strings. The conversion also works when
# the columns are already numeric, so the cell can be re-run.
# (The former re-categorisation loop over `df` was dropped: train/test had already
# been split off from df, so it did not change the frames used below.)
calendar_reals = ["month", "weekday", "hour"]
train = train.assign(**{c: pd.to_numeric(train[c].astype(str)) for c in calendar_reals})
test  = test.assign(**{c: pd.to_numeric(test[c].astype(str)) for c in calendar_reals})

# Hold the last PRED_LEN time steps out of the training dataset. Previously the
# validation window was also part of `training`, so val_loss (monitored by early
# stopping) was computed on data the model had been trained on.
training_cutoff = train["time_idx"].max() - PRED_LEN

training = TimeSeriesDataSet(
    train[train["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target="Messwert",
    group_ids=["meter_id"],
    static_categoricals           = static_categoricals,
    static_reals                  = static_reals,
    time_varying_known_categoricals = time_varying_known_categoricals,
    time_varying_known_reals        = time_varying_known_reals,
    time_varying_unknown_reals      = ["Messwert"],
    max_encoder_length  = ENC_LEN,
    max_prediction_length = PRED_LEN,
    target_normalizer   = GroupNormalizer(groups=["meter_id"]),
    allow_missing_timesteps=True,
)

# Validation split = last window of the train frame. Built from the full frame so
# the encoder can see the history; predictions start right after the cutoff
# (same min_prediction_idx as before: max time_idx - PRED_LEN + 1).
val_ds = TimeSeriesDataSet.from_dataset(
    training, train, min_prediction_idx=training_cutoff + 1
)

test_ds = TimeSeriesDataSet.from_dataset(training, test, stop_randomization=True)

batch = 128
train_dl = training.to_dataloader(train=True,  batch_size=batch, num_workers=10, shuffle=True)
val_dl   = val_ds.to_dataloader(train=False, batch_size=batch, num_workers=10)
test_dl  = test_ds.to_dataloader(train=False, batch_size=batch, num_workers=10)

In [None]:
# Temporal Fusion Transformer configured from the training dataset, trained with
# a quantile loss.
tft = TemporalFusionTransformer.from_dataset(
    training,
    hidden_size             = 16,
    attention_head_size     = 2,
    hidden_continuous_size  = 8,
    dropout                 = 0.2,
    learning_rate           = 1e-3,
    loss                    = QuantileLoss(),
)

# Stop once val_loss has not improved for 3 validation runs.
early_stop = EarlyStopping(monitor="val_loss", patience=3, mode="min", verbose=True)
logger     = CSVLogger("lightning_logs", name="TFT-Projekt")

trainer = Trainer(
    max_epochs        = 30,
    # "auto" instead of the hard-coded "mps": the notebook then also starts on
    # machines without an Apple GPU, and matches the second trainer cell.
    accelerator       = "auto",
    devices           = "auto",
    precision         = 32,
    gradient_clip_val = 0.1,
    callbacks         = [early_stop, LearningRateMonitor("epoch")],
    logger            = logger,
)

trainer.fit(tft, train_dataloaders=train_dl, val_dataloaders=val_dl)

/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/fabric/__init__.py:40: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
/Users/maxi/Documents/GitHub/OpenMeter_Analysis/Venv_OpenMeter/lib/python3.11/site-packages/lightning/fabric/__init__.py:40: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
/Users/maxi/Documents/GitHu

Training: |          | 0/? [00:00<?, ?it/s]

In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 5) Test evaluation
# ──────────────────────────────────────────────────────────────────────────────
# Ask predict() for the matching targets instead of looking them up in `test`:
# the index returned with return_index=True is a table of time_idx / group ids,
# not row positions, so it cannot be used with test.iloc[...].
# NOTE(review): relies on the `return_y` option and the `.output` / `.y` fields of
# the prediction result (pytorch-forecasting >= 1.0) - confirm installed version.
prediction = tft.predict(test_dl, mode="prediction", return_y=True)

# Flatten (samples x horizon) so predictions and targets line up element-wise;
# .y is a (target, weight) pair.
pred   = prediction.output.detach().cpu().numpy().ravel()
y_true = prediction.y[0].detach().cpu().numpy().ravel()

print("MAE :", mean_absolute_error(y_true, pred))
# Square root taken explicitly: the `squared=False` argument is deprecated and
# no longer accepted by newer scikit-learn releases.
print("RMSE:", mean_squared_error(y_true, pred) ** 0.5)



In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 6) Save checkpoint
# ──────────────────────────────────────────────────────────────────────────────
# "/content" is kept as the preferred location. Where that directory cannot be
# created (e.g. no write access to the filesystem root), the checkpoint goes to
# the current working directory instead of aborting the cell.
try:
    os.makedirs("/content", exist_ok=True)
    ckpt_dir = "/content"
except OSError:
    ckpt_dir = os.getcwd()

ckpt_path = os.path.join(ckpt_dir, "tft_stromverbrauch.ckpt")
trainer.save_checkpoint(ckpt_path)
print(f"Checkpoint gespeichert → {ckpt_path}")

In [None]:
# ----------- divider: the cells below repeat the pipeline with smaller samples -----------


In [None]:
import os

import pandas as pd, torch

from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import CSVLogger
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, QuantileLoss
from pytorch_forecasting.data import GroupNormalizer
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Input CSV locations. The original absolute paths stay as defaults; set the
# environment variables OPENMETER_TRAIN_CSV / OPENMETER_TEST_CSV to run the
# notebook on a machine where those paths do not exist.
DATA_PATH = os.environ.get("OPENMETER_TRAIN_CSV", "/Users/maxi/Desktop/train_data.csv")
TEST_PATH = os.environ.get("OPENMETER_TEST_CSV", "/Users/maxi/Desktop/test_data.csv")

# Only the first rows of each file are read (nrows), with "Zeitstempel" parsed
# as datetime.
train = pd.read_csv(DATA_PATH, nrows=50_000, parse_dates=["Zeitstempel"])
test  = pd.read_csv(TEST_PATH,  nrows=10_000, parse_dates=["Zeitstempel"])

In [None]:
# Print the text representation of both frames, train first, one after the other.
print(train, test, sep="\n")

In [None]:
# --- Combine both splits ---------------------------------------------------
# "split" records where each row came from so the frames can be separated again.
df = pd.concat([train.assign(split="train"), test.assign(split="test")], ignore_index=True)
df["meter_id"] = df["location_id"]

# --- Time index: whole hours since the earliest timestamp ------------------
origin = df["Zeitstempel"].min()
df["time_idx"] = ((df["Zeitstempel"] - origin).dt.total_seconds() // 3600).astype(int)

# --- Calendar features -----------------------------------------------------
# Kept numeric: they are listed in time_varying_known_reals below, so they are
# no longer converted to strings / categoricals.
df["month"]   = df["Zeitstempel"].dt.month
df["weekday"] = df["Zeitstempel"].dt.weekday
df["hour"]    = df["Zeitstempel"].dt.hour

# --- Holidays / school vacations -------------------------------------------
# A row is a holiday when Ferientyp is present. The "None" placeholder written
# below is excluded too, so re-running this cell on already processed frames
# does not flag every row as a holiday.
has_ferientyp = df["Ferientyp"].notna() & (df["Ferientyp"].astype(str) != "None")
df["is_holiday"] = has_ferientyp.astype(str)
df["Ferientyp"]  = df["Ferientyp"].fillna("None").astype(str)

# --- Other categoricals ----------------------------------------------------
df["post_code"] = df["post_code"].astype(str)
df["city"]      = df["city"].astype(str)

# --- Shared category encoding ----------------------------------------------
cat_cols = ["city", "post_code", "Ferientyp", "is_holiday"]
for col in cat_cols:
    df[col] = pd.Categorical(df[col])     # shared category pool for train and test

# --- Feature lists ---------------------------------------------------------
static_reals = [
    "area", "Bruttoverdienst", "Durchschnittsalter der Bevölkerung",
    "Erwerbstätige", "Kaufkraft", "Medianeinkommen",
    "Neubauwohnungen in Ein- und Zweifamilienhäusern",
    "Neubauwohnungen je Einwohner", "Schuldnerquote",
    "Siedlungsdichte in km²"
]

static_categoricals            = ["city", "post_code"]
time_varying_known_categoricals = ["is_holiday", "Ferientyp"]        # month etc. are reals
time_varying_known_reals        = ["time_idx", "month", "weekday", "hour",
                                   "cloud_cover_total", "humidity"]

# --- Fill gaps in all real-valued columns ----------------------------------
# Gaps are filled per meter in chronological order (forward, then backward), so
# a value is never carried over from a different meter's rows. Whatever is still
# missing afterwards (a meter without any value) falls back to the median of the
# training rows. .ffill()/.bfill() replace the deprecated fillna(method=...).
fill_cols = ["cloud_cover_total", "humidity"] + static_reals
is_train = df["split"] == "train"
chronological = df.sort_values(["meter_id", "Zeitstempel"])

for col in fill_cols:
    if col not in df.columns:
        continue
    filled = chronological.groupby("meter_id")[col].ffill()
    filled = filled.groupby(chronological["meter_id"]).bfill()
    # Assignment aligns on the (unique) row index, so df keeps its row order.
    df[col] = filled.fillna(df.loc[is_train, col].median())

# --- Back to the train/test split ------------------------------------------
train = df[df["split"] == "train"].drop(columns="split")
test  = df[df["split"] == "test"].drop(columns="split")


In [None]:
# Encoder (history) and prediction (horizon) lengths, in time_idx steps.
ENC_LEN, PRED_LEN = 72, 24

# Hold the last PRED_LEN time steps out of the training dataset; they form the
# validation window used for val_loss / early stopping.
training_cutoff = train["time_idx"].max() - PRED_LEN

training = TimeSeriesDataSet(
    train[train["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target="Messwert",
    group_ids=["meter_id"],
    static_categoricals           = static_categoricals,
    static_reals                  = static_reals,
    time_varying_known_categoricals = time_varying_known_categoricals,
    time_varying_known_reals        = time_varying_known_reals,
    time_varying_unknown_reals      = ["Messwert"],
    max_encoder_length=ENC_LEN,
    max_prediction_length=PRED_LEN,
    target_normalizer=GroupNormalizer(groups=["meter_id"]),
    allow_missing_timesteps=True,
)

# Validation dataset: built from the full train frame (so the encoder can see the
# history) with predictions starting right after the cutoff. This cell previously
# created no validation data at all, although the training cell passes `val_dl`
# to trainer.fit and monitors val_loss.
val_ds = TimeSeriesDataSet.from_dataset(
    training, train, min_prediction_idx=training_cutoff + 1
)

test_ds = TimeSeriesDataSet.from_dataset(
    training, test, stop_randomization=True
)

batch = 128
train_dl = training.to_dataloader(train=True, batch_size=batch, num_workers=12, shuffle=True)
val_dl = val_ds.to_dataloader(train=False, batch_size=batch, num_workers=12)
test_dl = test_ds.to_dataloader(train=False, batch_size=batch, num_workers=12)

In [None]:
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor

# workers=True also seeds the DataLoader worker processes, matching the seeding
# call in the first cell of the notebook.
seed_everything(42, workers=True)

# Temporal Fusion Transformer configured from the training dataset, trained with
# a quantile loss.
tft = TemporalFusionTransformer.from_dataset(
    training,
    hidden_size=16,
    attention_head_size=2,
    hidden_continuous_size=8,
    dropout=0.2,
    learning_rate=0.001,
    loss=QuantileLoss(),
)

# Stop once the monitored metric has not improved for 3 validation runs.
early_stop = EarlyStopping(
    monitor="val_loss",   # monitored metric
    patience=3,
    mode="min",
    verbose=True,
)

# Metrics are written as CSV files under lightning_logs/TFT-Projekt.
logger = CSVLogger("lightning_logs", name="TFT-Projekt")

trainer = Trainer(
    max_epochs=30,
    accelerator="auto",
    devices=1,
    precision=32,
    gradient_clip_val=0.1,
    callbacks=[early_stop, LearningRateMonitor("epoch")],
    logger=logger,
)

# `val_dl` is not created in this cell; it must already exist in the kernel.
trainer.fit(tft, train_dataloaders=train_dl, val_dataloaders=val_dl)

In [None]:
# Rebuild the test dataset / loader from the training dataset's configuration.
test_ds  = TimeSeriesDataSet.from_dataset(training, test, stop_randomization=True)
test_dl  = test_ds.to_dataloader(train=False, batch_size=batch)

# Ask predict() for the matching targets instead of looking them up in `test`:
# the index returned with return_index=True is a table of time_idx / group ids,
# not row positions, so it cannot be used with test.iloc[...].
# NOTE(review): relies on the `return_y` option and the `.output` / `.y` fields of
# the prediction result (pytorch-forecasting >= 1.0) - confirm installed version.
prediction = tft.predict(test_dl, mode="prediction", return_y=True)

# Flatten (samples x horizon) so predictions and targets line up element-wise;
# .y is a (target, weight) pair.
pred   = prediction.output.detach().cpu().numpy().ravel()
y_true = prediction.y[0].detach().cpu().numpy().ravel()

print("MAE :", mean_absolute_error(y_true, pred))
# Square root taken explicitly: the `squared=False` argument is deprecated and
# no longer accepted by newer scikit-learn releases.
print("RMSE:", mean_squared_error(y_true, pred) ** 0.5)

In [None]:
# "/content" is kept as the preferred location. Where that directory cannot be
# created (e.g. no write access to the filesystem root), the checkpoint goes to
# the current working directory instead of aborting the cell.
try:
    os.makedirs("/content", exist_ok=True)
    ckpt_dir = "/content"
except OSError:
    ckpt_dir = os.getcwd()

trainer.save_checkpoint(os.path.join(ckpt_dir, "tft_stromverbrauch.ckpt"))
print("Checkpoint gespeichert.")