# 📈 Autoregressive 12-Monats-Vorhersage mit GraphConvRNN
Dieses Notebook trainiert ein GraphConvRNN-Modell auf den Daten der Jahre 2021–2022, validiert es auf 2023 und sagt anschließend autoregressiv die Kantengewichte für alle zwölf Monate des Jahres 2024 voraus.

## 1. 📦 Installieren und importieren

In [9]:
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_geometric.data import DataLoader, Batch
import networkx as nx
from torch_geometric.utils import from_networkx
import glob
import os
import pandas as pd

## 2. 🏗️ Definition von GraphConvRNN mit Hidden State

In [10]:
class GraphConvRNN(nn.Module):
    """Graph convolution, a recurrent node state, and a linear edge head.

    One call processes one graph snapshot (one time step):

    1. a ``GCNConv`` layer plus ReLU embeds the node features,
    2. a GRU updates one recurrent state per node,
    3. each edge is represented by the mean of its two endpoint embeddings,
    4. a linear layer maps every edge representation to ``out_channels``.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the GCN output and of the GRU state.
        out_channels: Number of predicted values per edge.
        rnn_layers: Number of stacked GRU layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, rnn_layers=1):
        super().__init__()
        self.gcn = GCNConv(in_channels, hidden_channels)
        self.gru = nn.GRU(hidden_channels, hidden_channels, rnn_layers, batch_first=True)
        self.edge_mlp = nn.Linear(hidden_channels, out_channels)  # edge-level head

    def forward(self, x, edge_index, edge_attr=None, hidden=None):
        """Predict one value (or vector) per edge for a single snapshot.

        Args:
            x: Node feature matrix; the first dimension indexes nodes.
            edge_index: Tensor whose two rows are unpacked as source and
                target node indices of every edge.
            edge_attr: Accepted for call-site compatibility; not used by the
                computation.
            hidden: GRU state returned by a previous call, or ``None`` to
                start from a zero state. Because the state is kept per node,
                it can only be reused with a graph that has the same nodes.

        Returns:
            Tuple ``(edge_output, hidden)``. ``edge_output`` has one entry
            per edge; its trailing dimension is dropped when it has size 1
            (i.e. ``out_channels == 1``). ``hidden`` is the updated GRU state
            of shape ``[rnn_layers, num_nodes, hidden_channels]``.
        """
        # Spatial step: graph convolution followed by a non-linearity.
        x = torch.relu(self.gcn(x, edge_index))

        # Temporal step. The GRU is built with batch_first=True, so dim 0 is
        # the batch and dim 1 is time. Nodes therefore go on the batch axis
        # and this snapshot is a sequence of length 1; putting the nodes on
        # dim 1 would make the GRU iterate over the (arbitrary) node order.
        x_seq = x.unsqueeze(1)  # [num_nodes, 1, hidden_channels]
        out_seq, hidden = self.gru(x_seq, hidden)
        node_embeddings = out_seq.squeeze(1)  # [num_nodes, hidden_channels]

        # Edge representation: mean of the two endpoint embeddings.
        source, target = edge_index
        edge_embeddings = (node_embeddings[source] + node_embeddings[target]) / 2

        edge_output = self.edge_mlp(edge_embeddings)

        # Drop only the channel axis; a bare squeeze() would also collapse
        # the edge axis when the graph has exactly one edge.
        return edge_output.squeeze(-1), hidden

## 3. 🗂️ Daten laden und vorbereiten

In [11]:
def load_graphml(year):
    """Load every GraphML file of one year as a list of PyG data objects.

    Files matching ``../../../data/graphml/{year}/*.graphml`` are read in
    lexicographically sorted path order. For each graph, column 3 of the
    grouped edge attributes (the position of ``'tracks'`` in the
    ``group_edge_attrs`` list) becomes the target ``data.y`` and is removed
    from ``data.edge_attr``.

    Args:
        year: Name of the year sub-directory, interpolated into the path.

    Returns:
        A list with one data object per file; empty when nothing matches.
    """
    target_col = 3
    graphs = []
    for path in sorted(glob.glob(f'../../../data/graphml/{year}/*.graphml')):
        data = from_networkx(
            nx.read_graphml(path),
            group_node_attrs=['lat', 'lon'],
            group_edge_attrs=['id', 'month', 'speed_rel', 'tracks', 'year'],
        )
        attrs = data.edge_attr
        # Split the target column off from the remaining edge features.
        data.y = attrs[:, target_col]
        data.edge_attr = torch.cat(
            [attrs[:, :target_col], attrs[:, target_col + 1:]], dim=1
        )
        graphs.append(data)
    return graphs

# Temporal split: 2021 followed by 2022 for training, 2023 for validation,
# 2024 as the test year.
train_list = [graph for year in (2021, 2022) for graph in load_graphml(year)]
val_list = load_graphml(2023)
test_list = load_graphml(2024)

## 4. ⚙️ Dataloader für Training und Validierung

In [12]:
# One graph per step. Training graphs are visited in shuffled order,
# validation graphs in list order.
train_loader = DataLoader(
    train_list,
    batch_size=1,
    shuffle=True,
)
val_loader = DataLoader(
    val_list,
    batch_size=1,
    shuffle=False,
)



## 5. 🧠 Modell, Optimizer und Loss

In [13]:
# Use CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The input width is read from the first training graph; the model predicts
# a single value per edge.
num_node_features = train_list[0].num_node_features
model = GraphConvRNN(
    in_channels=num_node_features,
    hidden_channels=64,
    out_channels=1,
)
model = model.to(device)

# Mean squared error between predicted and target edge values.
criterion = nn.MSELoss()

def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error of two tensors as a Python float."""
    squared_error = (y_true - y_pred) ** 2
    return squared_error.mean().sqrt().item()

def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error as a Python float.

    Args:
        y_true: Tensor of reference values.
        y_pred: Tensor of predictions, broadcastable against ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / (|y_true| + epsilon)) * 100``.
    """
    # The small epsilon avoids a division by zero. It is added to the
    # magnitude of the target: adding it to the signed value would make the
    # denominator exactly zero for y_true == -epsilon and would shrink it
    # for small negative targets.
    epsilon = 1e-8
    denominator = torch.abs(y_true) + epsilon
    return (torch.mean(torch.abs(y_true - y_pred) / denominator) * 100).item()

## 6. 🔍 Hyperparameter Tuning

In [14]:
import json
from tsl.metrics.numpy.functional import rmse, mape
import numpy as np

# Grid search over learning rate and weight decay. Every combination trains
# a fresh model for 20 epochs; the combination with the lowest mean
# validation loss is kept together with its metrics.
best_val_loss = float('inf')
best_val_rmse = float('inf')
best_val_mape = float('inf')
best_params = {}

for lr in [1e-3, 5e-4]:
    for wd in [0, 1e-5]:
        # Re-initialise the model for every combination so that no weights
        # leak from one run into the next.
        model = GraphConvRNN(
            in_channels=train_list[0].num_node_features,
            hidden_channels=64,
            out_channels=1
        ).to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

        # Training
        model.train()
        for epoch in range(1, 21):
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                out, _ = model(data.x, data.edge_index, data.edge_attr)
                loss = criterion(out.view(-1), data.y.view(-1))
                loss.backward()
                optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        y_true_all = []
        y_pred_all = []

        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                out, _ = model(data.x, data.edge_index, data.edge_attr)
                y_true_all.append(data.y.view(-1).cpu().numpy())
                y_pred_all.append(out.view(-1).cpu().numpy())
                val_loss += criterion(out.view(-1), data.y.view(-1)).item()

        # Mean loss per validation graph.
        val_loss /= len(val_loader)

        # RMSE and MAPE over all validation edges, computed with NumPy.
        y_true_all = np.concatenate(y_true_all)
        y_pred_all = np.concatenate(y_pred_all)

        # Cast to built-in floats: the metrics are computed from NumPy arrays
        # and NumPy scalars such as np.float32 are rejected by json.dump.
        val_rmse = float(rmse(y_pred_all, y_true_all))
        val_mape = float(mape(y_pred_all, y_true_all))

        # Keep this combination when it beats the best validation loss so far.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_rmse = val_rmse
            best_val_mape = val_mape
            best_params = {
                'lr': lr,
                'weight_decay': wd,
                'val_loss': best_val_loss,
                'val_rmse': best_val_rmse,
                'val_mape': best_val_mape
            }

# Persist the best combination as JSON.
with open('best_hyperparams.json', 'w') as f:
    json.dump(best_params, f, indent=4)

print('Beste Hyperparameter gefunden:')
print(json.dumps(best_params, indent=4))

KeyboardInterrupt: 

## 7. 🏋️ Finales Training mit besten Parametern

In [ ]:
# Start from a fresh model: the `model` left over from the search cell is the
# one trained with the last grid combination, not with the best one.
model = GraphConvRNN(
    in_channels=train_list[0].num_node_features,
    hidden_channels=64,
    out_channels=1
).to(device)

# best_params also stores validation metrics, so it must not be unpacked
# into Adam wholesale. Fall back to Adam's defaults when the search did not
# produce a result (e.g. it was interrupted and best_params is empty).
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=best_params.get('lr', 1e-3),
    weight_decay=best_params.get('weight_decay', 0),
)

model.train()
for epoch in range(1, 31):
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out, _ = model(data.x, data.edge_index, data.edge_attr)
        loss = criterion(out.view(-1), data.y.view(-1))
        loss.backward()
        optimizer.step()

## 8. 🔮 Autoregressive Forecast für 2024

In [ ]:
# Start from the last validation graph (the final month of 2023) and roll the
# model forward twelve times, carrying the GRU state from step to step.
current_graph = val_list[-1].clone().to(device)
hidden = None
forecasts = []

model.eval()
with torch.no_grad():
    for _step in range(12):
        out, hidden = model(
            current_graph.x,
            current_graph.edge_index,
            current_graph.edge_attr,
            hidden,
        )
        forecasts.append(out.cpu())

        # Update the graph for the next month. Everything stays on `device`:
        # concatenating a CPU copy of edge_attr with `out` fails when the
        # model runs on CUDA.
        pred_col = out.reshape(-1, 1)
        new_edge_attr = torch.cat([current_graph.edge_attr, pred_col], dim=1)
        # NOTE(review): this drops column 3 of the extended matrix, i.e. the
        # last of the original edge features, so the prediction takes its
        # place. GraphConvRNN.forward does not read edge_attr, so predictions
        # are unaffected - confirm which column was meant to be replaced.
        new_edge_attr = torch.cat([new_edge_attr[:, :3], new_edge_attr[:, 4:]], dim=1)
        current_graph.edge_attr = new_edge_attr
        current_graph.y = out

## 9. 💾 Ergebnisse speichern

In [ ]:
# Each forecast tensor becomes one column after the transpose, so the CSV has
# one row per predicted edge value and one column per forecast month.
monthly_arrays = [forecast.numpy() for forecast in forecasts]
df = pd.DataFrame(monthly_arrays).T
df.columns = [f'Month_{month}' for month in range(1, 13)]
df.to_csv('forecasts_2024.csv', index=False)
print('Forecasts gespeichert als forecasts_2024.csv')