In [1]:
from pathlib import Path
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import matplotlib.pyplot as plt

#Generisches DL Model zum Einlesen der Modelle
class GenericModel(nn.Module):
    """Fully connected feed-forward network assembled from a layer spec.

    Each entry of ``hidden_layers`` contributes a ``Linear`` layer followed
    by the selected activation and, when ``dropout`` is greater than zero,
    a ``Dropout`` layer. A final ``Linear`` layer maps to ``output_dim``
    without an activation. The whole stack is stored as ``self.net``.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output features.
        hidden_layers: Iterable of hidden-layer widths, in order.
        activation: One of ``"relu"``, ``"tanh"`` or ``"sigmoid"``.
        dropout: Dropout probability; dropout layers are only inserted
            when this is greater than zero.

    Raises:
        KeyError: If ``activation`` is not one of the supported names.
    """

    def __init__(self, input_dim, output_dim, hidden_layers, activation="relu", dropout=0.0):
        super().__init__()

        # Resolve the activation class first so an unknown name fails
        # before any layer is created.
        activation_cls = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "sigmoid": nn.Sigmoid,
        }[activation]

        modules = []
        width = input_dim
        for hidden_width in hidden_layers:
            modules += [nn.Linear(width, hidden_width), activation_cls()]
            if dropout > 0:
                modules.append(nn.Dropout(dropout))
            # The next layer consumes what this one produced.
            width = hidden_width

        # Output projection without activation.
        modules.append(nn.Linear(width, output_dim))

        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return the result."""
        return self.net(x)

#Gewichtete Loss Funktion
class WeightedMSELoss(nn.Module):
    """Mean squared error with a fixed multiplicative weight per output.

    The element-wise squared error is multiplied by ``weights`` (broadcast
    against the prediction) and then averaged over all elements.

    The weights are registered as a non-persistent buffer: they follow the
    module through ``.to()``, ``.cuda()`` and dtype casts such as
    ``.double()``, while ``state_dict()`` stays empty.

    Args:
        weights: Sequence, array or tensor of weights that broadcasts
            against the prediction tensor.
    """

    def __init__(self, weights):
        super().__init__()
        if isinstance(weights, torch.Tensor):
            # Copy so later in-place edits of the caller's tensor do not
            # leak into the loss (and to avoid the copy-construct warning).
            weight_tensor = weights.detach().clone()
        else:
            weight_tensor = torch.tensor(weights)
        # persistent=False keeps the buffer out of state_dict().
        self.register_buffer("weights", weight_tensor, persistent=False)

    def forward(self, pred, target):
        """Return the weighted mean of the squared error between the inputs."""
        squared_error = (pred - target) ** 2
        return (squared_error * self.weights).mean()

#Optimizer Auswahlfunktion
def make_optimizer(model, name, params):
    """Build an optimizer for ``model`` selected by a case-insensitive name.

    Args:
        model: Module whose ``parameters()`` are handed to the optimizer.
        name: ``"adam"``, ``"adamw"``, ``"sgd"`` or ``"radam"`` (any casing).
        params: Mapping of keyword arguments forwarded to the optimizer
            constructor.

    Returns:
        The constructed optimizer instance.

    Raises:
        ValueError: If ``name`` is not one of the supported optimizers.
    """
    key = name.lower()
    class_names = {
        "adam": "Adam",
        "adamw": "AdamW",
        "sgd": "SGD",
        "radam": "RAdam",
    }

    if key not in class_names:
        raise ValueError(f"Unknown optimizer: {key}")

    # Look the class up only after validation, so just the requested
    # optimizer has to exist in the installed torch version.
    optimizer_cls = getattr(optim, class_names[key])
    return optimizer_cls(model.parameters(), **params)




# Set up paths
# Project root (assumes the notebook is run from <root>/notebooks)
ROOT = Path.cwd().resolve().parents[0]
sys.path.insert(0, str(ROOT))

# Directory with the processed data
data_path = ROOT / "data" / "processed"

# Read the training data. The archive is opened in a ``with`` block so the
# underlying file handle is released once the arrays have been extracted.
with np.load(data_path / "data_for_training.npz") as data:
    X_raw = data["X"]  # raw X array, used for the split by polar
    X = torch.tensor(data["X_norm"], dtype=torch.float32)
    Y = torch.tensor(data["Y_norm"], dtype=torch.float32)

print(f"Loaded data: X {X.shape}, Y {Y.shape}")

# Read the model configurations (a pickled dict: model name -> config).
# NOTE(security): allow_pickle=True executes arbitrary pickle payloads;
# only load model_info.npy from a trusted source.
model_info = np.load(data_path / "model_info.npy", allow_pickle=True).item()

# Instantiate one untrained network per configuration entry.
models = {}

for name, cfg in model_info.items():
    models[name] = GenericModel(
        input_dim=cfg["input_dim"],
        output_dim=cfg["output_dim"],
        hidden_layers=cfg["layers"],
        activation=cfg["activation"],
        dropout=cfg["dropout"],
    )

# Training hyperparameters
epochs = 1000
batch_size = 256

# Optimizer: "sgd", "radam", "adamw" or "adam" (extendable via make_optimizer)
optimizer_name = "radam"
# Learning rate and weight decay, forwarded to the optimizer constructor.
# Optional extras, for SGD only:
#   "momentum": 0.9    - classical momentum, if desired
#   "nesterov": True   - Nesterov momentum (NAG, "look-ahead" momentum),
#                        only for SGD with momentum, if desired
optimizer_params = dict(lr=3e-4, weight_decay=5e-5)

# Per-output loss weights, in the order CL, CD, Top_Xtr, Bot_Xtr
loss_weights = [1.0, 2.0, 0.2, 0.2]
# Weighted loss function; unweighted alternative: criterion = nn.MSELoss()
criterion = WeightedMSELoss(loss_weights)

# Split strategy: "random" (random split) or "group" (split by polar)
split = "group"
train_ratio = 0.8

# Random split: shuffle all sample indices and cut at train_ratio
if split == "random":
    num_samples = X.shape[0]

    indices = torch.randperm(num_samples)
    train_size = int(train_ratio * num_samples)

    train_idx = indices[:train_size]
    val_idx = indices[train_size:]

# Split by polar: all samples sharing a group key land in the same subset
elif split == "group":
    # Group key built from the first three columns of the raw inputs.
    groups = np.array([
        f"{int(x[0])}_{int(x[1])}_{x[2]:.3f}" for x in X_raw
    ])

    unique_groups = np.unique(groups)
    np.random.shuffle(unique_groups)

    n_train_groups = int(train_ratio * len(unique_groups))

    train_groups = set(unique_groups[:n_train_groups])
    val_groups = set(unique_groups[n_train_groups:])

    # Every group is either a training or a validation group, so the
    # validation samples are exactly the complement of the training mask.
    train_mask = np.isin(groups, unique_groups[:n_train_groups])
    train_idx = np.flatnonzero(train_mask).tolist()
    val_idx = np.flatnonzero(~train_mask).tolist()

else:
    raise ValueError(f"Unknown split: {split}")

# Fail early with a clear message; an empty subset would otherwise only
# surface later as a division by zero when averaging the epoch loss.
if len(train_idx) == 0 or len(val_idx) == 0:
    raise ValueError(
        f"Split '{split}' produced an empty subset "
        f"(train={len(train_idx)}, val={len(val_idx)}); "
        "adjust train_ratio or provide more data."
    )

X_train, Y_train = X[train_idx], Y[train_idx]
X_val, Y_val = X[val_idx], Y[val_idx]

print(f"Train samples: {X_train.shape[0]}")
print(f"Val samples  : {X_val.shape[0]}")

# DataLoaders: shuffled mini-batches for training, fixed order for validation
train_dataset = torch.utils.data.TensorDataset(X_train, Y_train)
val_dataset = torch.utils.data.TensorDataset(X_val, Y_val)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False
)

# Training: fit every model with the same data, optimizer settings and loss,
# and store its best/final weights plus a loss plot in save_dir.
save_dir = os.path.join(data_path, "trained_models")
os.makedirs(save_dir, exist_ok=True)

for name, model in models.items():
    print(f"\nTraining model: {name}\nOptimizer: {optimizer_name}")

    # Fresh optimizer per model so no optimizer state is shared.
    optimizer = make_optimizer(model, optimizer_name, optimizer_params)

    train_losses = []
    val_losses = []
    best_val_loss = float("inf")

    for epoch in range(epochs):
        # Training pass
        model.train()
        train_loss = 0.0
        train_seen = 0

        for xb, yb in train_loader:
            optimizer.zero_grad()
            y_pred = model(xb)
            loss = criterion(y_pred, yb)
            loss.backward()
            optimizer.step()

            # The criterion returns a batch mean; weight it by the batch
            # size so the smaller final batch does not count as much as a
            # full one in the epoch average.
            n_batch = xb.size(0)
            train_loss += loss.item() * n_batch
            train_seen += n_batch

        train_loss /= train_seen

        # Validation pass (no gradients, eval mode)
        model.eval()
        val_loss = 0.0
        val_seen = 0

        with torch.no_grad():
            for xb, yb in val_loader:
                pred = model(xb)
                loss = criterion(pred, yb)
                n_batch = xb.size(0)
                val_loss += loss.item() * n_batch
                val_seen += n_batch

        val_loss /= val_seen

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if (epoch + 1) % 50 == 0:
            print(
                f"Epoch {epoch+1:4d} | "
                f"Train: {train_loss:.4e} | "
                f"Val: {val_loss:.4e}"
            )

        # Save the weights of the best epoch so far
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(
                model.state_dict(),
                os.path.join(save_dir, f"{name}_best.pth")
            )

    # Save the final weights
    torch.save(
        model.state_dict(),
        os.path.join(save_dir, f"{name}_weights.pth")
    )

    # Generate and save the training plot (log-scaled loss curves)
    plt.figure()
    plt.plot(train_losses, label="Train")
    plt.plot(val_losses, label="Validation")
    plt.yscale("log")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title(f"{name} loss")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_dir, f"{name}_loss.png"))
    plt.close()

    print(f"✓ Best val loss for {name}: {best_val_loss:.4e}")
    print(f"✓ Saved weights for {name}")

#Example code for loading the weights later:
#model = models["medium"]    # put the reconstructed model here
#model.load_state_dict(torch.load("medium_weights.pth"))
#model.eval()


Loaded data: X torch.Size([21818, 4]), Y torch.Size([21818, 4])
Train samples: 17491
Val samples  : 4327

Training model: small
Optimizer: radam
Epoch   50 | Train: 2.4307e-01 | Val: 2.4731e-01
Epoch  100 | Train: 2.1013e-01 | Val: 2.0941e-01
Epoch  150 | Train: 2.0059e-01 | Val: 1.9450e-01
Epoch  200 | Train: 1.8820e-01 | Val: 1.8461e-01
Epoch  250 | Train: 1.8155e-01 | Val: 1.7655e-01
Epoch  300 | Train: 1.7411e-01 | Val: 1.6785e-01
Epoch  350 | Train: 1.6906e-01 | Val: 1.6076e-01
Epoch  400 | Train: 1.6317e-01 | Val: 1.5440e-01
Epoch  450 | Train: 1.5899e-01 | Val: 1.4906e-01
Epoch  500 | Train: 1.5492e-01 | Val: 1.4370e-01
Epoch  550 | Train: 1.5053e-01 | Val: 1.3878e-01
Epoch  600 | Train: 1.4672e-01 | Val: 1.3418e-01
Epoch  650 | Train: 1.4330e-01 | Val: 1.3014e-01
Epoch  700 | Train: 1.4099e-01 | Val: 1.2651e-01
Epoch  750 | Train: 1.3707e-01 | Val: 1.2339e-01
Epoch  800 | Train: 1.3541e-01 | Val: 1.1960e-01
Epoch  850 | Train: 1.3238e-01 | Val: 1.1684e-01
Epoch  900 | Train: 1.