In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

try:
    import torch  # type: ignore[import]
    import torch.nn as nn  # type: ignore[import]
    from torch.utils.data import TensorDataset, DataLoader, random_split  # type: ignore[import]
except ImportError as exc:
    raise ImportError(
        "PyTorch is required for this notebook. Please install torch."
    ) from exc

try:
    from sklearn.preprocessing import MinMaxScaler, StandardScaler  # type: ignore[import]
except ImportError as exc:
    raise ImportError(
        "scikit-learn is required for this notebook. Please install scikit-learn."
    ) from exc

torch.manual_seed(42)

import sys
root_dir = os.path.abspath("..")
if root_dir not in sys.path:
    sys.path.append(root_dir)

from src.config import (
    CMAPSS_DATASETS,
    MAX_RUL,
    SEQUENCE_LENGTH,
    HIDDEN_SIZE,
    NUM_LAYERS,
    OUTPUT_SIZE,
    LEARNING_RATE,
    NUM_EPOCHS,
    GLOBAL_FEATURE_COLS,
    GLOBAL_DROP_COLS 
)

from src.data_loading import load_cmapps_subset
from src.additional_features import create_physical_features
from src.eol_full_lstm import (
    build_full_eol_sequences_from_df,
    create_full_dataloaders,
    EOLFullLSTM,
    train_eol_full_lstm,
    evaluate_eol_full_lstm,
)


In [3]:
# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cpu


# Full-Trajectory LSTM für EOL-Prediction (FD001-FD004)

Dieses Notebook trainiert ein LSTM-Modell für EOL-Prediction auf den kompletten Trajektorien aller Engines (nicht nur Tail-Samples).

**Features:**
- Sliding-Window über alle Zyklen jeder Engine
- RUL wird auf max_rul=125 gecappt (NASA-Style)
- Engine-basierter Split (keine Data Leakage)
- NASA PHM08 Score Evaluation


In [4]:
# ===================================================================
# 1. Daten laden: FD001-FD004
# ===================================================================
print("=" * 60)
print("[1] Loading CMAPSS Data (FD001-FD004)")
print("=" * 60)

dfs = []
for fd_id in ["FD001", "FD002", "FD003", "FD004"]:
    print(f"Loading {fd_id}...")
    df_train, _, _ = load_cmapps_subset(
        fd_id,
        max_rul=None,  # Kein Clipping im Train (wird später in build_full_eol_sequences gecappt)
        clip_train=False,
        clip_test=True,  # Test weiter clampen für NASA-Score
    )
    df_train = create_physical_features(df_train)
    # FD_ID als numerisches Feature (0=FD001, 1=FD002, 2=FD003, 3=FD004)
    # Wichtig: FD_ID muss numerisch sein, da es als Feature verwendet wird
    fd_id_map = {"FD001": 0, "FD002": 1, "FD003": 2, "FD004": 3}
    df_train["FD_ID"] = fd_id_map[fd_id]
    dfs.append(df_train)
    print(f"  {fd_id}: {len(df_train)} rows, {df_train['UnitNumber'].nunique()} engines")

df_train_global = pd.concat(dfs, ignore_index=True)
print(f"\nTotal: {len(df_train_global)} rows, {df_train_global['UnitNumber'].nunique()} engines")


[1] Loading CMAPSS Data (FD001-FD004)
Loading FD001...
New columns successfully added. Current number of columns: 33
   Effizienz_HPC_Proxy  EGT_Drift  Fan_HPC_Ratio  RUL
0             0.941013        0.2       0.403737  191
1             0.943169        0.2       0.403406  190
2             0.942554       -1.8       0.404505  189
3             0.943025        0.2       0.405834  188
4             0.942581        1.2       0.405831  187
  FD001: 20631 rows, 100 engines
Loading FD002...
[ConditionID] Found 7 unique (S1_r, S2_r, S3_r) combos in TRAIN.
[ConditionID] Train ConditionIDs: [0 1 2 3 4 5 6]
[ConditionID] Test  ConditionIDs: [0 1 2 3 4 5 6]
New columns successfully added. Current number of columns: 33
   Effizienz_HPC_Proxy  EGT_Drift  Fan_HPC_Ratio  RUL
0             0.940506        6.8       0.408741  148
1             0.941593        2.8       0.406364  147
2             0.934608      -18.2       0.427536  146
3             0.944099        1.8       0.405833  145
4           

In [5]:
# ===================================================================
# 2. Feature-Liste definieren
# ===================================================================
print("\n" + "=" * 60)
print("[2] Defining Feature Columns")
print("=" * 60)

# Numerische Features aus GLOBAL_FEATURE_COLS
numeric_cols = df_train_global[GLOBAL_FEATURE_COLS].select_dtypes(
    include=["number"]
).columns.tolist()

feature_cols = numeric_cols
print(f"Using {len(feature_cols)} features:")
if len(feature_cols) > 10:
    print(f"  Features: {', '.join(feature_cols[:10])}...")
else:
    print(f"  Features: {', '.join(feature_cols)}")



[2] Defining Feature Columns
Using 25 features:
  Features: Setting1, Setting2, Setting3, Sensor2, Sensor3, Sensor4, Sensor6, Sensor7, Sensor8, Sensor9...


In [6]:
# ===================================================================
# 3. Full-Trajectory Sequenzen bauen
# ===================================================================
print("\n" + "=" * 60)
print("[3] Building Full-Trajectory Sequences")
print("=" * 60)

X_full, y_full, unit_ids_full = build_full_eol_sequences_from_df(
    df=df_train_global,
    feature_cols=feature_cols,
    past_len=30,
    max_rul=125,  # NASA-Style: RUL wird auf 125 gecappt
    unit_col="UnitNumber",
    cycle_col="TimeInCycles",
    rul_col="RUL",
)



[3] Building Full-Trajectory Sequences
[build_full_eol_sequences_from_df] Summary
Num units: 260
Using past_len=30, max_rul=125
Num feature cols: 25
X shape: torch.Size([152819, 30, 25]), y shape: torch.Size([152819]), unit_ids shape: torch.Size([152819])
RUL stats (capped at 125): min=0.00, max=125.00, mean=88.47, std=41.50


In [7]:
# ===================================================================
# 4. Dataloaders erstellen (Engine-basierter Split)
# ===================================================================
print("\n" + "=" * 60)
print("[4] Creating DataLoaders (Engine-based Split)")
print("=" * 60)

train_loader, val_loader, scaler, train_unit_ids, val_unit_ids = create_full_dataloaders(
    X=X_full,
    y=y_full,
    unit_ids=unit_ids_full,
    batch_size=256,
    engine_train_ratio=0.8,
    shuffle_engines=True,
    random_seed=42,
)



[4] Creating DataLoaders (Engine-based Split)
[create_full_dataloaders] Engine-based split
Total units: 260
Train units: 208, Val units: 52
Train samples: 120764, Val samples: 32055
Feature scaling: StandardScaler (fitted on train only)


In [8]:
# ===================================================================
# 5. Modell initialisieren
# ===================================================================
print("\n" + "=" * 60)
print("[5] Initializing EOLFullLSTM Model")
print("=" * 60)

model = EOLFullLSTM(
    input_dim=len(feature_cols),
    hidden_dim=64,
    num_layers=2,
    dropout=0.1,
    bidirectional=False,
)

print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")



[5] Initializing EOLFullLSTM Model
Model parameters: 60,801


In [None]:
# ===================================================================
# 6. Training
# ===================================================================
print("\n" + "=" * 60)
print("[6] Training EOLFullLSTM")
print("=" * 60)

model, history = train_eol_full_lstm(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=80,
    lr=1e-4,
    weight_decay=1e-4,
    patience=8,
    device=device,
    results_dir="../results/eol_full_lstm",
    run_name="fd001_fd004",
)



[6] Training EOLFullLSTM
[train_eol_full_lstm] Training Configuration
Learning Rate: 0.0001
Weight Decay: 0.0001
Patience: 8
Device: cpu
[EOL-Full-LSTM] Epoch 1/80 - train_loss: 6951.1298, val_loss: 4621.0978, val_RMSE: 68.1771, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 2/80 - train_loss: 3235.2106, val_loss: 2148.5132, val_RMSE: 46.3661, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 3/80 - train_loss: 1819.3490, val_loss: 1699.7063, val_RMSE: 41.0469, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 4/80 - train_loss: 1354.1923, val_loss: 1070.8498, val_RMSE: 32.5585, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 5/80 - train_loss: 973.4891, val_loss: 964.8297, val_RMSE: 31.0449, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 6/80 - train_loss: 870.1521, val_loss: 867.2064, val_RMSE: 29.4337, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 7/80 - train_loss: 812.3348, val_loss: 830.0580, val_RMSE: 28.8160, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 8/80 - train_loss: 781.6672, val_loss: 785.0938, val_RMSE: 28.0542, lr: 1.00e-04
[EOL-Full-LSTM] Epoch 9/80 - t

In [10]:
# ===================================================================
# 7. Evaluation
# ===================================================================
print("\n" + "=" * 60)
print("[7] Evaluating EOLFullLSTM")
print("=" * 60)

metrics = evaluate_eol_full_lstm(
    model=model,
    val_loader=val_loader,
    device=device,
)



[7] Evaluating EOLFullLSTM
[evaluate_eol_full_lstm] Pointwise Metrics
MSE: 463.5554
RMSE: 21.5303 cycles
MAE: 15.8168 cycles
Bias: -2.5599 cycles
R²: 0.7301
[evaluate_eol_full_lstm] EOL/NASA Metrics (per Engine, last cycle)
RMSE_eol: 17.4663 cycles
MAE_eol: 12.9049 cycles
Bias_eol: 11.6153 cycles
NASA Score (sum): 995.09
NASA Score (mean): 19.1363
Num engines: 52


In [11]:
# ===================================================================
# 8. Zusammenfassung
# ===================================================================
print("\n" + "=" * 60)
print("[8] Final Summary")
print("=" * 60)
print("Pointwise Metrics (all validation samples):")
print(f"  RMSE: {metrics['pointwise']['rmse']:.4f} cycles")
print(f"  MAE: {metrics['pointwise']['mae']:.4f} cycles")
print(f"  Bias: {metrics['pointwise']['bias']:.4f} cycles")
print(f"  R²: {metrics['pointwise']['r2']:.4f}")
print(f"  NASA Score (pointwise, sum): {metrics['nasa_pointwise']['score_sum']:.2f}")
print(f"  NASA Score (pointwise, mean): {metrics['nasa_pointwise']['score_mean']:.4f}")

if "eol" in metrics:
    print("\nEOL Metrics (per engine, last cycle):")
    print(f"  RMSE_eol: {metrics['eol']['rmse']:.4f} cycles")
    print(f"  MAE_eol: {metrics['eol']['mae']:.4f} cycles")
    print(f"  Bias_eol: {metrics['eol']['bias']:.4f} cycles")
    print(f"  NASA Score (EOL, sum): {metrics['eol']['nasa_score_sum']:.2f}")
    print(f"  NASA Score (EOL, mean): {metrics['eol']['nasa_score_mean']:.4f}")
    print(f"  Num engines: {metrics['eol']['num_engines']}")

print("=" * 60)
print("Training complete!")
print("=" * 60)



[8] Final Summary
Pointwise Metrics (all validation samples):
  RMSE: 21.5303 cycles
  MAE: 15.8168 cycles
  Bias: -2.5599 cycles
  R²: 0.7301
  NASA Score (pointwise, sum): 605393.50
  NASA Score (pointwise, mean): 18.8861

EOL Metrics (per engine, last cycle):
  RMSE_eol: 17.4663 cycles
  MAE_eol: 12.9049 cycles
  Bias_eol: 11.6153 cycles
  NASA Score (EOL, sum): 995.09
  NASA Score (EOL, mean): 19.1363
  Num engines: 52
Training complete!
