# **1. GLOBAL SETUP, PATHS & ARTIFACT VERIFICATION (FD001–FD004)**

In [2]:
# 1. GLOBAL SETUP, PATHS & ARTIFACT VERIFICATION (FD001–FD004)

import os
import random
import json
import numpy as np
import pandas as pd

import joblib
from tensorflow import keras

# 1. Logger
def log(msg):
    print(f"[INFO] {msg}")


# 2. Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
log("Seed fixed at 42 for reproducibility.")


# 3. Google Drive Mount

try:
    from google.colab import drive
    log("Colab detected — mounting Google Drive...")
    drive.mount('/content/drive')
    log("Google Drive mounted successfully.")
except:
    log("Not running in Colab — skipping mount.")


# 4. BASE CMAPSS DIRECTORY
CMAPSS_BASE = "/content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps"
log(f"CMAPSS Base Directory: {CMAPSS_BASE}")

# 5. RAW DATASET PATHS (FD001–FD004)
FD001_TRAIN_PATH = f"{CMAPSS_BASE}/train_FD001.txt"
FD001_TEST_PATH  = f"{CMAPSS_BASE}/test_FD001.txt"
FD001_RUL_PATH   = f"{CMAPSS_BASE}/RUL_FD001.txt"

FD002_TRAIN_PATH = f"{CMAPSS_BASE}/train_FD002.txt"
FD002_TEST_PATH  = f"{CMAPSS_BASE}/test_FD002.txt"
FD002_RUL_PATH   = f"{CMAPSS_BASE}/RUL_FD002.txt"

FD003_TRAIN_PATH = f"{CMAPSS_BASE}/train_FD003.txt"
FD003_TEST_PATH  = f"{CMAPSS_BASE}/test_FD003.txt"
FD003_RUL_PATH   = f"{CMAPSS_BASE}/RUL_FD003.txt"

FD004_TRAIN_PATH = f"{CMAPSS_BASE}/train_FD004.txt"
FD004_TEST_PATH  = f"{CMAPSS_BASE}/test_FD004.txt"
FD004_RUL_PATH   = f"{CMAPSS_BASE}/RUL_FD004.txt"


# 6. MODEL, SCALER, CONFIG PATHS — EXACTLY AS PER SAVED FILES

# ===== FD001 =====
FD001_MODELS_DIR = f"{CMAPSS_BASE}/models_fd001"
FD001_MODEL_PATH = f"{FD001_MODELS_DIR}/FD001_GRU_BASE_BEST.keras"
FD001_FEATURE_SCALER_PATH = f"{FD001_MODELS_DIR}/FD001_feature_scaler_fd001.pkl"
FD001_RUL_SCALER_PATH     = f"{FD001_MODELS_DIR}/FD001_rul_scaler_fd001.pkl"
FD001_CONFIG_PATH         = f"{FD001_MODELS_DIR}/FD001_config.json"

# ===== FD002 =====
FD002_MODEL_PATH          = "/content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMAPS_MODELS/FD002_Hybrid_LSTM_GRU_seq100.h5"
FD002_CONFIG_DIR          = f"{CMAPSS_BASE}/FD002_CONFIG"
FD002_CONFIG_PATH         = f"{FD002_CONFIG_DIR}/FD002_config.json"
FD002_FEATURE_SCALER_PATH = f"{FD002_CONFIG_DIR}/FD002_feature_scaler.gz"
FD002_RUL_SCALER_PATH     = f"{FD002_CONFIG_DIR}/FD002_rul_scaler.gz"

# ===== FD003 =====
FD003_MODELS_DIR          = f"{CMAPSS_BASE}/models_fd003"
FD003_MODEL_PATH          = f"{FD003_MODELS_DIR}/FD003_GRU_TUNED_BEST.keras"
FD003_FEATURE_SCALER_PATH = f"{FD003_MODELS_DIR}/FD003_feature_scaler_fd003.pkl"
FD003_RUL_SCALER_PATH     = f"{FD003_MODELS_DIR}/FD003_rul_scaler_fd003.pkl"
FD003_CONFIG_PATH         = f"{FD003_MODELS_DIR}/FD003_config.json"

# ===== FD004 =====
FD004_MODELS_DIR          = f"{CMAPSS_BASE}/models_fd004"
FD004_MODEL_PATH          = f"{FD004_MODELS_DIR}/FD004_GRU_100_TUNED_BEST.keras"
FD004_FEATURE_SCALER_PATH = f"{FD004_MODELS_DIR}/FD004_feature_scaler_fd004.pkl"
FD004_RUL_SCALER_PATH     = f"{FD004_MODELS_DIR}/FD004_rul_scaler_fd004.pkl"
FD004_CONFIG_PATH         = f"{FD004_MODELS_DIR}/FD004_config.json"

# 7. VERIFYING ALL REQUIRED FILES EXIST
missing = []

def check(path, label):
    if os.path.exists(path):
        print(f"[OK]   {label}: {path}")
    else:
        print(f"[MISS] {label}: {path}")
        missing.append((label, path))

log("Checking raw dataset files...")
check(FD001_TRAIN_PATH, "FD001 Train")
check(FD001_TEST_PATH,  "FD001 Test")
check(FD001_RUL_PATH,   "FD001 RUL")

check(FD002_TRAIN_PATH, "FD002 Train")
check(FD002_TEST_PATH,  "FD002 Test")
check(FD002_RUL_PATH,   "FD002 RUL")

check(FD003_TRAIN_PATH, "FD003 Train")
check(FD003_TEST_PATH,  "FD003 Test")
check(FD003_RUL_PATH,   "FD003 RUL")

check(FD004_TRAIN_PATH, "FD004 Train")
check(FD004_TEST_PATH,  "FD004 Test")
check(FD004_RUL_PATH,   "FD004 RUL")

log("\nChecking model/scaler/config files...")

# FD001
check(FD001_MODEL_PATH,           "FD001 Best Model")
check(FD001_FEATURE_SCALER_PATH,  "FD001 Feature Scaler")
check(FD001_RUL_SCALER_PATH,      "FD001 RUL Scaler")
check(FD001_CONFIG_PATH,          "FD001 Config")

# FD002
check(FD002_MODEL_PATH,           "FD002 Best Model")
check(FD002_FEATURE_SCALER_PATH,  "FD002 Feature Scaler")
check(FD002_RUL_SCALER_PATH,      "FD002 RUL Scaler")
check(FD002_CONFIG_PATH,          "FD002 Config")

# FD003
check(FD003_MODEL_PATH,           "FD003 Best Model")
check(FD003_FEATURE_SCALER_PATH,  "FD003 Feature Scaler")
check(FD003_RUL_SCALER_PATH,      "FD003 RUL Scaler")
check(FD003_CONFIG_PATH,          "FD003 Config")

# FD004
check(FD004_MODEL_PATH,           "FD004 Best Model")
check(FD004_FEATURE_SCALER_PATH,  "FD004 Feature Scaler")
check(FD004_RUL_SCALER_PATH,      "FD004 RUL Scaler")
check(FD004_CONFIG_PATH,          "FD004 Config")


# 8. FINAL REPORT
print("\n================== SUMMARY ==================")
if len(missing) == 0:
    print("All required dataset files, models, scalers, and configs FOUND.")
else:
    print(" Missing files detected:")
    for label, path in missing:
        print(f" - {label}: {path}")
    raise FileNotFoundError("Some required files are missing. Fix paths or upload files before continuing.")

log("Environment setup complete")


[INFO] Seed fixed at 42 for reproducibility.
[INFO] Colab detected — mounting Google Drive...
Mounted at /content/drive
[INFO] Google Drive mounted successfully.
[INFO] CMAPSS Base Directory: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps
[INFO] Checking raw dataset files...
[OK]   FD001 Train: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/train_FD001.txt
[OK]   FD001 Test: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/test_FD001.txt
[OK]   FD001 RUL: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/RUL_FD001.txt
[OK]   FD002 Train: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/train_FD002.txt
[OK]   FD002 Test: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/test_FD002.txt
[OK]   FD002 RUL: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/RUL_FD002.txt
[OK]   FD003 Train: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/train_FD003.txt
[OK]   FD003 Test: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps

# **2. Load Configs & Inspect Structure**

In [3]:
# 2. LOAD & INSPECT CONFIGS (FD001–FD004)

log("Loading configuration JSONs for FD001–FD004...")

def load_json_config(path, label):
    if not os.path.exists(path):
        raise FileNotFoundError(f"{label} config not found at: {path}")
    with open(path, "r") as f:
        cfg = json.load(f)
    log(f"{label} config loaded. Keys: {list(cfg.keys())}")
    return cfg

cfg_fd001 = load_json_config(FD001_CONFIG_PATH, "FD001")
cfg_fd002 = load_json_config(FD002_CONFIG_PATH, "FD002")
cfg_fd003 = load_json_config(FD003_CONFIG_PATH, "FD003")
cfg_fd004 = load_json_config(FD004_CONFIG_PATH, "FD004")

def print_config_summary(fd_name, cfg):
    print("\n" + "="*70)
    print(f"CONFIG SUMMARY — {fd_name}")
    print("="*70)
    print("All keys:", list(cfg.keys()))

    # Common possible fields
    possible_fields = [
        "best_model_name",
        "best_deep_model_name",
        "model_path",
        "best_deep_model_path",
        "feature_scaler_path",
        "rul_scaler_path",
        "final_feature_columns",
        "sequence_length",
        "sequence_length_nasa",
        "nasa_shift",
        "nasa_max_rul_cap",
        "seq_len",
        "feature_cols",
    ]

    for key in possible_fields:
        if key in cfg:
            val = cfg[key]
            # Avoid printing huge lists fully
            if isinstance(val, list) and len(val) > 10:
                print(f"{key}: list of length {len(val)} (showing first 5) -> {val[:5]}")
            else:
                print(f"{key}: {val}")

print_config_summary("FD001", cfg_fd001)
print_config_summary("FD002", cfg_fd002)
print_config_summary("FD003", cfg_fd003)
print_config_summary("FD004", cfg_fd004)

log("Configs inspected.")


[INFO] Loading configuration JSONs for FD001–FD004...
[INFO] FD001 config loaded. Keys: ['best_model_name', 'model_path', 'feature_scaler_path', 'rul_scaler_path', 'feature_columns', 'final_feature_columns', 'sequence_length', 'nasa_shift', 'nasa_max_rul_cap']
[INFO] FD002 config loaded. Keys: ['best_model_name', 'model_path', 'feature_scaler_path', 'rul_scaler_path', 'feature_columns', 'final_feature_columns', 'sequence_length', 'nasa_shift', 'nasa_max_rul_cap', 'val_rmse', 'val_mae', 'test_rmse', 'test_mae', 'nasa_total_test', 'nasa_mean_test']
[INFO] FD003 config loaded. Keys: ['best_model_name', 'model_path', 'feature_scaler_path', 'rul_scaler_path', 'feature_columns', 'final_feature_columns', 'sequence_length', 'nasa_shift', 'nasa_max_rul_cap']
[INFO] FD004 config loaded. Keys: ['best_deep_model_name', 'best_deep_model_path', 'best_deep_model_target_type', 'sequence_length_nasa', 'feature_scaler_path', 'rul_scaler_path', 'base_path', 'base_feature_columns', 'final_feature_columns'

# **3. Build Unified Bundles (Model + Scalers + Meta for Each FD)**

In [4]:
# 3. BUILD UNIFIED BUNDLES (MODEL + SCALERS + META)

from pprint import pprint

def create_fd_bundle(fd_name: str, cfg: dict):
    """
    Create a unified artefact bundle for a given FD dataset.
    This abstracts away the different key names between FD001–003 vs FD004.
    """
    log(f"\nCreating bundle for {fd_name}...")

    if fd_name in ["FD001", "FD002", "FD003"]:
        model_name        = cfg.get("best_model_name", "UNKNOWN_MODEL")
        model_path        = cfg["model_path"]
        feat_scaler_path  = cfg["feature_scaler_path"]
        rul_scaler_path   = cfg["rul_scaler_path"]
        feature_cols      = cfg["final_feature_columns"]
        seq_len           = int(cfg["sequence_length"])
        nasa_shift        = float(cfg["nasa_shift"])
        nasa_cap          = float(cfg["nasa_max_rul_cap"])

    elif fd_name == "FD004":
        model_name        = cfg.get("best_deep_model_name", "UNKNOWN_MODEL")
        model_path        = cfg["best_deep_model_path"]
        feat_scaler_path  = cfg["feature_scaler_path"]
        rul_scaler_path   = cfg["rul_scaler_path"]
        feature_cols      = cfg["final_feature_columns"]
        seq_len           = int(cfg["sequence_length_nasa"])
        nasa_shift        = float(cfg["nasa_shift"])
        nasa_cap          = float(cfg["nasa_max_rul_cap"])

    else:
        raise ValueError(f"Unknown FD: {fd_name}")

    # Safety checks before loading
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"{fd_name}: model_path does not exist: {model_path}")
    if not os.path.exists(feat_scaler_path):
        raise FileNotFoundError(f"{fd_name}: feature_scaler_path does not exist: {feat_scaler_path}")
    if not os.path.exists(rul_scaler_path):
        raise FileNotFoundError(f"{fd_name}: rul_scaler_path does not exist: {rul_scaler_path}")

    # --------- MODEL LOADING ----------
    log(f"{fd_name}: Loading model from {model_path} (compile=False for inference)...")
    model = keras.models.load_model(model_path, compile=False)

    # --------- SCALER LOADING ----------
    log(f"{fd_name}: Loading feature scaler from {feat_scaler_path}")
    feature_scaler = joblib.load(feat_scaler_path)

    log(f"{fd_name}: Loading RUL scaler from {rul_scaler_path}")
    rul_scaler = joblib.load(rul_scaler_path)

    bundle = {
        "fd_name": fd_name,
        "model_name": model_name,
        "model_path": model_path,
        "model": model,
        "feature_scaler_path": feat_scaler_path,
        "feature_scaler": feature_scaler,
        "rul_scaler_path": rul_scaler_path,
        "rul_scaler": rul_scaler,
        "feature_cols": feature_cols,
        "seq_len": seq_len,
        "nasa_shift": nasa_shift,
        "nasa_max_rul_cap": nasa_cap,
    }

    log(f"{fd_name}: Bundle created successfully.")
    return bundle


# Create bundles for all four datasets
bundle_fd001 = create_fd_bundle("FD001", cfg_fd001)
bundle_fd002 = create_fd_bundle("FD002", cfg_fd002)
bundle_fd003 = create_fd_bundle("FD003", cfg_fd003)
bundle_fd004 = create_fd_bundle("FD004", cfg_fd004)

# Quick summary table for sanity check
summary_rows = []

for b in [bundle_fd001, bundle_fd002, bundle_fd003, bundle_fd004]:
    summary_rows.append({
        "FD": b["fd_name"],
        "Model": b["model_name"],
        "Seq_Len": b["seq_len"],
        "Num_Features": len(b["feature_cols"]),
        "NASA_Shift": b["nasa_shift"],
        "NASA_Max_RUL": b["nasa_max_rul_cap"],
    })

summary_df = pd.DataFrame(summary_rows)
print("\nUNIFIED BUNDLE SUMMARY")
display(summary_df)

log("All FD bundles are ready.")


[INFO] 
Creating bundle for FD001...
[INFO] FD001: Loading model from /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/models_fd001/FD001_GRU_BASE_BEST.keras (compile=False for inference)...
[INFO] FD001: Loading feature scaler from /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/models_fd001/FD001_feature_scaler_fd001.pkl
[INFO] FD001: Loading RUL scaler from /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/models_fd001/FD001_rul_scaler_fd001.pkl
[INFO] FD001: Bundle created successfully.
[INFO] 
Creating bundle for FD002...
[INFO] FD002: Loading model from /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMAPS_MODELS/FD002_Hybrid_LSTM_GRU_seq100.h5 (compile=False for inference)...
[INFO] FD002: Loading feature scaler from /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/FD002_CONFIG/FD002_feature_scaler.gz
[INFO] FD002: Loading RUL scaler from /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/FD002_CONFIG/FD002_rul_scaler.gz
[INFO] FD002: Bun

Unnamed: 0,FD,Model,Seq_Len,Num_Features,NASA_Shift,NASA_Max_RUL
0,FD001,GRU_BASE,30,93,3.0,125.0
1,FD002,HYBRID_LSTM_GRU_SEQ100,100,129,4.0,130.0
2,FD003,GRU_TUNED,30,99,2.0,125.0
3,FD004,GRU_100_TUNED,100,129,12.0,120.0


[INFO] All FD bundles are ready.


# **4. Shared Utilities (NASA Score, RMSE/MAE, Last-Window Sequence Builder)**

In [5]:
# 4. NASA SCORE + METRIC HELPERS + LAST-WINDOW SEQUENCE BUILDER

import numpy as np

# 1. NASA asymmetric score (per FD convention)

def nasa_asymmetric_score(y_true, y_pred):
    """
    Compute NASA's asymmetric RUL scoring.
    Penalizes late predictions more heavily than early ones.
    """
    y_true = np.asarray(y_true).flatten()
    y_pred = np.asarray(y_pred).flatten()

    if y_true.shape != y_pred.shape:
        raise ValueError(f"Shape mismatch in nasa_asymmetric_score: {y_true.shape} vs {y_pred.shape}")

    diffs = y_pred - y_true
    score = 0.0

    for d in diffs:
        if d < 0:
            # Predicted RUL < true RUL -> early replacement (less severe)
            score += np.exp(-d / 13) - 1
        else:
            # Predicted RUL > true RUL -> late replacement (more dangerous)
            score += np.exp(d / 10) - 1

    return score


# 2. RMSE / MAE helpers

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true).flatten()
    y_pred = np.asarray(y_pred).flatten()
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))


def mae(y_true, y_pred):
    y_true = np.asarray(y_true).flatten()
    y_pred = np.asarray(y_pred).flatten()
    return float(np.mean(np.abs(y_true - y_pred)))


# 3. Last-window sequence builder (per engine)

def build_last_window_sequences(df, feature_cols, seq_len, target_col=None):
    """
    Build last-window sequences for each engine in df.

    Assumes df has at least:
      - 'unit' : engine ID
      - 'cycle': time index
      - feature_cols: list of feature columns (already scaled or raw as needed)

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing at least 'unit', 'cycle', and feature_cols.
    feature_cols : list[str]
        Feature column names to use in sequences.
    seq_len : int
        Sequence length (number of timesteps per engine).
    target_col : str or None
        If provided and present in df, we will also return the last value
        of this column per engine (e.g., true RUL).

    Returns
    -------
    X_seq : np.ndarray
        Shape (num_engines, seq_len, num_features)
    y_last : np.ndarray or None
        Shape (num_engines,) if target_col is provided and present, else None.
    engine_ids : np.ndarray
        Array of engine IDs for each sequence.
    """
    if "unit" not in df.columns or "cycle" not in df.columns:
        raise KeyError("Dataframe must contain 'unit' and 'cycle' columns.")

    X_list = []
    y_list = []
    engine_ids = []

    for unit_id in sorted(df["unit"].unique()):
        df_unit = df[df["unit"] == unit_id].sort_values("cycle")

        if len(df_unit) < seq_len:
            # For safety, we skip engines that are shorter than seq_len
            continue

        last_window = df_unit.iloc[-seq_len:]  # last 'seq_len' timesteps

        # Extract feature sequence
        X_window = last_window[feature_cols].values  # (seq_len, num_features)
        X_list.append(X_window)
        engine_ids.append(unit_id)

        # Optional target
        if target_col is not None and target_col in df_unit.columns:
            y_last_val = last_window[target_col].iloc[-1]
            y_list.append(y_last_val)

    X_seq = np.array(X_list, dtype=np.float32)
    engine_ids = np.array(engine_ids)

    if target_col is not None and len(y_list) > 0:
        y_last = np.array(y_list, dtype=np.float32)
    else:
        y_last = None

    log(f"Built sequences: X_seq shape = {X_seq.shape}, y_last = {None if y_last is None else y_last.shape}, engines = {len(engine_ids)}")
    return X_seq, y_last, engine_ids


log("NASA score, metrics, and sequence builder ready.")


[INFO] NASA score, metrics, and sequence builder ready.


# **5. Load Test Data + Attach True RUL (FD001–FD004)**

In [6]:
# 5. LOAD TEST DATA + ATTACH TRUE RUL (FD001–FD004)

# Standard CMAPSS column names
cols_cmaps = (
    ["unit", "cycle", "setting1", "setting2", "setting3"] +
    [f"sensor{i}" for i in range(1, 22)]
)

def load_fd_test_with_rul(fd_name, test_path, rul_path, cfg):
    """
    Load CMAPSS test data + RUL file for a given FD, attach true RUL per engine,
    and apply NASA RUL capping as per config.
    """
    log(f"\nLoading test + RUL for {fd_name}...")

    # 1. Load test data
    if not os.path.exists(test_path):
        raise FileNotFoundError(f"{fd_name}: test file not found at {test_path}")
    df_test = pd.read_csv(test_path, sep=r"\s+", header=None)
    df_test = df_test.iloc[:, :26]  # ensure 26 columns
    df_test.columns = cols_cmaps

    # Ensure unit and cycle are integers
    df_test["unit"] = df_test["unit"].astype(int)
    df_test["cycle"] = df_test["cycle"].astype(int)

    # 2. Load RUL file
    if not os.path.exists(rul_path):
        raise FileNotFoundError(f"{fd_name}: RUL file not found at {rul_path}")
    rul_values = pd.read_csv(rul_path, sep=r"\s+", header=None).iloc[:, 0].values

    # 3. Map RUL to engine IDs in sorted order
    engine_ids = sorted(df_test["unit"].unique())
    if len(engine_ids) != len(rul_values):
        raise ValueError(
            f"{fd_name}: mismatch between number of engines in test set "
            f"({len(engine_ids)}) and RUL entries ({len(rul_values)})."
        )

    engine_to_rul = {eid: rul for eid, rul in zip(engine_ids, rul_values)}
    df_test["RUL_true_raw"] = df_test["unit"].map(engine_to_rul)

    # 4. Apply NASA RUL cap from config
    nasa_cap = float(cfg["nasa_max_rul_cap"])
    df_test["RUL_true_capped"] = np.minimum(df_test["RUL_true_raw"], nasa_cap)

    log(f"{fd_name}: test shape = {df_test.shape}, engines = {len(engine_ids)}, NASA RUL cap = {nasa_cap}")
    return df_test


# Load all four test sets with true RUL
test_fd001 = load_fd_test_with_rul("FD001", FD001_TEST_PATH, FD001_RUL_PATH, cfg_fd001)
test_fd002 = load_fd_test_with_rul("FD002", FD002_TEST_PATH, FD002_RUL_PATH, cfg_fd002)
test_fd003 = load_fd_test_with_rul("FD003", FD003_TEST_PATH, FD003_RUL_PATH, cfg_fd003)
test_fd004 = load_fd_test_with_rul("FD004", FD004_TEST_PATH, FD004_RUL_PATH, cfg_fd004)

# Quick sanity checks
print("\nQUICK TEST DF OVERVIEW:")
for name, df in [
    ("FD001", test_fd001),
    ("FD002", test_fd002),
    ("FD003", test_fd003),
    ("FD004", test_fd004),
]:
    print(f"\n{name}: shape = {df.shape}, units = {df['unit'].nunique()}")
    print(df[["unit", "cycle", "RUL_true_raw", "RUL_true_capped"]].head())

log("Test data with true RUL loaded for all FDs.")


[INFO] 
Loading test + RUL for FD001...
[INFO] FD001: test shape = (13096, 28), engines = 100, NASA RUL cap = 125.0
[INFO] 
Loading test + RUL for FD002...
[INFO] FD002: test shape = (33991, 28), engines = 259, NASA RUL cap = 130.0
[INFO] 
Loading test + RUL for FD003...
[INFO] FD003: test shape = (16596, 28), engines = 100, NASA RUL cap = 125.0
[INFO] 
Loading test + RUL for FD004...
[INFO] FD004: test shape = (41214, 28), engines = 248, NASA RUL cap = 120.0

QUICK TEST DF OVERVIEW:

FD001: shape = (13096, 28), units = 100
   unit  cycle  RUL_true_raw  RUL_true_capped
0     1      1           112            112.0
1     1      2           112            112.0
2     1      3           112            112.0
3     1      4           112            112.0
4     1      5           112            112.0

FD002: shape = (33991, 28), units = 259
   unit  cycle  RUL_true_raw  RUL_true_capped
0     1      1            18             18.0
1     1      2            18             18.0
2     1      3 

# **6. Unified Inference Pipeline (Per FD)**

In [7]:
# 6. UNIFIED INFERENCE FUNCTION (FD001–FD004)

def run_fd_inference(fd_name, bundle, df_test):
    """
    Unified inference pipeline for FD001–FD004.
    Steps:
      1. Extract & scale features
      2. Build last-window sequences per engine
      3. Predict RUL
      4. Inverse-scale predictions
      5. Apply NASA calibration
      6. Compute RMSE / MAE / NASA score
      7. Return clean results dataframe + metrics
    """

    log(f"\n===== Running Unified Inference for {fd_name} =====")

    feature_cols = bundle["feature_cols"]
    seq_len      = bundle["seq_len"]
    model        = bundle["model"]
    feat_scaler  = bundle["feature_scaler"]
    rul_scaler   = bundle["rul_scaler"]
    shift        = bundle["nasa_shift"]
    max_rul_cap  = bundle["nasa_max_rul_cap"]

    # 1. Scale features
    if not all(col in df_test.columns for col in feature_cols):
        missing = [c for c in feature_cols if c not in df_test.columns]
        raise KeyError(f"{fd_name}: Missing feature columns: {missing}")

    df_scaled = df_test.copy()
    df_scaled[feature_cols] = feat_scaler.transform(df_scaled[feature_cols])


    # 2. Build last-window sequences per engine
    X_seq, y_true_last, engine_ids = build_last_window_sequences(
        df_scaled,
        feature_cols=feature_cols,
        seq_len=seq_len,
        target_col="RUL_true_capped"
    )

    if y_true_last is None:
        raise RuntimeError(f"{fd_name}: Failed to extract true RUL labels.")


    # 3. Predict RUL (scaled)
    y_pred_scaled = model.predict(X_seq).flatten()


    # 4. Inverse-scale predicted RUL
    y_pred_raw = rul_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()


    # 5. Apply NASA calibration (shift + cap)
    # If shift is positive → subtract (model predicted high, need correction)
    # If shift is negative → add (model predicted low)
    y_pred_cal = y_pred_raw - shift
    y_pred_cal = np.maximum(0, np.minimum(y_pred_cal, max_rul_cap))  # cap to range


    # 6. Compute metrics
    rmse_val = rmse(y_true_last, y_pred_cal)
    mae_val  = mae(y_true_last, y_pred_cal)
    nasa_val = nasa_asymmetric_score(y_true_last, y_pred_cal)
    nasa_mean = nasa_val / len(y_true_last)


    # 7. Build Output DataFrame
    df_out = pd.DataFrame({
        "unit": engine_ids,
        "true_RUL": y_true_last,
        "pred_RUL_raw": y_pred_raw,
        "pred_RUL_calibrated": y_pred_cal,
        "abs_error": np.abs(y_true_last - y_pred_cal),
        "sq_error": (y_true_last - y_pred_cal) ** 2
    })

    log(f"{fd_name}: RMSE={rmse_val:.4f}, MAE={mae_val:.4f}, NASA_mean={nasa_mean:.4f}")
    log(f"{fd_name}: Inference complete for {len(engine_ids)} engines.")

    return df_out, {
        "rmse": rmse_val,
        "mae": mae_val,
        "nasa_total": nasa_val,
        "nasa_mean": nasa_mean,
        "num_engines": len(engine_ids)
    }


log("Unified inference function ready.")


[INFO] Unified inference function ready.


# **7. Add Rolling + Delta Features for Test Sets (FD001–FD004)**

In [8]:
# 7. FEATURE ENGINEERING (ROLLING + DELTA) FOR TEST SETS

log("Defining rolling + delta feature engineering function...")

def add_rolling_and_delta_features(df, sensor_cols, win_short=3, win_long=5):
    """
    Adds rolling mean/std and delta (difference) features per sensor, per engine.
    Same logic as used in the FD001 training pipeline.

    For each sensor in sensor_cols, per engine:
      - {sensor}_roll3_mean, {sensor}_roll3_std
      - {sensor}_roll5_mean, {sensor}_roll5_std
      - {sensor}_delta

    NaNs from rolling std at early cycles are filled with 0.
    """
    df = df.sort_values(["unit", "cycle"]).copy()

    for sensor in sensor_cols:
        grp = df.groupby("unit")[sensor]

        # Rolling windows (short = 3)
        df[f"{sensor}_roll{win_short}_mean"] = (
            grp.rolling(win_short, min_periods=1)
               .mean()
               .reset_index(0, drop=True)
        )
        df[f"{sensor}_roll{win_short}_std"] = (
            grp.rolling(win_short, min_periods=1)
               .std()
               .reset_index(0, drop=True)
        )

        # Rolling windows (long = 5)
        df[f"{sensor}_roll{win_long}_mean"] = (
            grp.rolling(win_long, min_periods=1)
               .mean()
               .reset_index(0, drop=True)
        )
        df[f"{sensor}_roll{win_long}_std"] = (
            grp.rolling(win_long, min_periods=1)
               .std()
               .reset_index(0, drop=True)
        )

        # Delta (cycle-to-cycle difference)
        df[f"{sensor}_delta"] = grp.diff().fillna(0).values

    # Replace NaNs from rolling std at early cycles
    df = df.fillna(0.0)
    return df



# APPLY FEATURE ENGINEERING TO TEST SETS FOR ALL FDs
log("Applying rolling + delta feature engineering to TEST sets...")

# Sensor columns per FD (raw sensors only)
sensor_cols_fd001 = [c for c in test_fd001.columns if c.startswith("sensor")]
sensor_cols_fd002 = [c for c in test_fd002.columns if c.startswith("sensor")]
sensor_cols_fd003 = [c for c in test_fd003.columns if c.startswith("sensor")]
sensor_cols_fd004 = [c for c in test_fd004.columns if c.startswith("sensor")]

test_fd001_fe = add_rolling_and_delta_features(test_fd001, sensor_cols_fd001)
test_fd002_fe = add_rolling_and_delta_features(test_fd002, sensor_cols_fd002)
test_fd003_fe = add_rolling_and_delta_features(test_fd003, sensor_cols_fd003)
test_fd004_fe = add_rolling_and_delta_features(test_fd004, sensor_cols_fd004)

log("Feature engineering complete for TEST sets.")
print("FD001 test FE shape:", test_fd001_fe.shape)
print("FD002 test FE shape:", test_fd002_fe.shape)
print("FD003 test FE shape:", test_fd003_fe.shape)
print("FD004 test FE shape:", test_fd004_fe.shape)


[INFO] Defining rolling + delta feature engineering function...
[INFO] Applying rolling + delta feature engineering to TEST sets...


  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_short}_mean"] = (
  df[f"{sensor}_roll{win_short}_std"] = (
  df[f"{sensor}_roll{win_long}_mean"] = (
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_short}_mean"] = (
  df[f"{sensor}_roll{win_short}_std"] = (
  df[f"{sensor}_roll{win_long}_mean"] = (
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_short}_mean"] = (
  df[f"{sensor}_roll{win_short}_std"] = (
  df[f"{sensor}_roll{win_long}_mean"] = (
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values


[INFO] Feature engineering complete for TEST sets.
FD001 test FE shape: (13096, 133)
FD002 test FE shape: (33991, 133)
FD003 test FE shape: (16596, 133)
FD004 test FE shape: (41214, 133)


  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_short}_mean"] = (
  df[f"{sensor}_roll{win_short}_std"] = (
  df[f"{sensor}_roll{win_long}_mean"] = (
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values


# **8. RUN INFERENCE FOR ALL FDs & BUILD METRICS SUMMARY**

In [9]:
# 8. — RUN INFERENCE FOR ALL FDs & BUILD METRICS SUMMARY

# 1. Run unified inference for each FD using ENGINEERED test sets
res_fd001, metrics_fd001 = run_fd_inference("FD001", bundle_fd001, test_fd001_fe)
res_fd002, metrics_fd002 = run_fd_inference("FD002", bundle_fd002, test_fd002_fe)
res_fd003, metrics_fd003 = run_fd_inference("FD003", bundle_fd003, test_fd003_fe)
res_fd004, metrics_fd004 = run_fd_inference("FD004", bundle_fd004, test_fd004_fe)

# 2. Build a unified metrics table
summary_rows = []

# FD001
summary_rows.append({
    "FD": "FD001",
    "Model": bundle_fd001["model_name"],
    "Seq_Len": bundle_fd001["seq_len"],
    "Val_RMSE": cfg_fd001.get("val_rmse", None),
    "Val_MAE": cfg_fd001.get("val_mae", None),
    "Test_RMSE_cal": metrics_fd001["rmse"],
    "Test_MAE_cal": metrics_fd001["mae"],
    "NASA_Mean_cal": metrics_fd001["nasa_mean"],
})

# FD002
summary_rows.append({
    "FD": "FD002",
    "Model": bundle_fd002["model_name"],
    "Seq_Len": bundle_fd002["seq_len"],
    "Val_RMSE": cfg_fd002.get("val_rmse", None),
    "Val_MAE": cfg_fd002.get("val_mae", None),
    "Test_RMSE_cal": metrics_fd002["rmse"],
    "Test_MAE_cal": metrics_fd002["mae"],
    "NASA_Mean_cal": metrics_fd002["nasa_mean"],
})

# FD003
summary_rows.append({
    "FD": "FD003",
    "Model": bundle_fd003["model_name"],
    "Seq_Len": bundle_fd003["seq_len"],
    "Val_RMSE": cfg_fd003.get("val_rmse", None),
    "Val_MAE": cfg_fd003.get("val_mae", None),
    "Test_RMSE_cal": metrics_fd003["rmse"],
    "Test_MAE_cal": metrics_fd003["mae"],
    "NASA_Mean_cal": metrics_fd003["nasa_mean"],
})

# FD004
summary_rows.append({
    "FD": "FD004",
    "Model": bundle_fd004["model_name"],
    "Seq_Len": bundle_fd004["seq_len"],
    # FD004 config stores NASA best val score, not val RMSE/MAE
    "Val_RMSE": None,
    "Val_MAE": None,
    "Test_RMSE_cal": metrics_fd004["rmse"],
    "Test_MAE_cal": metrics_fd004["mae"],
    "NASA_Mean_cal": metrics_fd004["nasa_mean"],
})

metrics_summary_df = pd.DataFrame(summary_rows)

print("\n================ UNIFIED TEST METRICS (CALIBRATED) ================")
display(metrics_summary_df)

# 3. Quick peek at one result dataframe (e.g., FD001)
print("\nFD001 — First 10 engines (true vs predicted RUL, calibrated):")
display(res_fd001.head(10))

log("Unified inference run for all FDs.")


[INFO] 
===== Running Unified Inference for FD001 =====
[INFO] Built sequences: X_seq shape = (100, 30, 93), y_last = (100,), engines = 100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step
[INFO] FD001: RMSE=14.3260, MAE=10.6828, NASA_mean=3.5616
[INFO] FD001: Inference complete for 100 engines.
[INFO] 
===== Running Unified Inference for FD002 =====
[INFO] Built sequences: X_seq shape = (168, 100, 129), y_last = (168,), engines = 168
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 159ms/step
[INFO] FD002: RMSE=12.9961, MAE=9.8232, NASA_mean=2.1830
[INFO] FD002: Inference complete for 168 engines.
[INFO] 
===== Running Unified Inference for FD003 =====
[INFO] Built sequences: X_seq shape = (100, 30, 99), y_last = (100,), engines = 100




[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m1s[0m 368ms/step



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 137ms/step
[INFO] FD003: RMSE=13.1476, MAE=10.0236, NASA_mean=2.4736
[INFO] FD003: Inference complete for 100 engines.
[INFO] 
===== Running Unified Inference for FD004 =====
[INFO] Built sequences: X_seq shape = (188, 100, 129), y_last = (188,), engines = 188
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 187ms/step
[INFO] FD004: RMSE=21.7728, MAE=17.2332, NASA_mean=8.4547
[INFO] FD004: Inference complete for 188 engines.



Unnamed: 0,FD,Model,Seq_Len,Val_RMSE,Val_MAE,Test_RMSE_cal,Test_MAE_cal,NASA_Mean_cal
0,FD001,GRU_BASE,30,,,14.325994,10.682761,3.561591
1,FD002,HYBRID_LSTM_GRU_SEQ100,100,13.480088,9.636921,12.996126,9.823244,2.183039
2,FD003,GRU_TUNED,30,,,13.147555,10.023581,2.473596
3,FD004,GRU_100_TUNED,100,,,21.772821,17.233206,8.45468



FD001 — First 10 engines (true vs predicted RUL, calibrated):


Unnamed: 0,unit,true_RUL,pred_RUL_raw,pred_RUL_calibrated,abs_error,sq_error
0,1,112.0,117.458557,114.458557,2.458557,6.044503
1,2,98.0,122.716423,119.716423,21.716423,471.603027
2,3,69.0,53.042122,50.042122,18.957878,359.401154
3,4,82.0,92.629005,89.629005,7.629005,58.201725
4,5,91.0,95.171135,92.171135,1.171135,1.371557
5,6,93.0,112.567505,109.567505,16.567505,274.482208
6,7,91.0,108.567787,105.567787,14.567787,212.220428
7,8,95.0,94.905907,91.905907,3.094093,9.573414
8,9,111.0,122.697899,119.697899,8.697899,75.653442
9,10,96.0,82.807076,79.807076,16.192924,262.210815


[INFO] Unified inference run for all FDs.


# **9. SAVE UNIFIED RESULTS & INDEX**

In [10]:
# 9. SAVE UNIFIED RESULTS (METRICS + PER-FD PREDICTIONS)

# Base directory to store unified outputs
UNIFIED_OUT_DIR = os.path.join(CMAPSS_BASE, "UNIFIED_FD001_FD004")
os.makedirs(UNIFIED_OUT_DIR, exist_ok=True)

log(f"Saving unified outputs to: {UNIFIED_OUT_DIR}")

# 1. Save unified metrics table
metrics_path = os.path.join(UNIFIED_OUT_DIR, "FD001_FD004_UNIFIED_METRICS.csv")
metrics_summary_df.to_csv(metrics_path, index=False)
log(f"Unified metrics saved to: {metrics_path}")

# 2. Save per-FD prediction results
fd001_out_path = os.path.join(UNIFIED_OUT_DIR, "FD001_TEST_PREDICTIONS.csv")
fd002_out_path = os.path.join(UNIFIED_OUT_DIR, "FD002_TEST_PREDICTIONS.csv")
fd003_out_path = os.path.join(UNIFIED_OUT_DIR, "FD003_TEST_PREDICTIONS.csv")
fd004_out_path = os.path.join(UNIFIED_OUT_DIR, "FD004_TEST_PREDICTIONS.csv")

res_fd001.to_csv(fd001_out_path, index=False)
res_fd002.to_csv(fd002_out_path, index=False)
res_fd003.to_csv(fd003_out_path, index=False)
res_fd004.to_csv(fd004_out_path, index=False)

log(f"FD001 predictions saved to: {fd001_out_path}")
log(f"FD002 predictions saved to: {fd002_out_path}")
log(f"FD003 predictions saved to: {fd003_out_path}")
log(f"FD004 predictions saved to: {fd004_out_path}")

# 3. Save a small JSON index pointing to configs (for VS Code / FastAPI use)
unified_index = {
    "FD001": {
        "config_path": FD001_CONFIG_PATH,
        "model_path": bundle_fd001["model_path"],
    },
    "FD002": {
        "config_path": FD002_CONFIG_PATH,
        "model_path": bundle_fd002["model_path"],
    },
    "FD003": {
        "config_path": FD003_CONFIG_PATH,
        "model_path": bundle_fd003["model_path"],
    },
    "FD004": {
        "config_path": FD004_CONFIG_PATH,
        "model_path": bundle_fd004["model_path"],
    },
}

unified_index_path = os.path.join(UNIFIED_OUT_DIR, "UNIFIED_MODEL_INDEX.json")
with open(unified_index_path, "w") as f:
    json.dump(unified_index, f, indent=4)

log(f"Unified model index JSON saved to: {unified_index_path}")

print("\nSaved files:")
print(" - Metrics:", metrics_path)
print(" - FD001 predictions:", fd001_out_path)
print(" - FD002 predictions:", fd002_out_path)
print(" - FD003 predictions:", fd003_out_path)
print(" - FD004 predictions:", fd004_out_path)
print(" - Unified index:", unified_index_path)

log("Unified outputs persisted.")


[INFO] Saving unified outputs to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004
[INFO] Unified metrics saved to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004/FD001_FD004_UNIFIED_METRICS.csv
[INFO] FD001 predictions saved to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004/FD001_TEST_PREDICTIONS.csv
[INFO] FD002 predictions saved to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004/FD002_TEST_PREDICTIONS.csv
[INFO] FD003 predictions saved to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004/FD003_TEST_PREDICTIONS.csv
[INFO] FD004 predictions saved to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004/FD004_TEST_PREDICTIONS.csv
[INFO] Unified model index JSON saved to: /content/drive/MyDrive/OIL PREDICTIVE MAINTAINANCE/CMaps/UNIFIED_FD001_FD004/UNIFIED_MODEL_INDEX.json

Saved files:
 - Metrics: /content/drive/MyDriv

# **10. Single-Engine Prediction Helper (for API / Streamlit)**

In [11]:
# 10. SINGLE-ENGINE PREDICTION HELPER (FOR API / STREAMLIT)

# Mapping FD name -> bundle for convenience
BUNDLES = {
    "FD001": bundle_fd001,
    "FD002": bundle_fd002,
    "FD003": bundle_fd003,
    "FD004": bundle_fd004,
}

def predict_single_engine(fd_name: str, df_engine_raw: pd.DataFrame):
    """
    Predict RUL for a SINGLE engine for a given FD dataset.

    Expected input:
      - fd_name in {"FD001", "FD002", "FD003", "FD004"}
      - df_engine_raw: DataFrame containing ONLY ONE engine, with columns:
          ['unit', 'cycle', 'setting1', 'setting2', 'setting3', 'sensor1'..'sensor21']
        (no engineered features, no scaling)

    Returns:
      - result dict with raw & calibrated RUL predictions for the last cycle.
    """
    if fd_name not in BUNDLES:
        raise ValueError(f"Unknown fd_name '{fd_name}'. Must be one of {list(BUNDLES.keys())}.")

    bundle = BUNDLES[fd_name]
    feature_cols = bundle["feature_cols"]
    seq_len      = bundle["seq_len"]
    feat_scaler  = bundle["feature_scaler"]
    rul_scaler   = bundle["rul_scaler"]
    shift        = bundle["nasa_shift"]
    max_rul_cap  = bundle["nasa_max_rul_cap"]

    # Basic checks
    if df_engine_raw["unit"].nunique() != 1:
        raise ValueError("df_engine_raw must contain exactly ONE engine (one unique 'unit').")

    # Sort by cycle
    df_engine_raw = df_engine_raw.sort_values("cycle").copy()


    # 1. Add rolling + delta features
    sensor_cols = [c for c in df_engine_raw.columns if c.startswith("sensor")]
    df_engine_fe = add_rolling_and_delta_features(df_engine_raw, sensor_cols)


    # 2. Attach a dummy capped RUL column if missing
    # (needed only to satisfy sequence builder target_col, but I won't use y_true)
    if "RUL_true_capped" not in df_engine_fe.columns:
        df_engine_fe["RUL_true_capped"] = 0.0  # dummy values


    # 3. Scale features
    missing_feats = [c for c in feature_cols if c not in df_engine_fe.columns]
    if missing_feats:
        raise KeyError(
            f"{fd_name}: Missing engineered feature columns in single-engine DF: {missing_feats[:10]}..."
        )

    df_scaled = df_engine_fe.copy()
    df_scaled[feature_cols] = feat_scaler.transform(df_scaled[feature_cols])


    # 4. Build last-window sequence
    # For a single engine, build_last_window_sequences will return exactly 1 sequence.
    X_seq, _, engine_ids = build_last_window_sequences(
        df_scaled,
        feature_cols=feature_cols,
        seq_len=seq_len,
        target_col="RUL_true_capped",
    )

    if X_seq.shape[0] == 0:
        raise RuntimeError(
            f"{fd_name}: Not enough timesteps for this engine. "
            f"Need at least seq_len={seq_len}, but got {len(df_engine_raw)}."
        )


    # 5. Predict (scaled RUL)
    y_pred_scaled = bundle["model"].predict(X_seq).flatten()
    y_pred_raw = rul_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

    # Only one engine here
    pred_raw = float(y_pred_raw[0])


    # 6. Apply NASA calibration (shift + cap)
    pred_cal = pred_raw - shift
    pred_cal = max(0.0, min(pred_cal, max_rul_cap))


    # 7. Build result dict
    unit_id = int(df_engine_raw["unit"].iloc[0])
    last_cycle = int(df_engine_raw["cycle"].iloc[-1])

    result = {
        "fd_name": fd_name,
        "unit": unit_id,
        "last_cycle": last_cycle,
        "seq_len_used": seq_len,
        "pred_RUL_raw": pred_raw,
        "pred_RUL_calibrated": pred_cal,
        "nasa_shift": shift,
        "nasa_max_rul_cap": max_rul_cap,
    }

    log(
        f"Single-engine prediction -> {fd_name} | unit={unit_id} | "
        f"cycle={last_cycle} | RUL_raw={pred_raw:.2f} | RUL_cal={pred_cal:.2f}"
    )
    return result



# QUICK DEMO USING ONE ENGINE FROM FD001 TEST
# This is just to show usage inside the notebook; safe to remove later if you want.

example_engine_id = 1
df_example_fd001 = test_fd001[test_fd001["unit"] == example_engine_id][
    ["unit", "cycle", "setting1", "setting2", "setting3"] +
    [f"sensor{i}" for i in range(1, 22)]
]

example_result = predict_single_engine("FD001", df_example_fd001)
print("\nExample single-engine prediction (FD001, unit=1):")
for k, v in example_result.items():
    print(f"  {k}: {v}")

log("Single-engine prediction helper ready.")


[INFO] Built sequences: X_seq shape = (1, 30, 93), y_last = (1,), engines = 1
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[INFO] Single-engine prediction -> FD001 | unit=1 | cycle=31 | RUL_raw=117.46 | RUL_cal=114.46

Example single-engine prediction (FD001, unit=1):
  fd_name: FD001
  unit: 1
  last_cycle: 31
  seq_len_used: 30
  pred_RUL_raw: 117.45854187011719
  pred_RUL_calibrated: 114.45854187011719
  nasa_shift: 3.0
  nasa_max_rul_cap: 125.0
[INFO] Single-engine prediction helper ready.


  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df[f"{sensor}_roll{win_short}_mean"] = (
  df[f"{sensor}_roll{win_short}_std"] = (
  df[f"{sensor}_roll{win_long}_mean"] = (
  df[f"{sensor}_roll{win_long}_std"] = (
  df[f"{sensor}_delta"] = grp.diff().fillna(0).values
  df_engine_fe["RUL_true_capped"] = 0.0  # dummy values
