### Import

In [1]:
import pandas as pd 
import numpy as np
from pathlib import Path

PROJECT_ROOT = Path("..")
DATA_DIR = PROJECT_ROOT / "data"

COLUMN_NAMES = (
    ["id", "cycle", "setting1", "setting2", "setting3"] + [f"sensor{i}" for i in range(1, 22)])

In [2]:
train_path = DATA_DIR / "train_FD001.txt"
test_path  = DATA_DIR / "test_FD001.txt"
rul_path   = DATA_DIR / "RUL_FD001.txt"

train_raw = pd.read_csv(train_path, sep=r"\s+", header=None, names=COLUMN_NAMES)
test_raw  = pd.read_csv(test_path,  sep=r"\s+", header=None, names=COLUMN_NAMES)

rul_true = pd.read_csv(rul_path, sep=r"\s+", header=None, names=["RUL_true"])

In [3]:
print("Train shape:", train_raw.shape)
print("Test  shape:", test_raw.shape)
print("RUL_true shape:", rul_true.shape)

print("Train engines:", train_raw["id"].nunique())
print("Test engines :", test_raw["id"].nunique())

assert list(train_raw.columns) == COLUMN_NAMES
assert list(test_raw.columns) == COLUMN_NAMES
assert rul_true.shape[0] == test_raw["id"].nunique()

Train shape: (20631, 26)
Test  shape: (13096, 26)
RUL_true shape: (100, 1)
Train engines: 100
Test engines : 100


### Cleaning

In [4]:
from preprocessing.data_clean import drop_uninformative_columns, rul_cap, compute_rul

In [5]:
train_df = train_raw.copy()

train_df = compute_rul(train_df)

RUL_CAP = 130
train_df = rul_cap(train_df, RUL_CAP)
train_df, feature_cols = drop_uninformative_columns(train_df)

train_df.head()

Unnamed: 0,id,cycle,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,sensor15,sensor17,sensor20,sensor21,RUL
0,1,1,641.82,1589.7,1400.6,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.419,130
1,1,2,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.0,23.4236,130
2,1,3,642.35,1587.99,1404.2,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442,130
3,1,4,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739,130
4,1,5,642.37,1582.85,1406.22,554.0,2388.06,9055.15,47.28,522.19,2388.04,8133.8,8.4294,393,38.9,23.4044,130


In [6]:
print("Train shape:", train_df.shape)
print("Feature cols:", feature_cols)
print("Num nodes:", len(train_df.columns))

train_df[["id", "cycle", "RUL"]].head()
train_df["RUL"].describe()

Train shape: (20631, 17)
Feature cols: ['sensor2', 'sensor3', 'sensor4', 'sensor7', 'sensor8', 'sensor9', 'sensor11', 'sensor12', 'sensor13', 'sensor14', 'sensor15', 'sensor17', 'sensor20', 'sensor21']
Num nodes: 17


count    20631.000000
mean        88.727304
std         43.444137
min          0.000000
25%         51.000000
50%        103.000000
75%        130.000000
max        130.000000
Name: RUL, dtype: float64

### Sliding Windows

In [7]:
from preprocessing.SlidingWindowClass import SlidingWindowGenerator, split_by_engine

train_df_split, val_df_split = split_by_engine(train_df, val_ratio=0.2)

sw_generator = SlidingWindowGenerator(
    window_size=30,
    feature_cols=feature_cols,
    target_col="RUL",
    id_col="id"
)

X_train, y_train = sw_generator.transform(train_df_split)
X_val,   y_val   = sw_generator.transform(val_df_split)

print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_val  :", X_val.shape,   "y_val  :", y_val.shape)

X_train: (14207, 30, 14) y_train: (14207,)
X_val  : (3524, 30, 14) y_val  : (3524,)


### Scaling

In [8]:
from preprocessing.scaling import TimeSeriesScaler

scaler = TimeSeriesScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)


print("Train mean (≈0):", X_train_scaled.mean(axis=(0, 1)))
print("Train std  (≈1):", X_train_scaled.std(axis=(0, 1)))

Train mean (≈0): [ 1.96005201e-10 -8.82388593e-12  1.84165728e-11 -3.62221302e-11
  8.86340758e-09  4.66085298e-11 -2.91752922e-11  6.28246918e-11
  7.24928281e-09 -2.53710308e-11  7.51534220e-12 -2.80741235e-14
 -2.54853615e-11 -2.08851370e-11]
Train std  (≈1): [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


### DataLoaders

In [9]:
from preprocessing.dataloaders import create_dataloaders

train_loader, val_loader = create_dataloaders(X_train_scaled, y_train, X_val_scaled, y_val)

### Training

In [None]:
import torch
from models.GNN_Transformer.st_gnn_transformer import STGNNTransformer
from models.GNN_Transformer.losses import RMSELoss

config = {
    "num_nodes": len(feature_cols),
    "input_features": 1,

    "gnn_hidden_dim": 16,
    "trans_d_model": 32,
    "trans_nhead": 4,
    "trans_layers": 1,
    "dropout_prob": 0.4,
    "epochs" : 15
}

init_adj_matrix = torch.ones(config["num_nodes"], config['num_nodes'])

model = STGNNTransformer(config, init_adj_matrix)
criterion = RMSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr = 1.5e-3, weight_decay=1e-4)


In [12]:
from trainer import train_loop

history = train_loop(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    epochs=config["epochs"],
    device="cpu",
    save_path="training/models/best_model.pt",
    config=config,
    feature_cols=feature_cols
)

Epoch [01/30] | Train RMSE: 82.036 | Val RMSE: 63.794
Best model saved with RMSE = 63.794)
Epoch [02/30] | Train RMSE: 51.335 | Val RMSE: 29.975
Best model saved with RMSE = 29.975)
Epoch [03/30] | Train RMSE: 22.212 | Val RMSE: 16.886
Best model saved with RMSE = 16.886)
Epoch [04/30] | Train RMSE: 14.807 | Val RMSE: 14.482
Best model saved with RMSE = 14.482)
Epoch [05/30] | Train RMSE: 13.773 | Val RMSE: 13.697
Best model saved with RMSE = 13.697)
Epoch [06/30] | Train RMSE: 13.363 | Val RMSE: 13.297
Best model saved with RMSE = 13.297)
Epoch [07/30] | Train RMSE: 13.092 | Val RMSE: 13.673
Epoch [08/30] | Train RMSE: 12.774 | Val RMSE: 13.934
Epoch [09/30] | Train RMSE: 12.488 | Val RMSE: 14.082
Epoch [10/30] | Train RMSE: 12.208 | Val RMSE: 13.645
Epoch [11/30] | Train RMSE: 12.012 | Val RMSE: 15.029
Epoch [12/30] | Train RMSE: 11.470 | Val RMSE: 14.821
Epoch [13/30] | Train RMSE: 11.260 | Val RMSE: 14.599
Epoch [14/30] | Train RMSE: 10.763 | Val RMSE: 15.640
Epoch [15/30] | Train 

In [14]:
from pathlib import Path
import joblib

artifact_dir = Path("training/artifacts")
artifact_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(
    scaler,
    artifact_dir / "scaler.joblib"
)

['training/artifacts/scaler.joblib']

In [15]:
import torch
import joblib

checkpoint = torch.load("training/models/best_model.pt", map_location="cpu")
config = checkpoint["config"]
feature_cols = checkpoint["feature_cols"]

scaler = joblib.load("training/artifacts/scaler.joblib")