In [1]:
import pandas as pd 
import numpy as np 
import torch 
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

Vediamo se funziona import dei dati.

In [2]:
DATA_DIR = "../data"

TRAIN_PATH = f"{DATA_DIR}/train_FD001.txt"
TEST_PATH  = f"{DATA_DIR}/test_FD001.txt"
RUL_PATH   = f"{DATA_DIR}/RUL_FD001.txt"

In [None]:
column_names = (
    ["id", "cycle"] +
    ["setting1", "setting2", "setting3"] +
    [f"sensor{i}" for i in range(1, 22)]
)

train_df = pd.read_csv(
    TRAIN_PATH,
    sep=r"\s+",
    header=None,
    names=column_names
)

test_df = pd.read_csv(
    TEST_PATH,
    sep=r"\s+",
    header=None,
    names=column_names
)

In [5]:
true_rul = np.loadtxt(RUL_PATH)

In [6]:
print(train_df.shape)

(20631, 26)


In [7]:
train_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [8]:
def drop_uninformative_columns(df, cols=None):
    """
    Rimuove le colonne non utilizzate per il modello.
    Restituisce anche la lista delle colonne effettivamente droppate.
    """
    if cols is None:
        cols = [
            'setting1', 'setting2', 'setting3',
            'sensor1', 'sensor5', 'sensor6',
            'sensor10', 'sensor16', 'sensor18', 'sensor19'
        ]

    df_clean = df.copy()
    to_drop = [c for c in cols if c in df_clean.columns]
    df_clean.drop(columns=to_drop, inplace=True)

    return df_clean

In [9]:
train_df = drop_uninformative_columns(train_df)
print(train_df.head())

   id  cycle  sensor2  sensor3  sensor4  sensor7  sensor8  sensor9  sensor11  \
0   1      1   641.82  1589.70  1400.60   554.36  2388.06  9046.19     47.47   
1   1      2   642.15  1591.82  1403.14   553.75  2388.04  9044.07     47.49   
2   1      3   642.35  1587.99  1404.20   554.26  2388.08  9052.94     47.27   
3   1      4   642.35  1582.79  1401.87   554.45  2388.11  9049.48     47.13   
4   1      5   642.37  1582.85  1406.22   554.00  2388.06  9055.15     47.28   

   sensor12  sensor13  sensor14  sensor15  sensor17  sensor20  sensor21  
0    521.66   2388.02   8138.62    8.4195       392     39.06   23.4190  
1    522.28   2388.07   8131.49    8.4318       392     39.00   23.4236  
2    522.42   2388.03   8133.23    8.4178       390     38.95   23.3442  
3    522.86   2388.08   8133.83    8.3682       392     38.88   23.3739  
4    522.19   2388.04   8133.80    8.4294       393     38.90   23.4044  


In [10]:
def compute_rul(df, id_col="id", cycle_col="cycle"):
    """
    Calcola la RUL per ogni riga:
        RUL = max_cycle(id) - cycle
    """
    df_rul = df.copy()
    max_cycles = df_rul.groupby(id_col)[cycle_col].max()
    df_rul["RUL"] = df_rul[id_col].map(max_cycles) - df_rul[cycle_col]
    return df_rul


def rul_cap(df, max_rul=125):
    """
    Applica un limite superiore alla RUL (C-MAPSS richiede capping).
    """
    if "RUL" not in df.columns:
        raise ValueError("RUL must be computed before capping.")

    df_capped = df.copy()
    df_capped["RUL"] = df_capped["RUL"].clip(upper=max_rul)
    return df_capped

In [11]:
train_df = compute_rul(train_df)
print(train_df.head())

   id  cycle  sensor2  sensor3  sensor4  sensor7  sensor8  sensor9  sensor11  \
0   1      1   641.82  1589.70  1400.60   554.36  2388.06  9046.19     47.47   
1   1      2   642.15  1591.82  1403.14   553.75  2388.04  9044.07     47.49   
2   1      3   642.35  1587.99  1404.20   554.26  2388.08  9052.94     47.27   
3   1      4   642.35  1582.79  1401.87   554.45  2388.11  9049.48     47.13   
4   1      5   642.37  1582.85  1406.22   554.00  2388.06  9055.15     47.28   

   sensor12  sensor13  sensor14  sensor15  sensor17  sensor20  sensor21  RUL  
0    521.66   2388.02   8138.62    8.4195       392     39.06   23.4190  191  
1    522.28   2388.07   8131.49    8.4318       392     39.00   23.4236  190  
2    522.42   2388.03   8133.23    8.4178       390     38.95   23.3442  189  
3    522.86   2388.08   8133.83    8.3682       392     38.88   23.3739  188  
4    522.19   2388.04   8133.80    8.4294       393     38.90   23.4044  187  


In [12]:
train_df = rul_cap(train_df)
print(train_df.head())

   id  cycle  sensor2  sensor3  sensor4  sensor7  sensor8  sensor9  sensor11  \
0   1      1   641.82  1589.70  1400.60   554.36  2388.06  9046.19     47.47   
1   1      2   642.15  1591.82  1403.14   553.75  2388.04  9044.07     47.49   
2   1      3   642.35  1587.99  1404.20   554.26  2388.08  9052.94     47.27   
3   1      4   642.35  1582.79  1401.87   554.45  2388.11  9049.48     47.13   
4   1      5   642.37  1582.85  1406.22   554.00  2388.06  9055.15     47.28   

   sensor12  sensor13  sensor14  sensor15  sensor17  sensor20  sensor21  RUL  
0    521.66   2388.02   8138.62    8.4195       392     39.06   23.4190  125  
1    522.28   2388.07   8131.49    8.4318       392     39.00   23.4236  125  
2    522.42   2388.03   8133.23    8.4178       390     38.95   23.3442  125  
3    522.86   2388.08   8133.83    8.3682       392     38.88   23.3739  125  
4    522.19   2388.04   8133.80    8.4294       393     38.90   23.4044  125  


Ora dobbiamo creare le SlidingWindows 

In [13]:
class SlidingWindowGenerator:
    def __init__(self, window_size, feature_cols, target_col, id_col = "id", stride = 1):
        self.window_size = window_size
        self.feature_cols = feature_cols
        self.target_col = target_col
        self.id_col = id_col
        self.stride = stride
    
    def transform(self, df):

        X_list, y_list = [], []

        for id, group in df.groupby(self.id_col):
            x = group[self.feature_cols].values
            y = group[self.target_col].values
            n_cycles = len(group)

            if n_cycles < self.window_size:
                continue

            indices = range(0, n_cycles - self.window_size + 1, self.stride)

            for start in indices:
                end = start + self.window_size
                X_list.append(x[start:end])
                y_list.append(y[end - 1])
            
        X_out = np.stack(X_list) if X_list else np.empty((0, self.window_size, len(self.feature_cols)))
        y_out = np.array(y_list) if y_list else np.empty((0,))

        return X_out, y_out 

    def get_params(self):
        return {"window_size" : self.window_size, "stride" : self.stride, "num_features" : len(self.feature_cols)}
    


In [14]:
feature_cols = list(train_df.columns)
feature_cols.remove("RUL")

In [15]:
sw = SlidingWindowGenerator(
    window_size=30,
    feature_cols=feature_cols,
    target_col="RUL",
    id_col="id",
    stride=1
)

In [16]:
from sklearn.model_selection import train_test_split

engine_ids = train_df["id"].unique()

train_ids, val_ids = train_test_split(
    engine_ids, test_size=0.2, random_state=42
)

In [17]:
train_df_split = train_df[train_df["id"].isin(train_ids)]
val_df_split   = train_df[train_df["id"].isin(val_ids)]

In [18]:
X_train, y_train = sw.transform(train_df_split)
X_val, y_val     = sw.transform(val_df_split)

In [19]:
print(set(train_df_split["id"]).intersection(set(val_df_split["id"])))

set()


In [20]:
from sklearn.preprocessing import StandardScaler 

N_train, T, F = X_train.shape

X_train_2d = X_train.reshape(-1, F)
scaler = StandardScaler()
X_train_scaled_2d = scaler.fit_transform(X_train_2d)

In [21]:
N_val = X_val.shape[0]

X_val_2d = X_val.reshape(-1, F)
X_val_scaled_2d = scaler.transform(X_val_2d)

In [22]:
X_train_scaled = X_train_scaled_2d.reshape(N_train, T, F)
X_val_scaled   = X_val_scaled_2d.reshape(N_val, T, F)

In [23]:
print(X_train_scaled.mean(axis=(0,1)))
print(X_train_scaled.std(axis=(0,1)))

[ 1.15889802e-12 -1.93775717e-14  2.63704262e-10 -2.96275799e-12
  1.44102249e-11 -6.16730512e-11  1.30607379e-08  6.15406677e-11
 -3.20857642e-11  1.31248689e-10  9.71778784e-09 -3.41024245e-12
  1.61841452e-11 -2.66398341e-14  6.44508735e-13 -1.65518013e-11]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [24]:
print(X_val_scaled.mean(axis=(0,1)))

[-0.27015694 -0.04828585  0.03123073  0.01615084  0.03238681 -0.02322893
  0.02322153 -0.0304183   0.0239359  -0.02182792  0.02071528 -0.03449098
  0.0187379   0.01636852 -0.047899   -0.03201162]


Qui facciamo un test per vedere che tutto funzioni correttamente

In [25]:
import pandas as pd 
import numpy as np

from preprocessing.data_clean import (
    drop_uninformative_columns,
    compute_rul,
    rul_cap
)

from preprocessing.SlidingWindowClass import SlidingWindowGenerator
from preprocessing.scaling import TimeSeriesScaler

from models.GNN_Transformer import st_gnn_transformer

In [26]:
DATA_DIR = "../data"

column_names = (
    ["id", "cycle"] +
    ["setting1", "setting2", "setting3"] +
    [f"sensor{i}" for i in range(1, 22)]
)

train_df = pd.read_csv(
    f"{DATA_DIR}/train_FD001.txt",
    sep=r"\s+",
    header=None,
    names=column_names
)

In [27]:
train_df = drop_uninformative_columns(train_df)
train_df = compute_rul(train_df)
train_df = rul_cap(train_df, max_rul=125)

In [28]:
from sklearn.model_selection import train_test_split

engine_ids = train_df["id"].unique()

train_ids, val_ids = train_test_split(
    engine_ids,
    test_size=0.2,
    random_state=42
)

train_df_split = train_df[train_df["id"].isin(train_ids)]
val_df_split   = train_df[train_df["id"].isin(val_ids)]

In [29]:
feature_cols = [c for c in train_df.columns if "sensor" in c]

In [30]:
sw = SlidingWindowGenerator(
    window_size=30,
    feature_cols=feature_cols,
    target_col="RUL"
)

X_train, y_train = sw.transform(train_df_split)
X_val, y_val     = sw.transform(val_df_split)

In [31]:
scaler = TimeSeriesScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)

In [32]:
print(X_train_scaled.shape)
print(X_val_scaled.shape)

print(X_train_scaled.mean(axis=(0,1)))
print(X_train_scaled.std(axis=(0,1)))

(14241, 30, 14)
(3490, 30, 14)
[ 2.63704262e-10 -2.96275799e-12  1.44102249e-11 -6.16730512e-11
  1.30607379e-08  6.15406677e-11 -3.20857642e-11  1.31248689e-10
  9.71778784e-09 -3.41024245e-12  1.61841452e-11 -2.66398341e-14
  6.44508735e-13 -1.65518013e-11]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [33]:
import torch
from models.GNN_Transformer.st_gnn_transformer import STGNNTransformer
from models.GNN_Transformer.losses import RMSELoss

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -----------------------
# CONFIGURAZIONE MODELLO
# -----------------------
num_sensors = X_train_scaled.shape[2]

config = {
    "num_nodes": num_sensors,
    "input_features": 1,
    "gnn_hidden_dim": 128,
    "trans_d_model": 256,
    "trans_nhead": 1,
    "trans_layers": 2,
    "dropout_prob": 0
}

# -----------------------
# MATRICE DI ADIACENZA
# -----------------------
init_adj_matrix = torch.ones(
    config["num_nodes"],
    config["num_nodes"]
)

# -----------------------
# MODELLO E LOSS
# -----------------------
model = STGNNTransformer(config, init_adj_matrix).to(device)
criterion = RMSELoss()



In [34]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import numpy as np

# =========================
# IPERPARAMETRI
# =========================
EPOCHS = 30
BATCH_SIZE = 64
LR = 1e-3

# =========================
# DATASET & DATALOADER
# =========================
X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

X_val_t = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)

train_loader = DataLoader(
    TensorDataset(X_train_t, y_train_t),
    batch_size=BATCH_SIZE,
    shuffle=True
)

val_loader = DataLoader(
    TensorDataset(X_val_t, y_val_t),
    batch_size=BATCH_SIZE,
    shuffle=False
)

# =========================
# OPTIMIZER
# =========================
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# =========================
# TRACKING LOSS
# =========================
train_losses = []
val_losses = []

# =========================
# TRAINING LOOP
# =========================
for epoch in range(EPOCHS):

    # ---- TRAIN ----
    model.train()
    train_loss_epoch = 0.0

    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        # (B, T, F) → (B, T, N, 1)
        xb = xb.unsqueeze(-1)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)

        loss.backward()
        optimizer.step()

        train_loss_epoch += loss.item() * xb.size(0)

    train_loss_epoch /= len(train_loader.dataset)
    train_losses.append(train_loss_epoch)


    # ---- VALIDATION ----
    model.eval()
    val_loss_epoch = 0.0

    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            xb = xb.unsqueeze(-1)
            preds = model(xb)
            loss = criterion(preds, yb)

            val_loss_epoch += loss.item() * xb.size(0)

    val_loss_epoch /= len(val_loader.dataset)
    val_losses.append(val_loss_epoch)

    # ---- LOG ----
    print(
        f"Epoch [{epoch+1:02d}/{EPOCHS}] | "
        f"Train RMSE: {train_loss_epoch:.3f} | "
        f"Val RMSE: {val_loss_epoch:.3f}"
    )

# =========================
# PLOT CURVE
# =========================
plt.figure(figsize=(8, 5))
plt.plot(train_losses, label="Train RMSE")
plt.plot(val_losses, label="Validation RMSE")
plt.xlabel("Epoch")
plt.ylabel("RMSE")
plt.title("Training vs Validation RMSE")
plt.legend()
plt.grid(True)
plt.show()

Epoch [01/30] | Train RMSE: 85.812 | Val RMSE: 82.506
Epoch [02/30] | Train RMSE: 71.135 | Val RMSE: 61.854
Epoch [03/30] | Train RMSE: 47.322 | Val RMSE: 71.598
Epoch [04/30] | Train RMSE: 27.715 | Val RMSE: 77.627
Epoch [05/30] | Train RMSE: 24.643 | Val RMSE: 81.737
Epoch [06/30] | Train RMSE: 23.175 | Val RMSE: 24.124
Epoch [07/30] | Train RMSE: 22.743 | Val RMSE: 26.077
Epoch [08/30] | Train RMSE: 22.538 | Val RMSE: 72.783
Epoch [09/30] | Train RMSE: 22.154 | Val RMSE: 101.294
Epoch [10/30] | Train RMSE: 23.690 | Val RMSE: 346.950
Epoch [11/30] | Train RMSE: 22.954 | Val RMSE: 132.447
Epoch [12/30] | Train RMSE: 21.024 | Val RMSE: 719.548
Epoch [13/30] | Train RMSE: 19.095 | Val RMSE: 447.883
Epoch [14/30] | Train RMSE: 18.242 | Val RMSE: 1199.787


KeyboardInterrupt: 