In [3]:
# ------------------- Imports -------------------
import numpy as np
import pandas as pd
import time
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam

# ------------------- Load Data -------------------
column_names = ['engine_id', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3'] + [f'sensor_{i}' for i in range(1, 22)]
train_df = pd.read_csv('train_FD001.txt', sep='\s+', header=None, names=column_names)
test_df = pd.read_csv('test_FD001.txt', sep='\s+', header=None, names=column_names)
rul_df = pd.read_csv('RUL_FD001.txt', header=None, names=["true_RUL"])
rul_df["engine_id"] = rul_df.index + 1

# ------------------- Preprocessing -------------------
# Drop sensor_22 and sensor_23 if present (optional)
train_df.drop(columns=["sensor_22", "sensor_23"], errors='ignore', inplace=True)
test_df.drop(columns=["sensor_22", "sensor_23"], errors='ignore', inplace=True)

# Define features
features = train_df.columns.difference(['engine_id', 'cycle', 'RUL'])

# Scale features
scaler = StandardScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

# Compute RUL
train_max_cycle = train_df.groupby('engine_id')['cycle'].max().reset_index()
train_max_cycle.columns = ['engine_id', 'max_cycle']
train_df = train_df.merge(train_max_cycle, on='engine_id')
train_df['RUL'] = train_df['max_cycle'] - train_df['cycle']
train_df.drop('max_cycle', axis=1, inplace=True)

test_max_cycle = test_df.groupby('engine_id')['cycle'].max().reset_index()
test_max_cycle.columns = ['engine_id', 'max_cycle']
test_df = test_df.merge(test_max_cycle, on='engine_id')
test_df['RUL'] = test_df['max_cycle'] - test_df['cycle']
test_df.drop('max_cycle', axis=1, inplace=True)

# Clip RUL
rul_cap = 130
train_df['RUL'] = train_df['RUL'].clip(upper=rul_cap)

# ------------------- Sequence Creation -------------------
sequence_length = 30

def create_sequences(df, sequence_length, features):
    sequences, labels = [], []
    for engine_id in df['engine_id'].unique():
        engine_data = df[df['engine_id'] == engine_id]
        for i in range(len(engine_data) - sequence_length):
            seq = engine_data[features].iloc[i:i+sequence_length].values
            label = engine_data['RUL'].iloc[i + sequence_length]
            sequences.append(seq)
            labels.append(label)
    return np.array(sequences), np.array(labels)

X_train_seq, y_train_seq = create_sequences(train_df, sequence_length, features)
num_features = X_train_seq.shape[2]

# ------------------- LSTM-Bidirectional Model -------------------
model = Sequential()
model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(sequence_length, num_features)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(64, return_sequences=False)))
model.add(Dropout(0.3))
model.add(Dense(1))

model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse', metrics=['mae'])
model.summary()

# ------------------- Training -------------------
start_time = time.time()
model.fit(X_train_seq, y_train_seq, validation_split=0.25, epochs=100, batch_size=64)
end_time = time.time()
print(f"LSTM-Bidirectional Training Time: {end_time - start_time:.2f} seconds")

# ------------------- Prepare Test Data -------------------
test_sequences = []
valid_engine_ids = []

for engine_id in test_df["engine_id"].unique():
    engine_data = test_df[test_df["engine_id"] == engine_id]
    if len(engine_data) >= sequence_length:
        last_seq = engine_data.iloc[-sequence_length:][features].values
        test_sequences.append(last_seq)
        valid_engine_ids.append(engine_id)

X_test_final = np.array(test_sequences)
X_test_final = np.nan_to_num(X_test_final)  # Ensure no NaNs

y_pred = model.predict(X_test_final).flatten()
y_true = rul_df[rul_df["engine_id"].isin(valid_engine_ids)]["true_RUL"].values

# ------------------- Evaluation -------------------
def evaluate_model(name, y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    accuracy = 100 - mape
    h = y_pred - y_true
    score = np.sum(np.where(h < 0, np.exp(-h / 13) - 1, np.exp(h / 10) - 1))
    print(f"{name} Results:")
    print(f"  RMSE     = {rmse:.2f}")
    print(f"  MAE      = {mae:.2f}")
    print(f"  R²       = {r2:.2f}")
    print(f"  Accuracy = {accuracy:.2f}%")
    print(f"  NASA Score = {score:.2f}")

evaluate_model("LSTM-Bidirectional Model", y_true, y_pred)


  super().__init__(**kwargs)


Epoch 1/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 95ms/step - loss: 7774.5420 - mae: 77.3359 - val_loss: 6797.3472 - val_mae: 71.3167
Epoch 2/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 78ms/step - loss: 5861.4092 - mae: 64.9034 - val_loss: 6285.1924 - val_mae: 68.1056
Epoch 3/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 79ms/step - loss: 5500.5449 - mae: 62.3865 - val_loss: 5903.4424 - val_mae: 65.5037
Epoch 4/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 77ms/step - loss: 5196.2886 - mae: 60.2554 - val_loss: 5560.7871 - val_mae: 63.1474
Epoch 5/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 80ms/step - loss: 4838.9116 - mae: 57.5065 - val_loss: 5246.3042 - val_mae: 61.1077
Epoch 6/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 83ms/step - loss: 4568.1924 - mae: 55.5118 - val_loss: 4944.1733 - val_mae: 58.5650
Epoch 7/100
[1m207/20