In [83]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Bidirectional, Dropout, Dense
from tensorflow.keras.optimizers import Adam
import xgboost as xgb
import time

In [150]:
column_names = ['engine_id', 'cycle','op_setting_1', 'op_setting_2', 'op_setting_3'] + [f'sensor_{i}' for i in range(1, 24)]

train_df = pd.read_csv('train_FD001.txt', sep=' ', header=None)
test_df = pd.read_csv('test_FD001.txt', sep=' ', header=None)
rul_df = pd.read_csv('RUL_FD001.txt', header=None, names=['RUL'])

train_df_2 = pd.read_csv('train_FD002.txt', sep=' ', header=None)
test_df_2 = pd.read_csv('test_FD002.txt', sep=' ', header=None)
rul_df_2 = pd.read_csv('RUL_FD002.txt', header=None, names=['RUL'])

train_df_3 = pd.read_csv('train_FD003.txt', sep=' ', header=None)
test_df_3 = pd.read_csv('test_FD003.txt', sep=' ', header=None)
rul_df_3 = pd.read_csv('RUL_FD003.txt', header=None, names=['RUL'])

train_df_4 = pd.read_csv('train_FD004.txt', sep=' ', header=None)
test_df_4 = pd.read_csv('test_FD004.txt', sep=' ', header=None)
rul_df_4 = pd.read_csv('RUL_FD004.txt', header=None, names=['RUL'])

train_df.columns = column_names
test_df.columns = column_names

train_df_2.columns = column_names
test_df_2.columns = column_names

train_df_3.columns = column_names
test_df_3.columns = column_names

train_df_4.columns = column_names
test_df_4.columns = column_names

In [151]:
max_cycle = train_df.groupby('engine_id')['cycle'].max().reset_index()
max_cycle.columns = ['engine_id', 'max_cycle']
train_df = train_df.merge(max_cycle, on='engine_id')
train_df['RUL'] = train_df['max_cycle'] - train_df['cycle']
train_df.drop('max_cycle', axis=1, inplace=True)

max_cycle_2 = train_df_2.groupby('engine_id')['cycle'].max().reset_index()
max_cycle_2.columns = ['engine_id', 'max_cycle']
train_df_2 = train_df_2.merge(max_cycle_2, on='engine_id')
train_df_2['RUL'] = train_df_2['max_cycle'] - train_df_2['cycle']
train_df_2.drop('max_cycle', axis=1, inplace=True)

max_cycle_3 = train_df_3.groupby('engine_id')['cycle'].max().reset_index()
max_cycle_3.columns = ['engine_id', 'max_cycle']
train_df_3 = train_df_3.merge(max_cycle_3, on='engine_id')
train_df_3['RUL'] = train_df_3['max_cycle'] - train_df_3['cycle']
train_df_3.drop('max_cycle', axis=1, inplace=True)

max_cycle_4 = train_df_4.groupby('engine_id')['cycle'].max().reset_index()
max_cycle_4.columns = ['engine_id', 'max_cycle']
train_df_4 = train_df_4.merge(max_cycle_4, on='engine_id')
train_df_4['RUL'] = train_df_4['max_cycle'] - train_df_4['cycle']
train_df_4.drop('max_cycle', axis=1, inplace=True)

In [152]:
test_last_df = test_df.groupby('engine_id').last().reset_index()

test_last_df['RUL'] = rul_df['RUL']

test_last_df_2 = test_df_2.groupby('engine_id').last().reset_index()

test_last_df_2['RUL'] = rul_df_2['RUL']

test_last_df_3 = test_df_3.groupby('engine_id').last().reset_index()

test_last_df_3['RUL'] = rul_df_3['RUL']

test_last_df_4 = test_df_4.groupby('engine_id').last().reset_index()

test_last_df_4['RUL'] = rul_df_4['RUL']

In [153]:
features = train_df.columns.difference(['engine_id', 'cycle', 'RUL'])
features_2 = train_df_2.columns.difference(['engine_id', 'cycle', 'RUL'])
features_3 = train_df_3.columns.difference(['engine_id', 'cycle', 'RUL'])
features_4 = train_df_4.columns.difference(['engine_id', 'cycle', 'RUL'])

X_train = train_df.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_train = train_df["RUL"]

X_test = test_last_df.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_test = test_last_df["RUL"]

X_train_2 = train_df_2.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_train_2 = train_df_2["RUL"]

X_test_2 = test_last_df_2.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_test_2 = test_last_df_2["RUL"]

X_train_3 = train_df_3.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_train_3 = train_df_3["RUL"]

X_test_3 = test_last_df_3.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_test_3 = test_last_df_3["RUL"]

X_train_4 = train_df_4.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_train_4 = train_df_4["RUL"]

X_test_4 = test_last_df_4.drop(columns=["engine_id","cycle","RUL","sensor_22","sensor_23"])
y_test_4 = test_last_df_4["RUL"]

In [154]:
constant_or_nan_cols = [col for col in X_train.columns if X_train[col].nunique(dropna=True) <= 1]
X_train.drop(columns=constant_or_nan_cols, inplace=True)
X_test.drop(columns=constant_or_nan_cols, inplace=True)
y_train.drop(columns=constant_or_nan_cols, inplace=True)
y_test.drop(columns=constant_or_nan_cols, inplace=True)

X_train_2.drop(columns=constant_or_nan_cols, inplace=True)
X_test_2.drop(columns=constant_or_nan_cols, inplace=True)
y_train_2.drop(columns=constant_or_nan_cols, inplace=True)
y_test_2.drop(columns=constant_or_nan_cols, inplace=True)

X_train_3.drop(columns=constant_or_nan_cols, inplace=True)
X_test_3.drop(columns=constant_or_nan_cols, inplace=True)
y_train_3.drop(columns=constant_or_nan_cols, inplace=True)
y_test_3.drop(columns=constant_or_nan_cols, inplace=True)

X_train_4.drop(columns=constant_or_nan_cols, inplace=True)
X_test_4.drop(columns=constant_or_nan_cols, inplace=True)
y_train_4.drop(columns=constant_or_nan_cols, inplace=True)
y_test_4.drop(columns=constant_or_nan_cols, inplace=True)

In [155]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

scaler_2 = StandardScaler()
X_train_scaled_2 = scaler_2.fit_transform(X_train_2)
X_test_scaled_2 = scaler_2.transform(X_test_2)

scaler_3 = StandardScaler()
X_train_scaled_3 = scaler_3.fit_transform(X_train_3)
X_test_scaled_3 = scaler_3.transform(X_test_3)

scaler_4 = StandardScaler()
X_train_scaled_4 = scaler_4.fit_transform(X_train_4)
X_test_scaled_4 = scaler_4.transform(X_test_4)

In [156]:
def evaluate_model(name, y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    accuracy = 100 - mape
    # NASA Scoring Function
    h = y_pred - y_true
    score = np.sum(np.where(h < 0, np.exp(-h / 13) - 1, np.exp(h / 10) - 1))
    print(f"{name}: RMSE={rmse:.2f}, MAE={mae:.2f}, R²={r2:.2f}, Accuracy = {accuracy:.2f}%, Score={score:.2f} ")

In [94]:
#Linear Regression
start_lr = time.time()
lr = LinearRegression()
lr_2 = LinearRegression()
lr_3 = LinearRegression()
lr_4 = LinearRegression()
lr.fit(X_train_scaled, y_train)
lr_2.fit(X_train_scaled_2, y_train_2)
lr_3.fit(X_train_scaled_3, y_train_3)
lr_4.fit(X_train_scaled_4, y_train_4)
y_pred_lr = lr.predict(X_test_scaled)
y_pred_lr_2 = lr_2.predict(X_test_scaled_2)
y_pred_lr_3 = lr_3.predict(X_test_scaled_3)
y_pred_lr_4 = lr_4.predict(X_test_scaled_4)
end_lr = time.time()
print(f"Linear regression Training Time: {end_lr - start_lr:.3f} seconds")

Linear regression Training Time: 0.065 seconds


In [95]:
evaluate_model("Linear Regression_FD001", y_test, y_pred_lr)
evaluate_model("Linear Regression_FD002", y_test_2, y_pred_lr_2)
evaluate_model("Linear Regression_FD003", y_test_3, y_pred_lr_3)
evaluate_model("Linear Regression_FD004", y_test_4, y_pred_lr_4)

Linear Regression_FD001: RMSE=1026.63, MAE=25.59, R²=0.41, Accuracy = 44.64%, Score=13259.84 
Linear Regression_FD002: RMSE=1156.66, MAE=27.54, R²=0.60, Accuracy = 26.84%, Score=29090.43 
Linear Regression_FD003: RMSE=3287.80, MAE=47.31, R²=-0.92, Accuracy = -2.43%, Score=404325093.99 
Linear Regression_FD004: RMSE=2378.76, MAE=39.43, R²=0.20, Accuracy = 13.21%, Score=864464.43 


In [96]:
# Ridge regression
start_rdg = time.time()
ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
y_pred_ridge = ridge.predict(X_test_scaled)

ridge_2 = Ridge(alpha=1.0)
ridge_2.fit(X_train_scaled_2, y_train_2)
y_pred_ridge_2 = ridge_2.predict(X_test_scaled_2)

ridge_3 = Ridge(alpha=1.0)
ridge_3.fit(X_train_scaled_3, y_train_3)
y_pred_ridge_3 = ridge_3.predict(X_test_scaled_3)

ridge_4 = Ridge(alpha=1.0)
ridge_4.fit(X_train_scaled_4, y_train_4)
y_pred_ridge_4 = ridge.predict(X_test_scaled_4)

end_rdg = time.time()
print(f"Ridge regression Training Time: {end_rdg - start_rdg:.3f} seconds")

Ridge regression Training Time: 0.046 seconds


In [97]:
evaluate_model("Ridge Regression_FD001", y_test, y_pred_ridge)
evaluate_model("Ridge Regression_FD002", y_test_2, y_pred_ridge_2)
evaluate_model("Ridge Regression_FD003", y_test_3, y_pred_ridge_3)
evaluate_model("Ridge Regression_FD004", y_test_4, y_pred_ridge_4)

Ridge Regression_FD001: RMSE=1026.61, MAE=25.59, R²=0.41, Accuracy = 44.64%, Score=13258.74 
Ridge Regression_FD002: RMSE=1132.84, MAE=27.25, R²=0.61, Accuracy = 29.32%, Score=26524.17 
Ridge Regression_FD003: RMSE=3287.77, MAE=47.31, R²=-0.92, Accuracy = -2.43%, Score=404137702.31 
Ridge Regression_FD004: RMSE=3683.56, MAE=49.87, R²=-0.24, Accuracy = -100.41%, Score=6505287.14 


In [98]:
# Lasso regression
start_lass = time.time()
lasso = Lasso(alpha=0.01)
lasso.fit(X_train_scaled, y_train)
y_pred_lasso = lasso.predict(X_test_scaled)

lasso_2 = Lasso(alpha=0.01)
lasso_2.fit(X_train_scaled_2, y_train_2)
y_pred_lasso_2 = lasso_2.predict(X_test_scaled_2)

lasso_3 = Lasso(alpha=0.01)
lasso_3.fit(X_train_scaled_3, y_train_3)
y_pred_lasso_3 = lasso_3.predict(X_test_scaled_3)

lasso_4 = Lasso(alpha=0.01)
lasso_4.fit(X_train_scaled_4, y_train_4)
y_pred_lasso_4 = lasso.predict(X_test_scaled_4)

end_lass = time.time()
print(f"Lasso regression Training Time: {end_lass - start_lass:.3f} seconds")

  model = cd_fast.enet_coordinate_descent(


Lasso regression Training Time: 25.687 seconds


  model = cd_fast.enet_coordinate_descent(


In [99]:
evaluate_model("Lasso Regression_FD001", y_test, y_pred_lasso)
evaluate_model("Lasso Regression_FD002", y_test_2, y_pred_lasso_2)
evaluate_model("Lasso Regression_FD003", y_test_3, y_pred_lasso_3)
evaluate_model("Lasso Regression_FD004", y_test_4, y_pred_lasso_4)

Lasso Regression_FD001: RMSE=1026.55, MAE=25.59, R²=0.41, Accuracy = 44.63%, Score=13261.18 
Lasso Regression_FD002: RMSE=1175.54, MAE=27.65, R²=0.59, Accuracy = 27.89%, Score=34076.67 
Lasso Regression_FD003: RMSE=3288.65, MAE=47.31, R²=-0.92, Accuracy = -2.44%, Score=407232846.49 
Lasso Regression_FD004: RMSE=3681.91, MAE=49.86, R²=-0.24, Accuracy = -100.40%, Score=6454004.97 


In [100]:
# ElasticNet regression
start_elst = time.time()
elastic = ElasticNet(alpha=0.01, l1_ratio=0.5)
elastic.fit(X_train_scaled, y_train)
y_pred_elastic = elastic.predict(X_test_scaled)

elastic_2 = ElasticNet(alpha=0.01, l1_ratio=0.5)
elastic_2.fit(X_train_scaled_2, y_train_2)
y_pred_elastic_2 = elastic_2.predict(X_test_scaled_2)

elastic_3 = ElasticNet(alpha=0.01, l1_ratio=0.5)
elastic_3.fit(X_train_scaled_3, y_train_3)
y_pred_elastic_3 = elastic_3.predict(X_test_scaled_3)

elastic_4 = ElasticNet(alpha=0.01, l1_ratio=0.5)
elastic_4.fit(X_train_scaled_4, y_train_4)
y_pred_elastic_4 = elastic_4.predict(X_test_scaled_4)

end_elst = time.time()
print(f"ElasticNet regression Training Time: {end_elst - start_elst:.3f} seconds")

  model = cd_fast.enet_coordinate_descent(


ElasticNet regression Training Time: 25.573 seconds


  model = cd_fast.enet_coordinate_descent(


In [101]:
evaluate_model("ElasticNet Regression_FD001", y_test, y_pred_elastic)
evaluate_model("ElasticNet Regression_FD002", y_test_2, y_pred_elastic_2)
evaluate_model("ElasticNet Regression_FD003", y_test_3, y_pred_elastic_3)
evaluate_model("ElasticNet Regression_FD004", y_test_4, y_pred_elastic_4)

ElasticNet Regression_FD001: RMSE=1024.05, MAE=25.57, R²=0.41, Accuracy = 44.70%, Score=13152.31 
ElasticNet Regression_FD002: RMSE=1488.76, MAE=32.47, R²=0.49, Accuracy = -11.64%, Score=45281.73 
ElasticNet Regression_FD003: RMSE=3285.19, MAE=47.30, R²=-0.92, Accuracy = -2.43%, Score=383241629.96 
ElasticNet Regression_FD004: RMSE=2837.10, MAE=44.40, R²=0.05, Accuracy = -47.57%, Score=1833320.95 


In [103]:
# K-Nearest Neighbors model
start_knn = time.time()
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

knn_2 = KNeighborsRegressor(n_neighbors=5)
knn_2.fit(X_train_scaled_2, y_train_2)
y_pred_knn_2 = knn_2.predict(X_test_scaled_2)

knn_3 = KNeighborsRegressor(n_neighbors=5)
knn_3.fit(X_train_scaled_3, y_train_3)
y_pred_knn_3 = knn_3.predict(X_test_scaled_3)

knn_4 = KNeighborsRegressor(n_neighbors=5)
knn_4.fit(X_train_scaled_4, y_train_4)
y_pred_knn_4 = knn_4.predict(X_test_scaled_4)

end_knn = time.time()
print(f"KNN Training Time: {end_knn - start_knn:.3f} seconds")

KNN Training Time: 0.067 seconds


In [104]:
evaluate_model("KNN Regression_FD001", y_test, y_pred_knn)
evaluate_model("KNN Regression_FD002", y_test_2, y_pred_knn_2)
evaluate_model("KNN Regression_FD003", y_test_3, y_pred_knn_3)
evaluate_model("KNN Regression_FD004", y_test_4, y_pred_knn_4)

KNN Regression_FD001: RMSE=1173.39, MAE=25.27, R²=0.32, Accuracy = 59.94%, Score=39494.11 
KNN Regression_FD002: RMSE=1428.44, MAE=27.32, R²=0.51, Accuracy = 56.24%, Score=434453.72 
KNN Regression_FD003: RMSE=3171.07, MAE=39.80, R²=-0.85, Accuracy = 44.48%, Score=198116325.73 
KNN Regression_FD004: RMSE=2263.50, MAE=35.44, R²=0.24, Accuracy = 45.54%, Score=6852355.67 


In [105]:
# RandomForest
start_rf = time.time()
rf = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=82)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

rf_2 = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=82)
rf_2.fit(X_train_2, y_train_2)
y_pred_rf_2 = rf_2.predict(X_test_2)

rf_3 = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=82)
rf_3.fit(X_train_3, y_train_3)
y_pred_rf_3 = rf_3.predict(X_test_3)

rf_4 = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=82)
rf_4.fit(X_train_4, y_train_4)
y_pred_rf_4 = rf_4.predict(X_test_4)

end_rf = time.time()
print(f"Random Forest Training Time: {end_rf - start_rf:.3f} seconds")

Random Forest Training Time: 447.348 seconds


In [107]:
evaluate_model("Random Forest method_FD001", y_test, y_pred_rf)
evaluate_model("Random Forest method_FD002", y_test_2, y_pred_rf_2)
evaluate_model("Random Forest method_FD003", y_test_3, y_pred_rf_3)
evaluate_model("Random Forest method_FD004", y_test_4, y_pred_rf_4)

Random Forest method_FD001: RMSE=1097.04, MAE=24.59, R²=0.36, Accuracy = 60.33%, Score=31996.76 
Random Forest method_FD002: RMSE=979.89, MAE=22.90, R²=0.66, Accuracy = 62.59%, Score=41569.28 
Random Forest method_FD003: RMSE=1974.88, MAE=32.26, R²=-0.15, Accuracy = 55.73%, Score=385263.09 
Random Forest method_FD004: RMSE=1789.11, MAE=31.10, R²=0.40, Accuracy = 53.55%, Score=5648317.62 


In [108]:
# XGBoost
start_xgb = time.time()
xgb_model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

xgb_model_2 = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6)
xgb_model_2.fit(X_train_2, y_train_2)
y_pred_xgb_2= xgb_model_2.predict(X_test_2)

xgb_model_3 = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6)
xgb_model_3.fit(X_train_3, y_train_3)
y_pred_xgb_3 = xgb_model_3.predict(X_test_3)

xgb_model_4 = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6)
xgb_model_4.fit(X_train_4, y_train_4)
y_pred_xgb_4 = xgb_model_4.predict(X_test_4)

end_xgb = time.time()
print(f"XGBoost Training Time: {end_rf - start_rf:.3f} seconds")

XGBoost Training Time: 447.348 seconds


In [109]:
evaluate_model("XGBoost method_FD001", y_test, y_pred_xgb)
evaluate_model("XGBoost method_FD002", y_test_2, y_pred_xgb_2)
evaluate_model("XGBoost method_FD003", y_test_3, y_pred_xgb_3)
evaluate_model("XGBoost method_FD004", y_test_4, y_pred_xgb_4)

XGBoost method_FD001: RMSE=1169.22, MAE=24.56, R²=0.32, Accuracy = 61.82%, Score=56126.11 
XGBoost method_FD002: RMSE=896.88, MAE=22.05, R²=0.69, Accuracy = 63.27%, Score=20932.83 
XGBoost method_FD003: RMSE=2139.61, MAE=32.75, R²=-0.25, Accuracy = 53.91%, Score=2015443.42 
XGBoost method_FD004: RMSE=1716.15, MAE=30.76, R²=0.42, Accuracy = 54.16%, Score=612272.30 


In [110]:
# SVR method
start_svr = time.time()
svr = SVR(kernel='rbf')
svr.fit(X_train_scaled, y_train)
y_pred_svr = svr.predict(X_test_scaled)

svr_2 = SVR(kernel='rbf')
svr_2.fit(X_train_scaled_2, y_train_2)
y_pred_svr_2 = svr_2.predict(X_test_scaled_2)

svr_3 = SVR(kernel='rbf')
svr_3.fit(X_train_scaled_3, y_train_3)
y_pred_svr_3 = svr_3.predict(X_test_scaled_3)

svr_4 = SVR(kernel='rbf')
svr_4.fit(X_train_scaled_4, y_train_4)
y_pred_svr_4 = svr_4.predict(X_test_scaled_4)

end_svr = time.time()
print(f"SVR Training Time: {end_svr - start_svr:.3f} seconds")

SVR Training Time: 818.587 seconds


In [111]:
evaluate_model("SVR method_FD001", y_test, y_pred_svr)
evaluate_model("SVR method_FD002", y_test_2, y_pred_svr_2)
evaluate_model("SVR method_FD003", y_test_3, y_pred_svr_3)
evaluate_model("SVR method_FD004", y_test_4, y_pred_svr_4)

SVR method_FD001: RMSE=681.52, MAE=18.59, R²=0.61, Accuracy = 68.40%, Score=6087.61 
SVR method_FD002: RMSE=1699.70, MAE=33.89, R²=0.41, Accuracy = -30.95%, Score=58478.31 
SVR method_FD003: RMSE=1857.99, MAE=30.73, R²=-0.08, Accuracy = 55.96%, Score=1403558.87 
SVR method_FD004: RMSE=2311.02, MAE=39.74, R²=0.22, Accuracy = -49.31%, Score=717890.67 


In [113]:
# Desion Tree Regressor method
start_dtr = time.time()
dt = DecisionTreeRegressor(max_depth=5)
dt.fit(X_train, y_train)
y_pred_dtr = dt.predict(X_test)

dt_2 = DecisionTreeRegressor(max_depth=5)
dt_2.fit(X_train_2, y_train_2)
y_pred_dtr_2 = dt_2.predict(X_test_2)

dt_3 = DecisionTreeRegressor(max_depth=5)
dt_3.fit(X_train_3, y_train_3)
y_pred_dtr_3 = dt_3.predict(X_test_3)

dt_4 = DecisionTreeRegressor(max_depth=5)
dt_4.fit(X_train_4, y_train_4)
y_pred_dtr_4 = dt_4.predict(X_test_4)

end_dtr = time.time()
print(f"Desion Tree Regressor Training Time: {end_dtr - start_dtr:.3f} seconds")

Desion Tree Regressor Training Time: 1.054 seconds


In [114]:
evaluate_model("Desion Tree Regressor method_FD001", y_test, y_pred_dtr)
evaluate_model("Desion Tree Regressor method_FD002", y_test_2, y_pred_dtr_2)
evaluate_model("Desion Tree Regressor method_FD003", y_test_3, y_pred_dtr_3)
evaluate_model("Desion Tree Regressor method_FD004", y_test_4, y_pred_dtr_4)

Desion Tree Regressor method_FD001: RMSE=1247.32, MAE=27.17, R²=0.28, Accuracy = 50.66%, Score=37207.42 
Desion Tree Regressor method_FD002: RMSE=1465.66, MAE=29.99, R²=0.49, Accuracy = 23.00%, Score=179948.26 
Desion Tree Regressor method_FD003: RMSE=2473.79, MAE=37.56, R²=-0.44, Accuracy = 47.93%, Score=1178988.75 
Desion Tree Regressor method_FD004: RMSE=2649.30, MAE=40.74, R²=0.11, Accuracy = 11.25%, Score=3663945.96 


In [115]:
# Gradient Boosting Regressor method
start_gbr = time.time()
gbr = GradientBoostingRegressor(n_estimators=100)
gbr.fit(X_train, y_train)
y_pred_gbr = gbr.predict(X_test)

gbr_2 = GradientBoostingRegressor(n_estimators=100)
gbr_2.fit(X_train_2, y_train_2)
y_pred_gbr_2 = gbr_2.predict(X_test_2)

gbr_3 = GradientBoostingRegressor(n_estimators=100)
gbr_3.fit(X_train_3, y_train_3)
y_pred_gbr_3 = gbr_3.predict(X_test_3)

gbr_4 = GradientBoostingRegressor(n_estimators=100)
gbr_4.fit(X_train_4, y_train_4)
y_pred_gbr_4 = gbr_4.predict(X_test_4)

end_gbr = time.time()
print(f"Gradient Boosting Regressor Training Time: {end_gbr - start_gbr:.3f} seconds")

Gradient Boosting Regressor Training Time: 59.203 seconds


In [116]:
evaluate_model("Gradient Boosting Regressor method_FD001", y_test, y_pred_gbr)
evaluate_model("Gradient Boosting Regressor method_FD002", y_test_2, y_pred_gbr_2)
evaluate_model("Gradient Boosting Regressor method_FD003", y_test_3, y_pred_gbr_3)
evaluate_model("Gradient Boosting Regressor method_FD004", y_test_4, y_pred_gbr_4)

Gradient Boosting Regressor method_FD001: RMSE=1062.60, MAE=23.54, R²=0.38, Accuracy = 62.45%, Score=23952.91 
Gradient Boosting Regressor method_FD002: RMSE=985.90, MAE=24.77, R²=0.66, Accuracy = 47.91%, Score=21634.43 
Gradient Boosting Regressor method_FD003: RMSE=2300.81, MAE=35.51, R²=-0.34, Accuracy = 49.58%, Score=460882.65 
Gradient Boosting Regressor method_FD004: RMSE=1932.77, MAE=34.76, R²=0.35, Accuracy = 31.98%, Score=468267.83 


In [117]:
# Extra Tree Regressor method
start_etr = time.time()
etr = ExtraTreesRegressor(n_estimators=100)
etr.fit(X_train, y_train)
y_pred_etr = etr.predict(X_test)

etr_2 = ExtraTreesRegressor(n_estimators=100)
etr_2.fit(X_train_2, y_train_2)
y_pred_etr_2 = etr_2.predict(X_test_2)

etr_3 = ExtraTreesRegressor(n_estimators=100)
etr_3.fit(X_train_3, y_train_3)
y_pred_etr_3 = etr_3.predict(X_test_3)

etr_4 = ExtraTreesRegressor(n_estimators=100)
etr_4.fit(X_train_4, y_train_4)
y_pred_etr_4 = etr_4.predict(X_test_4)

end_etr = time.time()
print(f"Extra Tree Regressor Training Time: {end_etr - start_etr:.3f} seconds")

Extra Tree Regressor Training Time: 90.499 seconds


In [118]:
evaluate_model("Extra Tree Regressor method_FD001", y_test, y_pred_etr)
evaluate_model("Extra Tree Regressor method_FD002", y_test_2, y_pred_etr_2)
evaluate_model("Extra Tree Regressor method_FD003", y_test_3, y_pred_etr_3)
evaluate_model("Extra Tree Regressor method_FD004", y_test_4, y_pred_etr_4)

Extra Tree Regressor method_FD001: RMSE=1082.19, MAE=24.44, R²=0.37, Accuracy = 60.85%, Score=26998.77 
Extra Tree Regressor method_FD002: RMSE=1013.79, MAE=23.37, R²=0.65, Accuracy = 61.64%, Score=48112.01 
Extra Tree Regressor method_FD003: RMSE=2250.36, MAE=33.43, R²=-0.31, Accuracy = 53.94%, Score=3393750.68 
Extra Tree Regressor method_FD004: RMSE=1821.46, MAE=31.10, R²=0.39, Accuracy = 51.93%, Score=3847222.45 


In [1]:
# Neural Network model_FD001
start_nn = time.time()
# Build model
nn_model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1)
])
nn_model.compile(optimizer='adam', loss='mse')
nn_model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=1)
y_pred_nn = nn_model.predict(X_test).flatten()
end_nn = time.time()
print(f"Neural Betwork Training Time: {end_nn - start_nn:.3f} seconds")
evaluate_model("Neural Network method_FD001", y_test, y_pred_nn)

NameError: name 'time' is not defined

In [181]:
#LSTM Model training_ FD001
start_lstm = time.time()

scaler = StandardScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

test_max_cycle = test_df.groupby('engine_id')['cycle'].max().reset_index()
test_max_cycle.columns = ['engine_id', 'max_cycle']
test_df = test_df.merge(test_max_cycle, on='engine_id')
test_df['RUL'] = test_df['max_cycle'] - test_df['cycle']
test_df.drop('max_cycle', axis=1, inplace=True)

train_df['RUL'] = train_df['RUL'].clip(upper=130)
test_df['RUL'] = test_df['RUL'].clip(upper=130)

sequence_length = 30

train_df[features] = train_df[features].fillna(0)

def create_sequences(df, sequence_length, features):
    sequences = []
    labels = []
    for engine_id in df['engine_id'].unique():
        engine_data = df[df['engine_id'] == engine_id]
        for i in range(len(engine_data) - sequence_length):
            seq = engine_data[features].iloc[i:i+sequence_length].values
            label = engine_data['RUL'].iloc[i + sequence_length]
            sequences.append(seq)
            labels.append(label)
    return np.array(sequences), np.array(labels)
    
X_train_seq, y_train_seq = create_sequences(train_df, sequence_length, features)
#X_test_seq, y_test_seq = create_sequences(test_df, sequence_length, features)

num_features = X_train_seq.shape[2]

model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(sequence_length, num_features)))
model.add(Dropout(0.3))
model.add(LSTM(200))
model.add(Dropout(0.3))
model.add(Dense(1)) 

optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
model.summary()

start_lstm = time.time()
model.fit(X_train_seq, y_train_seq, validation_split=0.2, epochs=100, batch_size=64)
end_lstm = time.time()
print(f"LSTM Training Time: {end_lstm - start_lstm:.2f} seconds")

test_sequences = []
valid_engine_ids = []

for engine_id in test_df["engine_id"].unique():
    engine_data = test_df[test_df["engine_id"] == engine_id][features]
    seq = engine_data.values
    if len(seq) < sequence_length:
        # Pad at the beginning with the first row repeated
        pad_width = sequence_length - len(seq)
        pad = np.repeat(seq[[0]], pad_width, axis=0)
        seq = np.vstack((pad, seq))
    else:
        seq = seq[-sequence_length:]  # last 30 steps

    test_sequences.append(seq)
    valid_engine_ids.append(engine_id)


X_test_seq = np.array(test_sequences)
X_test_seq = np.nan_to_num(X_test_seq)
y_pred_lstm = model.predict(X_test_seq).flatten()

end_lstm = time.time()
print(f"Extra Tree Regressor Training Time: {end_lstm - start_lstm:.3f} seconds")

# Convert to numpy arrays if not already
y_test_array = np.array(y_test)
y_pred_array = np.array(y_pred_lstm)

# Create a mask to filter out NaNs
mask = ~np.isnan(y_test_array) & ~np.isnan(y_pred_array)

# Filter
y_test_clean = y_test_array[mask]
y_pred_clean = y_pred_array[mask]

evaluate_model("LSTM Method", y_test, y_pred_lstm)

  super().__init__(**kwargs)


Epoch 1/100
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 131ms/step - loss: 7205.0654 - mae: 74.1747 - val_loss: 6146.0728 - val_mae: 67.2784
Epoch 2/100
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 131ms/step - loss: 5329.7612 - mae: 61.6033 - val_loss: 5552.4731 - val_mae: 63.3162
Epoch 3/100
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 128ms/step - loss: 4767.8677 - mae: 57.2459 - val_loss: 5046.3384 - val_mae: 59.6863
Epoch 4/100
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 130ms/step - loss: 4330.8433 - mae: 53.9735 - val_loss: 4585.2402 - val_mae: 56.2697
Epoch 5/100
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 125ms/step - loss: 3920.9849 - mae: 50.6006 - val_loss: 4161.8706 - val_mae: 53.1055
Epoch 6/100
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 129ms/step - loss: 3561.6250 - mae: 47.8971 - val_loss: 3782.3762 - val_mae: 50.1688
Epoch 7/100
[1m

In [179]:
# LSTM-BiDirectional Model
scaler = StandardScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

test_max_cycle = test_df.groupby('engine_id')['cycle'].max().reset_index()
test_max_cycle.columns = ['engine_id', 'max_cycle']
test_df = test_df.merge(test_max_cycle, on='engine_id')
test_df['RUL'] = test_df['max_cycle'] - test_df['cycle']
test_df.drop('max_cycle', axis=1, inplace=True)
rul_cap = 130
train_df['RUL'] = train_df['RUL'].clip(upper=rul_cap)

sequence_length = 30
#train_df[features] = train_df[features].fillna(0)
def create_sequences(df, sequence_length, features):
    sequences = []
    labels = []
    for engine_id in df['engine_id'].unique():
        engine_data = df[df['engine_id'] == engine_id]
        for i in range(len(engine_data) - sequence_length):
            seq = engine_data[features].iloc[i:i+sequence_length].values
            label = engine_data['RUL'].iloc[i + sequence_length]
            sequences.append(seq)
            labels.append(label)
    return np.array(sequences), np.array(labels)

X_train_seq, y_train_seq = create_sequences(train_df, sequence_length, features)
num_features = X_train_seq.shape[2]

model = Sequential()
model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(sequence_length, num_features)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.3))
model.add(Dense(1))

model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse', metrics=['mae'])
model.summary()

start_lstm = time.time()
model.fit(X_train_seq, y_train_seq, validation_split=0.25, epochs=100, batch_size=64)
end_lstm = time.time()
print(f"LSTM-BiDirectional Training Time: {end_lstm - start_lstm:.2f} seconds")

rul_df = pd.read_csv("RUL_FD001.txt", header=None, names=["true_RUL"])
rul_df["engine_id"] = rul_df.index + 1

test_sequences = []
valid_engine_ids = []

for engine_id in test_df["engine_id"].unique():
    engine_data = test_df[test_df["engine_id"] == engine_id]
    if len(engine_data) >= sequence_length:
        last_seq = engine_data.iloc[-sequence_length:][features].values
        test_sequences.append(last_seq)
        valid_engine_ids.append(engine_id)

X_test_final = np.array(test_sequences)
print("NaNs in X_test_final:", np.isnan(X_test_final).sum())
X_test_final = np.nan_to_num(X_test_final)

y_pred = model.predict(X_test_final).flatten()
y_true = rul_df[rul_df["engine_id"].isin(valid_engine_ids)]["true_RUL"].values
evaluate_model("LSTM-BiDirectional Method", y_test, y_pred)

  super().__init__(**kwargs)


Epoch 1/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 119ms/step - loss: 7754.6772 - mae: 77.3908 - val_loss: 6720.3237 - val_mae: 70.8598
Epoch 2/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 101ms/step - loss: 5869.8638 - mae: 65.0632 - val_loss: 6172.4487 - val_mae: 67.2961
Epoch 3/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 102ms/step - loss: 5406.7725 - mae: 61.8230 - val_loss: 5791.7832 - val_mae: 64.6841
Epoch 4/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 103ms/step - loss: 5068.8784 - mae: 59.1903 - val_loss: 5448.8643 - val_mae: 62.2248
Epoch 5/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 102ms/step - loss: 4707.1113 - mae: 56.3594 - val_loss: 5131.8970 - val_mae: 60.0610
Epoch 6/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 104ms/step - loss: 4453.3589 - mae: 54.4378 - val_loss: 4835.1201 - val_mae: 57.8635
Epoch 7/100
[1m