In [2]:
import os
import sys

# Add the root directory to the system path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import necessary functions from model.py
import importlib
from src.model import train_random_forest, train_xgboost, train_lstm, evaluate_model, save_model
from src.utils import preprocess_data
from sklearn.model_selection import train_test_split
import pandas as pd

Load your data (assuming train_df and test_df are already preprocessed)

In [3]:
# importlib.reload(src.model)

# Example for loading data (you can skip this if data is already loaded)
train_data_path = '../data/CMaps/train_FD001.txt'
test_data_path = '../data/CMaps/test_FD001.txt'
rul_data_path = '../data/CMaps/RUL_FD001.txt'

# Create column names
col_names = [
    'engine_id', 'time_in_cycles', 
    'operational_setting_1', 'operational_setting_2', 'operational_setting_3'
] + [f'sensor_{i}' for i in range(1, 27)]  # This creates sensor_1 to sensor_21

# Read the data into pandas dataframes
train_df = pd.read_csv(train_data_path, sep=' ', header=None, names=col_names)
test_df = pd.read_csv(test_data_path, sep=' ', header=None, names=col_names)
rul_df = pd.read_csv(rul_data_path, header=None, names=['RUL'])

Preprocess data (normalization, encoding, etc.)

In [4]:
# Drop NaN values
train_df.dropna(axis=1, how='all', inplace=True)
test_df.dropna(axis=1, how='all', inplace=True)

# Add RUL values to data
test_df['RUL'] = test_df['engine_id'].map(lambda x: rul_df.loc[x - 1, 'RUL'])
train_df['RUL'] = train_df['engine_id'].map(lambda x: rul_df.loc[x - 1, 'RUL'])

# train_df, test_df = preprocess_data(train_df, test_df)

Testing the models

In [5]:
# Define features and target variable
X = train_df.drop(columns=['RUL', 'engine_id', 'time_in_cycles'])  # Features (drop non-feature columns) - all columns except the target column
y = train_df['RUL']  # Target (RUL)

# Split the training data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

"Train Random Forest Model"
rf_model = train_random_forest(X_train, y_train)

# Train XGBoost Model
xgb_model, dval = train_xgboost(X_train, X_val, y_train, y_val)

# Train LSTM Model (reshape for LSTM input)
# X_train_lstm = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
# X_val_lstm = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))
lstm_model = train_lstm(X_train, y_train)

# Evaluate Models
rf_mae, rf_rmse = evaluate_model(rf_model, X_val, y_val)
xgb_mae, xgb_rmse = evaluate_model(xgb_model, dval, y_val)
lstm_mae, lstm_rmse = evaluate_model(lstm_model, X_val, y_val)

# Print evaluation metrics
print(f"Random Forest - MAE: {rf_mae}, RMSE: {rf_rmse}")
print(f"XGBoost - MAE: {xgb_mae}, RMSE: {xgb_rmse}")
print(f"LSTM - MAE: {lstm_mae}, RMSE: {lstm_rmse}")

# Save the best model (e.g., Random Forest in this case)
# save_model(rf_model, 'random_forest_model')

[0]	eval-rmse:41.40210
[1]	eval-rmse:41.16507
[2]	eval-rmse:41.04990
[3]	eval-rmse:41.01928
[4]	eval-rmse:40.95461
[5]	eval-rmse:40.98197
[6]	eval-rmse:40.98020
[7]	eval-rmse:41.01046
[8]	eval-rmse:41.00811
[9]	eval-rmse:41.02626
[10]	eval-rmse:41.04439
[11]	eval-rmse:41.04784
[12]	eval-rmse:41.10469
[13]	eval-rmse:41.12859
[14]	eval-rmse:41.14795
[15]	eval-rmse:41.16565
[16]	eval-rmse:41.17014
[17]	eval-rmse:41.23583
[18]	eval-rmse:41.25310
[19]	eval-rmse:41.25674
[20]	eval-rmse:41.28022
[21]	eval-rmse:41.31827
[22]	eval-rmse:41.34931
[23]	eval-rmse:41.39355
[24]	eval-rmse:41.41230
[25]	eval-rmse:41.45475
[26]	eval-rmse:41.44801
[27]	eval-rmse:41.45946
[28]	eval-rmse:41.51802
[29]	eval-rmse:41.54504
[30]	eval-rmse:41.56192
[31]	eval-rmse:41.58399
[32]	eval-rmse:41.61107
[33]	eval-rmse:41.65905
[34]	eval-rmse:41.67317
[35]	eval-rmse:41.67754
[36]	eval-rmse:41.73702
[37]	eval-rmse:41.75877
[38]	eval-rmse:41.77945
[39]	eval-rmse:41.79709
[40]	eval-rmse:41.83025
[41]	eval-rmse:41.84325
[4