In [7]:
import os
import sys

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Add the root directory to the system path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import necessary functions from model.py
from src.model import train_random_forest, train_xgboost, train_lstm, evaluate_model, save_model
from src.utils import preprocess_data

In [11]:
# Load the raw C-MAPSS FD001 files, attach Remaining-Useful-Life (RUL) labels,
# preprocess, then train and compare Random Forest, XGBoost and LSTM regressors.
train_data_path = '../data/CMaps/train_FD001.txt'
test_data_path = '../data/CMaps/test_FD001.txt'
rul_data_path = '../data/CMaps/RUL_FD001.txt'

# Each row holds 26 fields: engine id, cycle, 3 operational settings, 21 sensors.
col_names = [
    'engine_id', 'time_in_cycles',
    'operational_setting_1', 'operational_setting_2', 'operational_setting_3'
] + [f'sensor_{i}' for i in range(1, 22)]  # sensor_1 to sensor_21

# sep=r'\s+' treats any run of whitespace as one delimiter, so the trailing
# blanks at the end of each row do not turn into extra all-NaN columns.
train_df = pd.read_csv(train_data_path, sep=r'\s+', header=None, names=col_names)
test_df = pd.read_csv(test_data_path, sep=r'\s+', header=None, names=col_names)
rul_df = pd.read_csv(rul_data_path, sep=r'\s+', header=None, names=['RUL'])

# Training engines run to failure, so the RUL of a row is the number of cycles
# left until that engine's last recorded cycle.
train_last_cycle = train_df.groupby('engine_id')['time_in_cycles'].transform('max')
train_df['RUL'] = train_last_cycle - train_df['time_in_cycles']

# RUL_FD001.txt lists, in engine order, the RUL of each *test* engine at its
# last recorded cycle. Earlier rows of the same engine have that value plus the
# cycles still to be observed. Row i of the file belongs to engine_id i + 1.
rul_by_engine = rul_df['RUL'].copy()
rul_by_engine.index = rul_by_engine.index + 1
test_last_cycle = test_df.groupby('engine_id')['time_in_cycles'].transform('max')
test_df['RUL'] = (
    test_df['engine_id'].map(rul_by_engine)
    + (test_last_cycle - test_df['time_in_cycles'])
)

# Preprocess data (normalization, encoding, etc.)
train_df, test_df = preprocess_data(train_df, test_df)

# Define features and target variable
X = train_df.drop(columns=['RUL', 'engine_id', 'time_in_cycles'])  # Features (drop non-feature columns)
y = train_df['RUL']  # Target (RUL)

# Split the training data into training and validation sets.
# NOTE(review): this is a random row-level split, so cycles of the same engine
# land in both sets; consider a split grouped by engine_id for an honest score.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Model
rf_model = train_random_forest(X_train, y_train)

# Train XGBoost Model
xgb_model = train_xgboost(X_train, y_train)

# Train LSTM Model: reshape to (samples, timesteps=1, features).
# np.asarray accepts both DataFrames and ndarrays, unlike `.values`, which
# raises AttributeError when the split already produced plain arrays.
# NOTE(review): if the AttributeError persists, it originates inside
# src.model (train_lstm / evaluate_model) -- check for `.values` there.
X_train_lstm = np.asarray(X_train).reshape((X_train.shape[0], 1, X_train.shape[1]))
X_val_lstm = np.asarray(X_val).reshape((X_val.shape[0], 1, X_val.shape[1]))
lstm_model = train_lstm(X_train_lstm, y_train)

# Evaluate Models
rf_mae, rf_rmse = evaluate_model(rf_model, X_val, y_val)
xgb_mae, xgb_rmse = evaluate_model(xgb_model, X_val, y_val)
lstm_mae, lstm_rmse = evaluate_model(lstm_model, X_val_lstm, y_val)

# Print evaluation metrics
print(f"Random Forest - MAE: {rf_mae}, RMSE: {rf_rmse}")
print(f"XGBoost - MAE: {xgb_mae}, RMSE: {xgb_rmse}")
print(f"LSTM - MAE: {lstm_mae}, RMSE: {lstm_rmse}")

# Save the best model (e.g., Random Forest in this case)
# save_model(rf_model, 'random_forest_model')

AttributeError: 'numpy.ndarray' object has no attribute 'values'