In [1]:
# 1. Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# 2. Load CMAPSS FD001 data
# Column layout: unit id, cycle counter, three operational settings, then 21
# sensor measurement columns (sensor_measurement_1 .. sensor_measurement_21).
column_names = [
    'unit', 'cycle', 'operational_setting_1', 'operational_setting_2', 'operational_setting_3'
] + [f'sensor_measurement_{i}' for i in range(1, 22)]

# The file is read without a header row and split on runs of whitespace.
# The separator is a regex, so it is written as a raw string: '\s' inside a
# plain literal is an invalid escape sequence (DeprecationWarning, and a
# SyntaxWarning from Python 3.12 onwards).
df = pd.read_csv(
    'C:/Users/ammar/SHAP_ML/datasets/train_FD001.txt',
    sep=r'\s+',
    header=None,
    names=column_names,
)

# 3. Add RUL (Remaining Useful Life)
# For every row: RUL = (largest cycle recorded for that unit) - (row's cycle).
# The per-unit maximum is joined back onto df, used once, then discarded.
rul = df.groupby('unit')['cycle'].max().reset_index(name='max_cycle')
df = df.merge(rul, on='unit')
df['RUL'] = df['max_cycle'] - df['cycle']
df = df.drop(columns=['max_cycle'])

# 4. Drop low-variance/redundant columns
# One operational setting and six sensor columns are removed; the sensor
# names are generated from their indices using the same naming pattern as
# the column list used when loading the data.
dropped_sensor_ids = (1, 5, 10, 16, 18, 19)
drop_cols = ['operational_setting_3'] + [
    f'sensor_measurement_{i}' for i in dropped_sensor_ids
]
df = df.drop(columns=drop_cols)

# 5. Prepare features and target
# The target is the RUL column; the features are every remaining column
# except the unit id, the cycle counter and the target itself.
non_feature_cols = ['unit', 'cycle', 'RUL']
y = df['RUL']
X = df.drop(columns=non_feature_cols)

# 6. Train/test split (by unit)
# The split is made over unit ids rather than over rows, so all rows of a
# given unit end up on the same side (80% of units train, 20% test).
units = df['unit'].unique()
train_units, test_units = train_test_split(units, test_size=0.2, random_state=42)

# Build each membership mask once and slice the feature matrix X and target y
# prepared in step 5, instead of re-filtering df and re-dropping the same
# columns for every subset. X and y share df's index, so the masks align.
train_mask = df['unit'].isin(train_units)
test_mask = df['unit'].isin(test_units)

X_train, y_train = X[train_mask], y[train_mask]
X_test, y_test = X[test_mask], y[test_mask]

# 7. Train Random Forest model
# 100 trees with a fixed seed. fit() returns the estimator itself, so the
# fitted model is bound in a single expression.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# 8. Predict and evaluate
# Score the held-out units: mean absolute error, root mean squared error
# (square root of the MSE) and the R² coefficient.
y_pred = rf_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

for label, value in (("MAE:", mae), ("RMSE:", rmse), ("R² Score:", r2)):
    print(label, value)

# 9. Save model
# Persist the fitted estimator with joblib. The dump() call stays the final
# expression so its return value (the list of written file names) is still
# echoed as the cell output.
model_path = 'C:/Users/ammar/SHAP_ML/models/cmaps_randomforest.pkl'
joblib.dump(rf_model, model_path)

MAE: 26.011692874692876
RMSE: 35.56578029421143
R² Score: 0.7065257451592782


['C:/Users/ammar/SHAP_ML/models/cmaps_randomforest.pkl']