# 02 - Train RandomForest for RUL

This notebook trains a RandomForest baseline for remaining useful life (RUL) prediction on CMAPSS (FD001), plots the feature importances, and displays the evaluation results.



In [None]:
# Setup
from pathlib import Path
import json
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
np.random.seed(42)

project_root = Path("..").resolve().parents[0] if (Path.cwd().name == "notebooks") else Path(".").resolve().parents[0]
processed_dir = Path("../data/processed").resolve()
models_dir = Path("../models").resolve()
results_dir = Path("../results").resolve()

# Ensure processed data exists
X_train_path = processed_dir / "X_train.npy"
if not X_train_path.exists():
    %run ../scripts/preprocess.py



In [None]:
# Load processed data
# Read the four preprocessed arrays in one pass; the order of the file names
# matches the order of the names being assigned.
array_files = ["X_train.npy", "y_train.npy", "X_val.npy", "y_val.npy"]
X_train, y_train, X_val, y_val = [np.load(processed_dir / fname) for fname in array_files]

# Feature names are stored next to the arrays as a JSON document.
names_path = processed_dir / "feature_names.json"
feature_names = json.loads(names_path.read_text(encoding="utf-8"))

# Show the shapes of the training and validation feature arrays as the cell output.
X_train.shape, X_val.shape


In [None]:
# Train RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Fit the RandomForest baseline. The fixed random_state keeps the fit
# reproducible across runs.
model = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Score the fitted model on the validation arrays.
y_pred = model.predict(X_val)
mae = mean_absolute_error(y_val, y_pred)
# Take the square root of the MSE explicitly: the `squared=False` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where passing it raises
# TypeError. This form works on every scikit-learn version.
rmse = np.sqrt(mean_squared_error(y_val, y_pred))
r2 = r2_score(y_val, y_pred)

print({"MAE": mae, "RMSE": rmse, "R2": r2})


In [1]:
# Feature importance plot
import pandas as pd
import matplotlib.pyplot as plt

# Pair each importance score of the fitted forest with its feature name and
# order them from most to least important.
importances = pd.Series(model.feature_importances_, index=feature_names)
imp = importances.sort_values(ascending=False)

# Draw the 20 highest-ranked features as horizontal bars on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
imp.head(20).plot(kind='barh', ax=ax)
ax.invert_yaxis()  # barh draws the first row at the bottom; flip so the top feature is on top
ax.set_title("Top-20 Feature Importances (RandomForest)")
ax.set_xlabel("Importance")
fig.tight_layout()
plt.show()


NameError: name 'model' is not defined

In [2]:
# Save artifacts via training script for consistency
%run ../scripts/train_rf_model.py

# Display evaluation report
print((results_dir / "evaluation_report.txt").read_text())


ModuleNotFoundError: No module named 'scripts.evaluate'

NameError: name 'results_dir' is not defined