In [8]:
import os

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# === 1. Load data ===
# The CSV location can be overridden with the BEARING_RUL_CSV environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset or empty, the original path is used.
DATA_PATH = (
    os.environ.get("BEARING_RUL_CSV")
    or "C:/Users/ammar/SHAP_ML/outputs/bearing_rul.csv"
)
df = pd.read_csv(DATA_PATH)

# Fail early, with a readable message, if the columns used below are absent.
_missing_cols = {"RUL", "filename"} - set(df.columns)
if _missing_cols:
    raise KeyError(
        f"{DATA_PATH} is missing expected column(s): {sorted(_missing_cols)}"
    )

# Features are every column except the target ("RUL") and "filename".
X = df.drop(columns=["RUL", "filename"])
y = df["RUL"]

# === 2. Trial setup ===
# Number of repeated train/test evaluations.
N_TRIALS = 25
# One [MAE, RMSE, R²] row is collected per trial for each model; the two
# accumulators are independent list objects.
xgb_scores, rf_scores = [], []

# === 3. Run N_TRIALS trials ===
# Each trial draws a fresh random 80/20 split and trains both models with the
# same per-trial seed, then appends one [MAE, RMSE, R²] row per model.
#
# NOTE(review): train_test_split shuffles individual rows. If neighbouring rows
# are correlated measurements from the same bearing run (cannot be verified
# from this cell), near-duplicate samples end up on both sides of the split
# and the scores below are optimistic. A group- or time-aware split would then
# be required -- TODO confirm against how the CSV was built.
SEED_BASE = 42  # trial i uses SEED_BASE + i for the split and for both models


def _fit_and_score(model, X_fit, X_eval, y_fit, y_eval):
    """Fit ``model`` on the training split and score it on the held-out split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_fit, y_fit: Training features and target.
        X_eval, y_eval: Held-out features and target.

    Returns:
        tuple: ``(y_pred, scores)`` where ``y_pred`` is the prediction for
        ``X_eval`` and ``scores`` is the list ``[MAE, RMSE, R²]``.
    """
    model.fit(X_fit, y_fit)
    y_pred = model.predict(X_eval)
    scores = [
        mean_absolute_error(y_eval, y_pred),
        np.sqrt(mean_squared_error(y_eval, y_pred)),  # RMSE
        r2_score(y_eval, y_pred),
    ]
    return y_pred, scores


for i in range(N_TRIALS):
    seed = SEED_BASE + i
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    # XGBoost
    xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=seed)
    y_pred_xgb, xgb_trial_scores = _fit_and_score(
        xgb_model, X_train, X_test, y_train, y_test
    )
    xgb_scores.append(xgb_trial_scores)

    # Random Forest
    rf_model = RandomForestRegressor(n_estimators=100, random_state=seed)
    y_pred_rf, rf_trial_scores = _fit_and_score(
        rf_model, X_train, X_test, y_train, y_test
    )
    rf_scores.append(rf_trial_scores)

# === 4. Create DataFrames and Print Mean Results ===
# Column order matches the [MAE, RMSE, R²] rows appended in each trial.
metrics = ['MAE', 'RMSE', 'R²']
xgb_df, rf_df = (
    pd.DataFrame(trial_rows, columns=metrics)
    for trial_rows in (xgb_scores, rf_scores)
)

# Print the per-metric mean across all trials, XGBoost first.
for heading, score_frame in (
    ("XGBoost Mean Scores:", xgb_df),
    ("Random Forest Mean Scores:", rf_df),
):
    print(heading)
    print(score_frame.mean())

XGBoost Mean Scores:
MAE     3.915804
RMSE    5.011498
R²      0.999935
dtype: float64
Random Forest Mean Scores:
MAE     0.871907
RMSE    1.113974
R²      0.999997
dtype: float64
