In [2]:
# bearing_classification_shap.py

import pandas as pd
import numpy as np
import shap
import joblib
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os

# === 1. Load Dataset ===
filepath = "C:/Users/ammar/SHAP_ML/outputs/bearing_classification.csv"
df = pd.read_csv(filepath)
X = df.drop(columns=["filename", "timestamp_index", "label"])
y = df["label"]

# === 2. SHAP Averaging Config ===
N_TRIALS = 25
n_features = X.shape[1]
shap_xgb_matrix = np.zeros((N_TRIALS, n_features))
shap_rf_matrix = np.zeros((N_TRIALS, n_features))

# === 3. Run Trials ===
print("Running 25 trials for Bearings Classification...")
for i in range(N_TRIALS):
    print(f"Trial {i+1}/{N_TRIALS}")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42 + i, stratify=y
    )

    # --- XGBoost ---
    xgb_model = xgb.XGBClassifier(eval_metric='logloss', use_label_encoder=False)
    xgb_model.fit(X_train, y_train)
    explainer_xgb = shap.Explainer(xgb_model, X_train)
    shap_vals_xgb = explainer_xgb(X_test)
    shap_xgb_matrix[i, :] = np.abs(shap_vals_xgb.values).mean(axis=0)

    # --- Random Forest ---
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42 + i)
    rf_model.fit(X_train, y_train)
    explainer_rf = shap.Explainer(rf_model, X_train)
    shap_vals_rf = explainer_rf(X_test)
    vals = shap_vals_rf.values
if vals.ndim == 3:
    # shape = (samples, features, classes), use class 1
    vals = vals[:, :, 1]
elif vals.shape[1] == 2:
    # shape = (samples, classes), fallback protection
    raise ValueError("Unexpected SHAP output shape: likely incorrect model or input format.")
shap_rf_matrix[i, :] = np.abs(vals).mean(axis=0)

# === 4. Average and Save ===
xgb_mean = shap_xgb_matrix.mean(axis=0)
rf_mean = shap_rf_matrix.mean(axis=0)

shap_xgb_df = pd.DataFrame({'feature': X.columns, 'mean_abs_shap': xgb_mean})
shap_rf_df = pd.DataFrame({'feature': X.columns, 'mean_abs_shap': rf_mean})

out_dir = "C:/Users/ammar/SHAP_ML/trials_v/bearings_classification"
os.makedirs(out_dir, exist_ok=True)

shap_xgb_df.sort_values(by='mean_abs_shap', ascending=False).to_csv(
    os.path.join(out_dir, "shap_avg_xgb.csv"), index=False)
shap_rf_df.sort_values(by='mean_abs_shap', ascending=False).to_csv(
    os.path.join(out_dir, "shap_avg_rf.csv"), index=False)

# === 5. Plot Top 10 ===
def plot_top10(df, model_name):
    top10 = df.sort_values(by='mean_abs_shap', ascending=False).head(10)
    plt.figure(figsize=(10, 5))
    plt.barh(top10['feature'][::-1], top10['mean_abs_shap'][::-1])
    plt.xlabel('Mean |SHAP value|')
    plt.title(f'Bearings Classification - Top 10 Features ({model_name})')
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"top10_{model_name.lower()}_bar.png"), dpi=300)
    plt.close()

plot_top10(shap_xgb_df, "XGBoost")
plot_top10(shap_rf_df, "RandomForest")

print("Bearings Classification SHAP averaging complete. Outputs saved.")


Running 25 trials for Bearings Classification...
Trial 1/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 2/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 3/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 4/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 5/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 6/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 7/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 8/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 9/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 10/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 11/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 12/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 13/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 14/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 15/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 16/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 17/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 18/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 19/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 20/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 21/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 22/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 23/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 24/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Trial 25/25


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Bearings Classification SHAP averaging complete. Outputs saved.
