# In [1]:
# 1. Imports
import pandas as pd
import joblib
import shap
from sklearn.model_selection import train_test_split

# 2. Load and prepare CMAPSS data
# Column layout handed to read_csv: 'unit', 'cycle', three operational
# settings, then sensor_measurement_1 .. sensor_measurement_21.
column_names = [
    'unit', 'cycle', 'operational_setting_1', 'operational_setting_2', 'operational_setting_3'
] + [f'sensor_measurement_{i}' for i in range(1, 22)]

# The separator is a regex, so it is written as a raw string: '\s' inside a
# plain string literal is an invalid escape sequence and emits a warning on
# current Python versions.
df = pd.read_csv(
    'C:/Users/ammar/SHAP_ML/datasets/train_FD001.txt',
    sep=r'\s+',
    header=None,
    names=column_names,
)

# RUL for a row = the largest cycle recorded for its unit minus the row's cycle,
# so the last recorded row of every unit gets RUL == 0.
rul = df.groupby('unit')['cycle'].max().reset_index()
rul.columns = ['unit', 'max_cycle']
df = df.merge(rul, on='unit')
df['RUL'] = df['max_cycle'] - df['cycle']
df.drop(columns=['max_cycle'], inplace=True)  # helper column, only needed for RUL

# Columns removed before modelling.
# NOTE(review): presumably these carry no signal in FD001 and must match the
# feature set the saved models were trained on -- confirm.
drop_cols = ['operational_setting_3', 'sensor_measurement_1', 'sensor_measurement_5',
             'sensor_measurement_10', 'sensor_measurement_16', 'sensor_measurement_18',
             'sensor_measurement_19']
df.drop(columns=drop_cols, inplace=True)

# Feature matrix (identifiers and target removed) and target vector.
X = df.drop(columns=['unit', 'cycle', 'RUL'])
y = df['RUL']

# 3. Split
# The split is done on unit ids rather than on rows, so all cycles of a given
# unit end up on the same side of the split.
# NOTE(review): test_size/random_state presumably need to match the values used
# when the saved models were trained, so the same units are held out -- confirm.
units = df['unit'].unique()
train_units, test_units = train_test_split(units, test_size=0.2, random_state=42)

X_test = df[df['unit'].isin(test_units)].drop(columns=['unit', 'cycle', 'RUL'])

# Optional: subsample for speed
# Cap the sample size at the number of available rows: sample(n=500) without
# replacement raises ValueError when X_test has fewer than 500 rows.
X_sample = X_test.sample(n=min(500, len(X_test)), random_state=42)

# 4. Load models
# Both models are read from joblib pickles on local disk. joblib.load unpickles
# arbitrary objects, so these files should only ever come from a trusted source.
xgb_model_path = 'C:/Users/ammar/SHAP_ML/models/cmaps_xgboost.pkl'
rf_model_path = 'C:/Users/ammar/SHAP_ML/models/cmaps_randomforest.pkl'

xgb_model = joblib.load(xgb_model_path)
rf_model = joblib.load(rf_model_path)

# 5. Compute SHAP values
# One explainer per model, each built from the model alone (no explicit
# algorithm or background data is passed to shap.Explainer).
xgb_explainer = shap.Explainer(xgb_model)
rf_explainer = shap.Explainer(rf_model)

# Both explainers are evaluated on the same subsample, so the two sets of
# SHAP values refer to the same rows.
xgb_shap_values = xgb_explainer(X_sample)
rf_shap_values = rf_explainer(X_sample)

# 6. Save for visualization
# Persist the sampled features together with both sets of SHAP values as a
# single 3-tuple: (X_sample, xgb_shap_values, rf_shap_values).
shap_output_path = 'C:/Users/ammar/SHAP_ML/models/cmaps_shap_values.pkl'
shap_bundle = (X_sample, xgb_shap_values, rf_shap_values)
joblib.dump(shap_bundle, shap_output_path)

# Cell output: ['C:/Users/ammar/SHAP_ML/models/cmaps_shap_values.pkl']