In [112]:
feature_names_clean = X_train_clean.columns.tolist()

n_samples = 500 
if len(X_train_clean) > n_samples:
    X_train_sample = X_train_clean.sample(n=n_samples, random_state=1)
else:
    X_train_sample = X_train_clean # Use full data if it's already small

print(f"SVR - SHAP Analysis on a sample of {len(X_train_sample)} data points")
print(f"Features being analyzed: {feature_names_clean}\n")

# 1. Select the SVR model
model_svr = best_svr_bayes.named_steps['svr'] 

# 2. Create Background Data Summary (using the full training set is best practice)
# This ensures the summary is representative of the whole dataset's characteristics.
# You could also slightly reduce the number of centroids (e.g., to 50) for a small extra speedup.
print("Creating background data summary with k-means...")
background_data = shap.kmeans(X_train_clean, 100)

# 3. Create SHAP Explainer (KernelExplainer for SVR)
explainer_svr = shap.KernelExplainer(model_svr, background_data)

# 4. Calculate SHAP Values on the SMALLER SAMPLE
print(f"Calculating SHAP values for {len(X_train_sample)} samples... (This will be much faster)")
shap_values_svr = explainer_svr.shap_values(X_train_sample)

# Plot 4: SVR - Mean Absolute SHAP Values (Feature Importance)
print("Saving SVR - Feature Importance plot...")
plt.figure(figsize=(10, 6))
shap.summary_plot(shap_values_svr, X_train_sample, feature_names=feature_names_clean, plot_type="bar", show=False)
plt.title("SVR - Mean Absolute SHAP Values (from Sample)\n(Feature Importance)", fontsize=14, fontweight='bold')
plt.xlabel("mean(|SHAP value|)", fontsize=12)
plt.tight_layout()
plt.savefig("svr_shap_importance_sample.png", dpi=300, bbox_inches='tight')
plt.show()

# Plot 5: SVR - SHAP Summary Plot (Dependency)
print("Saving SVR - SHAP Summary plot...")
plt.figure(figsize=(12, 8))
shap.summary_plot(shap_values_svr, X_train_sample, feature_names=feature_names_clean, show=False)
plt.title("SVR - SHAP Summary Plot (from Sample)\n(Feature Impact on Output)", fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig("svr_shap_summary_sample.png", dpi=300, bbox_inches='tight')
plt.show()

# Plot 6: SVR - Force Plot (first instance of the sample)
print("Saving SVR - Force plot...")
plt.figure(figsize=(14, 4))
expected_value = explainer_svr.expected_value
if isinstance(expected_value, (list, np.ndarray)) and hasattr(expected_value, '__iter__'):
    expected_value = expected_value[0]

shap.force_plot(
    expected_value, 
    shap_values_svr[0], # First row of SHAP values from the sample
    X_train_sample.iloc[0], # First row of data from the sample
    feature_names=feature_names_clean, # Corrected variable name from your original code
    matplotlib=True, 
    show=False
)
plt.title("SVR - Force Plot (Instance 1 from Sample)", fontsize=12, fontweight='bold')
plt.tight_layout()
plt.savefig("svr_force_plot_sample.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nAll SVR SHAP plots saved successfully.")

SVR - SHAP Analysis on a sample of 500 data points
Features being analyzed: ['cement', 'slag', 'ash', 'water', 'superplasticizer', 'coarseagg', 'fineagg', 'water_cement_ratio', 'water_binder_ratio', 'age_bin', 'is_very_early', 'is_early_cure', 'is_7day', 'is_28day', 'is_mature', 'is_very_mature', 'has_slag', 'has_ash', 'has_superplasticizer']

Creating background data summary with k-means...
Provided model function fails when applied to the provided data set.


TypeError: 'SVR' object is not callable