Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions plots/pdp-basic/implementations/matplotlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
""" pyplots.ai
pdp-basic: Partial Dependence Plot
Library: matplotlib 3.10.8 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-31
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import partial_dependence


# Data: Train a gradient boosting model and compute partial dependence
np.random.seed(42)
X, y = make_regression(n_samples=500, n_features=5, noise=15, random_state=42)

# Train model
model = GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42)
model.fit(X, y)

# Compute partial dependence for feature 0
feature_idx = 0

# Get partial dependence using sklearn
pd_result = partial_dependence(model, X, features=[feature_idx], kind="both", grid_resolution=80)
pdp_values = pd_result["average"][0]
ice_lines = pd_result["individual"][0]
grid_values = pd_result["grid_values"][0]

# Calculate confidence interval (mean ± std of ICE lines)
ice_mean = pdp_values
ice_std = np.std(ice_lines, axis=0)

# Create plot (4800x2700 px)
fig, ax = plt.subplots(figsize=(16, 9))

# Plot ICE lines (faint individual lines)
for i in range(0, len(ice_lines), 10): # Sample every 10th line for clarity
ax.plot(grid_values, ice_lines[i], color="#306998", alpha=0.1, linewidth=1)

# Plot confidence band
ax.fill_between(
grid_values,
ice_mean - 1.96 * ice_std,
ice_mean + 1.96 * ice_std,
alpha=0.25,
color="#306998",
label="95% Confidence Interval",
)

# Plot main PDP line
ax.plot(grid_values, pdp_values, color="#306998", linewidth=4, label="Partial Dependence")

# Add rug plot showing data distribution
rug_y = ax.get_ylim()[0]
ax.scatter(
X[:, feature_idx], np.full(len(X), rug_y), marker="|", color="#FFD43B", alpha=0.4, s=100, label="Data Distribution"
)

# Labels and styling
ax.set_xlabel("Feature Value", fontsize=20)
ax.set_ylabel("Partial Dependence (Predicted Value)", fontsize=20)
ax.set_title("pdp-basic · matplotlib · pyplots.ai", fontsize=24)
ax.tick_params(axis="both", labelsize=16)
ax.legend(fontsize=16, loc="upper left")
ax.grid(True, alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
29 changes: 29 additions & 0 deletions plots/pdp-basic/metadata/matplotlib.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
library: matplotlib
specification_id: pdp-basic
created: '2025-12-31T05:34:35Z'
updated: '2025-12-31T05:46:31Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20612797918
issue: 2922
python_version: 3.13.11
library_version: 3.10.8
preview_url: https://storage.googleapis.com/pyplots-images/plots/pdp-basic/matplotlib/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/pdp-basic/matplotlib/plot_thumb.png
preview_html: null
quality_score: 92
review:
strengths:
- Excellent implementation of PDP with ICE lines using sklearn partial_dependence
function with kind=both
- Proper confidence band visualization using fill_between with mean ± 1.96*std
- Rug plot effectively shows training data distribution along x-axis
- Clean, readable code that follows KISS principles
- Good colorblind-safe color scheme with Python blue and yellow
- Appropriate text sizing for 4800x2700 canvas
weaknesses:
- Legend in upper left slightly overlaps with ICE lines - consider frameon=True
with white background
- Rug plot markers (s=100) could be larger (s=150-200) for better visibility on
high-resolution canvas
- Y-axis range dominated by wide confidence band making PDP line variation appear
minimal