Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions plots/pp-basic/implementations/matplotlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
""" pyplots.ai
pp-basic: Probability-Probability (P-P) Plot
Library: matplotlib 3.10.8 | Python 3.14.3
Quality: 90/100 | Created: 2026-03-15
"""

import os as _os
import sys as _sys


# Avoid filename shadowing: remove script dir so 'matplotlib' resolves to the real package
# (this file is itself named matplotlib.py, so its own directory must not be searched).
_script_dir = _os.path.abspath(_os.path.dirname(__file__))
# Rebuild sys.path without any entry that resolves to the script directory; the second
# clause also drops the "" entry when the current working directory is the script dir.
_sys.path = [
p for p in _sys.path if _os.path.abspath(p) != _script_dir and not (p == "" and _os.getcwd() == _script_dir)
]
# Discard any module already cached under the name "matplotlib" so the imports that
# follow are resolved afresh against the cleaned sys.path.
_sys.modules.pop("matplotlib", None)

from math import erf, sqrt # noqa: E402

import matplotlib.patheffects as pe # noqa: E402
import matplotlib.pyplot as plt # noqa: E402
import matplotlib.ticker as mticker # noqa: E402
import numpy as np # noqa: E402


# Put the script directory back at the front of sys.path now that the third-party
# imports have been resolved (presumably so later script-relative imports still
# work — TODO confirm).
_sys.path.insert(0, _script_dir)

# Standard normal CDF from math.erf, wrapped with np.vectorize so it accepts arrays (no scipy)
normal_cdf = np.vectorize(lambda z: 0.5 * (1 + erf(z / sqrt(2))))

# Data — manufacturing quality control: simulated bolt tensile strength (MPa).
# 160 draws around 840 MPa plus 40 draws around 910 MPa form a 200-point mixture,
# i.e. roughly 20% of the batch comes from a second supplier.
np.random.seed(42)
sample_size = 200
primary_batch = np.random.normal(840, 35, 160)
secondary_batch = np.random.normal(910, 28, 40)
tensile_strength = np.concatenate((primary_batch, secondary_batch))

# Empirical CDF: plotting positions i / (n + 1) for the sorted sample
observed_sorted = np.sort(tensile_strength)
ranks = np.arange(1, sample_size + 1)
empirical_cdf = ranks / (sample_size + 1)

# Theoretical CDF: normal distribution fitted with the sample mean and the ddof=0 standard deviation
mu = observed_sorted.mean()
sigma = observed_sorted.std(ddof=0)
z_scores = (observed_sorted - mu) / sigma
theoretical_cdf = normal_cdf(z_scores)

# Signed offset of each point from the 45-degree line (empirical minus theoretical)
deviation = empirical_cdf - theoretical_cdf

# Plot — single axes on a square 12 x 12 inch figure
fig, ax = plt.subplots(figsize=(12, 12))

# Approximate 95% band around the diagonal: p ± 1.96 * sqrt(p * (1 - p) / n)
band_x = np.linspace(0, 1, 200)
se = np.sqrt(band_x * (1 - band_x) / sample_size)
margin = 1.96 * se
ax.fill_between(
    band_x,
    band_x - margin,
    band_x + margin,
    color="#306998",
    alpha=0.08,
    zorder=0,
    label="95% confidence band",
)

# Dashed 45-degree reference line; a wider, lighter stroke is drawn first to give it visual depth
(diagonal,) = ax.plot(
    [0, 1],
    [0, 1],
    color="#888888",
    linewidth=1.8,
    linestyle="--",
    zorder=1,
    label="Perfect normal fit",
)
diagonal.set_path_effects([pe.Stroke(linewidth=3.5, foreground="#DDDDDD"), pe.Normal()])

# Scatter — points whose |deviation| exceeds 0.03 are red, all others blue
far_from_diagonal = np.abs(deviation) > 0.03
colors = np.where(far_from_diagonal, "#C44E52", "#306998")
ax.scatter(
    theoretical_cdf,
    empirical_cdf,
    s=70,
    c=colors,
    alpha=0.65,
    edgecolors="white",
    linewidth=0.6,
    zorder=3,
)

# Annotate the S-shaped deviation region: only when at least one point lies
# more than 0.03 away from the diagonal.
dev_mask = np.abs(deviation) > 0.03
if dev_mask.any():
    # Point the arrow at the flagged point in the middle of the flagged indices
    # (indices follow the sorted-sample order of the CDF arrays).
    dev_indices = np.where(dev_mask)[0]
    mid = dev_indices[len(dev_indices) // 2]
    ax.annotate(
        "Heavier upper tail\n(secondary supplier batch)",
        xy=(theoretical_cdf[mid], empirical_cdf[mid]),
        xytext=(0.25, 0.82),
        fontsize=14,
        color="#C44E52",
        fontweight="medium",
        arrowprops={"arrowstyle": "-|>", "color": "#C44E52", "lw": 1.5, "connectionstyle": "arc3,rad=-0.2"},
        bbox={"boxstyle": "round,pad=0.4", "facecolor": "white", "edgecolor": "#C44E52", "alpha": 0.9},
        zorder=5,
    )

# Style — axis text
ax.set_xlabel("Theoretical Cumulative Probability (Normal)", fontsize=20)
ax.set_ylabel("Empirical Cumulative Probability", fontsize=20)
ax.set_title("pp-basic · matplotlib · pyplots.ai", fontsize=24, fontweight="medium", pad=12)
ax.tick_params(axis="both", labelsize=16)

# Same tick spacing on both axes: major every 0.2, minor every 0.1.
# A fresh locator is created per axis rather than sharing one instance.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(mticker.MultipleLocator(0.2))
    axis.set_minor_locator(mticker.MultipleLocator(0.1))

# Identical, slightly padded limits on both axes, drawn at equal scale
axis_limits = (-0.02, 1.02)
ax.set_xlim(*axis_limits)
ax.set_ylim(*axis_limits)
ax.set_aspect("equal")

# Spines — hide top/right, thin and soften left/bottom
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
for side in ("left", "bottom"):
    ax.spines[side].set_linewidth(0.8)
    ax.spines[side].set_color("#555555")

# Grid — faint major lines plus even fainter minor lines on both axes
for which, alpha, width in (("major", 0.15, 0.6), ("minor", 0.06, 0.4)):
    for axis in (ax.yaxis, ax.xaxis):
        axis.grid(True, alpha=alpha, linewidth=width, which=which)

# Legend
ax.legend(fontsize=14, loc="lower right", framealpha=0.9, edgecolor="#CCCCCC")

# Subtitle with domain context, horizontally centred at x=0.5, y=0.96
subtitle_style = {"ha": "center", "fontsize": 14, "color": "#666666", "style": "italic"}
fig.text(
    0.5,
    0.96,
    "Bolt tensile strength (MPa) vs. normal distribution — quality control diagnostic",
    **subtitle_style,
)

# Lower the top of the axes area to 0.91, then write the figure to plot.png
fig.subplots_adjust(top=0.91)
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
230 changes: 230 additions & 0 deletions plots/pp-basic/metadata/matplotlib.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
library: matplotlib
specification_id: pp-basic
created: '2026-03-15T20:16:45Z'
updated: '2026-03-15T20:35:11Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 23118400187
issue: 4587
python_version: 3.14.3
library_version: 3.10.8
preview_url: https://storage.googleapis.com/pyplots-images/plots/pp-basic/matplotlib/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/pp-basic/matplotlib/plot_thumb.png
preview_html: null
quality_score: 90
review:
strengths:
- Excellent data storytelling through color-coded deviations and domain-contextual
annotation
- 'Strong visual refinement: path effects, thinned/colored spines, dual-level grid,
confidence band'
- Perfect spec compliance with all required P-P plot features including square aspect
ratio and i/(n+1) formula
- Realistic manufacturing quality control scenario with plausible bolt tensile strength
values
- 95% confidence band adds genuine statistical value beyond spec requirements
weaknesses:
- Marker size (s=70) could be slightly larger for better visibility at 200 points
- Blue/red color pairing could be improved for deuteranopia accessibility
image_description: 'The plot displays a P-P (Probability-Probability) plot on a
square canvas with both axes ranging from 0 to 1. The x-axis is labeled "Theoretical
Cumulative Probability (Normal)" and the y-axis "Empirical Cumulative Probability."
The title reads "pp-basic · matplotlib · pyplots.ai" with an italic subtitle "Bolt
tensile strength (MPa) vs. normal distribution — quality control diagnostic."
Data points are colored in two tones: Python Blue (#306998) for points near the
diagonal and coral red (#C44E52) for points deviating more than 0.03 from the
reference line. A dashed gray 45-degree reference line with a subtle stroke path
effect runs diagonally. A light blue 95% confidence band surrounds the reference
line. An annotation with a curved arrow points to the deviation region, reading
"Heavier upper tail (secondary supplier batch)." The legend in the lower right
shows "95% confidence band" and "Perfect normal fit." Top and right spines are
removed; remaining spines are thinned and colored gray. Both major and minor grid
lines are present at very low alpha. The S-shaped departure from the diagonal
in the upper portion is clearly visible, telling the story of a mixture distribution.'
criteria_checklist:
visual_quality:
score: 28
max: 30
items:
- id: VQ-01
name: Text Legibility
score: 8
max: 8
passed: true
comment: 'All font sizes explicitly set: title 24pt, labels 20pt, ticks 16pt,
legend 14pt, annotation 14pt'
- id: VQ-02
name: No Overlap
score: 6
max: 6
passed: true
comment: No overlapping text; annotation well-positioned away from data
- id: VQ-03
name: Element Visibility
score: 5
max: 6
passed: true
comment: s=70 for 200 points within guideline range but on smaller side; alpha=0.65
appropriate
- id: VQ-04
name: Color Accessibility
score: 3
max: 4
passed: true
comment: Blue vs coral/red generally colorblind-safe but not optimal for all
types
- id: VQ-05
name: Layout & Canvas
score: 4
max: 4
passed: true
comment: Square aspect ratio well-utilized, balanced margins, nothing cut
off
- id: VQ-06
name: Axis Labels & Title
score: 2
max: 2
passed: true
comment: Descriptive labels with distribution context specified
design_excellence:
score: 16
max: 20
items:
- id: DE-01
name: Aesthetic Sophistication
score: 6
max: 8
passed: true
comment: 'Strong design: custom two-color deviation encoding, path effects,
confidence band, italic subtitle'
- id: DE-02
name: Visual Refinement
score: 5
max: 6
passed: true
comment: Spines removed/thinned, subtle major+minor grid, generous whitespace
- id: DE-03
name: Data Storytelling
score: 5
max: 6
passed: true
comment: Color-coded deviations, annotation explains cause, subtitle provides
domain context
spec_compliance:
score: 15
max: 15
items:
- id: SC-01
name: Plot Type
score: 5
max: 5
passed: true
comment: 'Correct P-P plot: empirical CDF vs theoretical CDF'
- id: SC-02
name: Required Features
score: 4
max: 4
passed: true
comment: 'All features: reference line, square aspect, i/(n+1) formula, theoretical
CDF, S-shaped deviation'
- id: SC-03
name: Data Mapping
score: 3
max: 3
passed: true
comment: X=theoretical CDF, Y=empirical CDF, both 0-1
- id: SC-04
name: Title & Legend
score: 3
max: 3
passed: true
comment: Title format correct; legend labels meaningful
data_quality:
score: 15
max: 15
items:
- id: DQ-01
name: Feature Coverage
score: 6
max: 6
passed: true
comment: Mixture distribution creates clear S-shaped departure, showing both
conforming and deviating regions
- id: DQ-02
name: Realistic Context
score: 5
max: 5
passed: true
comment: Bolt tensile strength for quality control is real, neutral engineering
scenario
- id: DQ-03
name: Appropriate Scale
score: 4
max: 4
passed: true
comment: 840/910 MPa with sigma 28-35 realistic for steel bolt tensile strength
code_quality:
score: 9
max: 10
items:
- id: CQ-01
name: KISS Structure
score: 2
max: 3
passed: true
comment: Linear flow, but the sys.path manipulation adds boilerplate; the vectorized
normal_cdf helper is borderline a function definition
- id: CQ-02
name: Reproducibility
score: 2
max: 2
passed: true
comment: np.random.seed(42) set
- id: CQ-03
name: Clean Imports
score: 2
max: 2
passed: true
comment: All imports used
- id: CQ-04
name: Code Elegance
score: 2
max: 2
passed: true
comment: Clean, appropriate complexity, no fake UI
- id: CQ-05
name: Output & API
score: 1
max: 1
passed: true
comment: Saves as plot.png, dpi=300, bbox_inches=tight
library_mastery:
score: 7
max: 10
items:
- id: LM-01
name: Idiomatic Usage
score: 4
max: 5
passed: true
comment: Consistent Axes methods, mticker for tick control, fill_between for
confidence band
- id: LM-02
name: Distinctive Features
score: 3
max: 5
passed: true
comment: Uses patheffects for reference line depth, MultipleLocator for precise
ticks
verdict: APPROVED
impl_tags:
dependencies: []
techniques:
- annotations
- manual-ticks
patterns:
- data-generation
dataprep: []
styling:
- alpha-blending
- edge-highlighting
- grid-styling
Loading