diff --git a/plots/pp-basic/implementations/matplotlib.py b/plots/pp-basic/implementations/matplotlib.py
new file mode 100644
index 0000000000..7975d667b9
--- /dev/null
+++ b/plots/pp-basic/implementations/matplotlib.py
@@ -0,0 +1,123 @@
+""" pyplots.ai
+pp-basic: Probability-Probability (P-P) Plot
+Library: matplotlib 3.10.8 | Python 3.14.3
+Quality: 90/100 | Created: 2026-03-15
+"""
+
+import os as _os
+import sys as _sys
+
+
+# Avoid filename shadowing: this script is itself named matplotlib.py, so its directory is
+# taken off sys.path while the real 'matplotlib' package is imported
+_script_dir = _os.path.abspath(_os.path.dirname(__file__))
+_sys.path = [
+    p for p in _sys.path if _os.path.abspath(p) != _script_dir and not (p == "" and _os.getcwd() == _script_dir)
+]
+# Forget any module already cached under that name so the imports below resolve afresh
+_sys.modules.pop("matplotlib", None)
+
+from math import erf, sqrt  # noqa: E402
+
+import matplotlib.patheffects as pe  # noqa: E402
+import matplotlib.pyplot as plt  # noqa: E402
+import matplotlib.ticker as mticker  # noqa: E402
+import numpy as np  # noqa: E402
+
+
+# Put the script directory back now that the real package is loaded
+_sys.path.insert(0, _script_dir)
+
+# Standard normal CDF, Phi(z) = 0.5 * (1 + erf(z / sqrt(2))), vectorized without scipy
+normal_cdf = np.vectorize(lambda x: 0.5 * (1 + erf(x / sqrt(2))))
+
+# Data — manufacturing quality control: bolt tensile strength measurements
+# Mixture simulates a batch with ~20% from a slightly different supplier
+np.random.seed(42)
+primary_batch = np.random.normal(loc=840, scale=35, size=160)  # MPa
+secondary_batch = np.random.normal(loc=910, scale=28, size=40)  # MPa
+tensile_strength = np.concatenate([primary_batch, secondary_batch])
+# Take n from the data itself so it cannot drift from the batch sizes above
+sample_size = tensile_strength.size
+
+observed_sorted = np.sort(tensile_strength)
+# Plotting positions i / (n + 1) keep every empirical probability strictly inside (0, 1)
+empirical_cdf = np.arange(1, sample_size + 1) / (sample_size + 1)
+
+# Reference normal fitted with the sample mean and the ddof=0 standard deviation
+mu, sigma = observed_sorted.mean(), observed_sorted.std(ddof=0)
+theoretical_cdf = normal_cdf((observed_sorted - mu) / sigma)
+
+# Deviation from the diagonal; the mask is computed once and drives both the point
+# colors and the annotation so the two can never disagree
+deviation = empirical_cdf - theoretical_cdf
+deviation_threshold = 0.03
+dev_mask = np.abs(deviation) > deviation_threshold
+
+# Plot
+fig, ax = plt.subplots(figsize=(12, 12))
+
+# 95% band around the diagonal, p ± 1.96 * sqrt(p * (1 - p) / n), on a 200-point grid
+band_x = np.linspace(0, 1, 200)
+se = np.sqrt(band_x * (1 - band_x) / sample_size)
+ax.fill_between(
+    band_x, band_x - 1.96 * se, band_x + 1.96 * se, color="#306998", alpha=0.08, zorder=0, label="95% confidence band"
+)
+
+# Reference line with path effect for visual depth
+ref_line = ax.plot([0, 1], [0, 1], color="#888888", linewidth=1.8, linestyle="--", zorder=1, label="Perfect normal fit")
+ref_line[0].set_path_effects([pe.Stroke(linewidth=3.5, foreground="#DDDDDD"), pe.Normal()])
+
+# Scatter — red marks points beyond the deviation threshold, blue the rest
+colors = np.where(dev_mask, "#C44E52", "#306998")
+ax.scatter(theoretical_cdf, empirical_cdf, s=70, c=colors, alpha=0.65, edgecolors="white", linewidth=0.6, zorder=3)
+
+# Annotate the deviating region; the arrow targets the middle one of the flagged points
+if dev_mask.any():
+    dev_indices = np.flatnonzero(dev_mask)
+    mid = dev_indices[len(dev_indices) // 2]
+    ax.annotate(
+        "Heavier upper tail\n(secondary supplier batch)",
+        xy=(theoretical_cdf[mid], empirical_cdf[mid]),
+        xytext=(0.25, 0.82),
+        fontsize=14,
+        color="#C44E52",
+        fontweight="medium",
+        arrowprops={"arrowstyle": "-|>", "color": "#C44E52", "lw": 1.5, "connectionstyle": "arc3,rad=-0.2"},
+        bbox={"boxstyle": "round,pad=0.4", "facecolor": "white", "edgecolor": "#C44E52", "alpha": 0.9},
+        zorder=5,
+    )
+
+# Style
+ax.set_xlabel("Theoretical Cumulative Probability (Normal)", fontsize=20)
+ax.set_ylabel("Empirical Cumulative Probability", fontsize=20)
+ax.set_title("pp-basic · matplotlib · pyplots.ai", fontsize=24, fontweight="medium", pad=12)
+ax.tick_params(axis="both", labelsize=16)
+for axis in (ax.xaxis, ax.yaxis):
+    axis.set_major_locator(mticker.MultipleLocator(0.2))
+    axis.set_minor_locator(mticker.MultipleLocator(0.1))
+ax.set_xlim(-0.02, 1.02)
+ax.set_ylim(-0.02, 1.02)
+ax.set_aspect("equal")
+ax.spines[["top", "right"]].set_visible(False)
+ax.spines[["left", "bottom"]].set_linewidth(0.8)
+ax.spines[["left", "bottom"]].set_color("#555555")
+ax.grid(True, which="major", alpha=0.15, linewidth=0.6)
+ax.grid(True, which="minor", alpha=0.06, linewidth=0.4)
+
+# Legend
+ax.legend(fontsize=14, loc="lower right", framealpha=0.9, edgecolor="#CCCCCC")
+
+# Subtitle with domain context
+fig.text(
+    0.5,
+    0.96,
+    "Bolt tensile strength (MPa) vs. normal distribution — quality control diagnostic",
+    ha="center",
+    fontsize=14,
+    color="#666666",
+    style="italic",
+)
+
+plt.subplots_adjust(top=0.91)
+plt.savefig("plot.png", dpi=300, bbox_inches="tight")
diff --git a/plots/pp-basic/metadata/matplotlib.yaml b/plots/pp-basic/metadata/matplotlib.yaml
new file mode 100644
index 0000000000..35e4e3eadb
--- /dev/null
+++ b/plots/pp-basic/metadata/matplotlib.yaml
@@ -0,0 +1,230 @@
+library: matplotlib
+specification_id: pp-basic
+created: '2026-03-15T20:16:45Z'
+updated: '2026-03-15T20:35:11Z'
+generated_by: claude-opus-4-5-20251101
+workflow_run: 23118400187
+issue: 4587
+python_version: 3.14.3
+library_version: 3.10.8
+preview_url: https://storage.googleapis.com/pyplots-images/plots/pp-basic/matplotlib/plot.png
+preview_thumb: https://storage.googleapis.com/pyplots-images/plots/pp-basic/matplotlib/plot_thumb.png
+preview_html: null
+quality_score: 90
+review:
+  strengths:
+  - Excellent data storytelling through color-coded deviations and domain-contextual
+    annotation
+  - 'Strong visual refinement: path effects, thinned/colored spines, dual-level grid,
+    confidence band'
+  - Perfect spec compliance with all required P-P plot features including square aspect
+    ratio and i/(n+1) formula
+  - Realistic manufacturing quality control scenario with plausible bolt tensile strength
+    values
+  - 95% confidence band adds genuine
statistical value beyond spec requirements + weaknesses: + - Marker size (s=70) could be slightly larger for better visibility at 200 points + - Blue/red color pairing could be improved for deuteranopia accessibility + image_description: 'The plot displays a P-P (Probability-Probability) plot on a + square canvas with both axes ranging from 0 to 1. The x-axis is labeled "Theoretical + Cumulative Probability (Normal)" and the y-axis "Empirical Cumulative Probability." + The title reads "pp-basic · matplotlib · pyplots.ai" with an italic subtitle "Bolt + tensile strength (MPa) vs. normal distribution — quality control diagnostic." + Data points are colored in two tones: Python Blue (#306998) for points near the + diagonal and coral red (#C44E52) for points deviating more than 0.03 from the + reference line. A dashed gray 45-degree reference line with a subtle stroke path + effect runs diagonally. A light blue 95% confidence band surrounds the reference + line. An annotation with a curved arrow points to the deviation region, reading + "Heavier upper tail (secondary supplier batch)." The legend in the lower right + shows "95% confidence band" and "Perfect normal fit." Top and right spines are + removed; remaining spines are thinned and colored gray. Both major and minor grid + lines are present at very low alpha. The S-shaped departure from the diagonal + in the upper portion is clearly visible, telling the story of a mixture distribution.' 
+ criteria_checklist: + visual_quality: + score: 28 + max: 30 + items: + - id: VQ-01 + name: Text Legibility + score: 8 + max: 8 + passed: true + comment: 'All font sizes explicitly set: title 24pt, labels 20pt, ticks 16pt, + legend 14pt, annotation 14pt' + - id: VQ-02 + name: No Overlap + score: 6 + max: 6 + passed: true + comment: No overlapping text; annotation well-positioned away from data + - id: VQ-03 + name: Element Visibility + score: 5 + max: 6 + passed: true + comment: s=70 for 200 points within guideline range but on smaller side; alpha=0.65 + appropriate + - id: VQ-04 + name: Color Accessibility + score: 3 + max: 4 + passed: true + comment: Blue vs coral/red generally colorblind-safe but not optimal for all + types + - id: VQ-05 + name: Layout & Canvas + score: 4 + max: 4 + passed: true + comment: Square aspect ratio well-utilized, balanced margins, nothing cut + off + - id: VQ-06 + name: Axis Labels & Title + score: 2 + max: 2 + passed: true + comment: Descriptive labels with distribution context specified + design_excellence: + score: 16 + max: 20 + items: + - id: DE-01 + name: Aesthetic Sophistication + score: 6 + max: 8 + passed: true + comment: 'Strong design: custom two-color deviation encoding, path effects, + confidence band, italic subtitle' + - id: DE-02 + name: Visual Refinement + score: 5 + max: 6 + passed: true + comment: Spines removed/thinned, subtle major+minor grid, generous whitespace + - id: DE-03 + name: Data Storytelling + score: 5 + max: 6 + passed: true + comment: Color-coded deviations, annotation explains cause, subtitle provides + domain context + spec_compliance: + score: 15 + max: 15 + items: + - id: SC-01 + name: Plot Type + score: 5 + max: 5 + passed: true + comment: 'Correct P-P plot: empirical CDF vs theoretical CDF' + - id: SC-02 + name: Required Features + score: 4 + max: 4 + passed: true + comment: 'All features: reference line, square aspect, i/(n+1) formula, theoretical + CDF, S-shaped deviation' + - id: SC-03 + 
name: Data Mapping + score: 3 + max: 3 + passed: true + comment: X=theoretical CDF, Y=empirical CDF, both 0-1 + - id: SC-04 + name: Title & Legend + score: 3 + max: 3 + passed: true + comment: Title format correct; legend labels meaningful + data_quality: + score: 15 + max: 15 + items: + - id: DQ-01 + name: Feature Coverage + score: 6 + max: 6 + passed: true + comment: Mixture distribution creates clear S-shaped departure, showing both + conforming and deviating regions + - id: DQ-02 + name: Realistic Context + score: 5 + max: 5 + passed: true + comment: Bolt tensile strength for quality control is real, neutral engineering + scenario + - id: DQ-03 + name: Appropriate Scale + score: 4 + max: 4 + passed: true + comment: 840/910 MPa with sigma 28-35 realistic for steel bolt tensile strength + code_quality: + score: 9 + max: 10 + items: + - id: CQ-01 + name: KISS Structure + score: 2 + max: 3 + passed: true + comment: Linear flow but sys.path manipulation adds boilerplate; vectorized + normal_cdf borderline function definition + - id: CQ-02 + name: Reproducibility + score: 2 + max: 2 + passed: true + comment: np.random.seed(42) set + - id: CQ-03 + name: Clean Imports + score: 2 + max: 2 + passed: true + comment: All imports used + - id: CQ-04 + name: Code Elegance + score: 2 + max: 2 + passed: true + comment: Clean, appropriate complexity, no fake UI + - id: CQ-05 + name: Output & API + score: 1 + max: 1 + passed: true + comment: Saves as plot.png, dpi=300, bbox_inches=tight + library_mastery: + score: 7 + max: 10 + items: + - id: LM-01 + name: Idiomatic Usage + score: 4 + max: 5 + passed: true + comment: Consistent Axes methods, mticker for tick control, fill_between for + confidence band + - id: LM-02 + name: Distinctive Features + score: 3 + max: 5 + passed: true + comment: Uses patheffects for reference line depth, MultipleLocator for precise + ticks + verdict: APPROVED +impl_tags: + dependencies: [] + techniques: + - annotations + - manual-ticks + patterns: + - 
data-generation + dataprep: [] + styling: + - alpha-blending + - edge-highlighting + - grid-styling