diff --git a/plots/boxen-basic/implementations/letsplot.py b/plots/boxen-basic/implementations/letsplot.py new file mode 100644 index 0000000000..6f2c3a2065 --- /dev/null +++ b/plots/boxen-basic/implementations/letsplot.py @@ -0,0 +1,161 @@ +""" pyplots.ai +boxen-basic: Basic Boxen Plot (Letter-Value Plot) +Library: letsplot 4.8.2 | Python 3.13.11 +Quality: 91/100 | Created: 2026-01-09 +""" + +import numpy as np +import pandas as pd +from lets_plot import ( + LetsPlot, + aes, + element_text, + geom_point, + geom_rect, + geom_segment, + ggplot, + ggsave, + ggsize, + labs, + scale_fill_manual, + scale_x_continuous, + theme, + theme_minimal, +) + + +LetsPlot.setup_html() + +# Data - Generate realistic response times for different server endpoints +np.random.seed(42) +endpoints = ["API Gateway", "Auth Service", "Database", "Cache Layer"] +n_per_group = 2000 + +data = [] +# Realistic response time distributions (ms) with different characteristics +distributions = { + "API Gateway": {"base": 45, "scale": 20, "skew": 0.5}, + "Auth Service": {"base": 80, "scale": 35, "skew": 0.8}, + "Database": {"base": 120, "scale": 50, "skew": 1.2}, + "Cache Layer": {"base": 8, "scale": 5, "skew": 0.3}, +} + +for endpoint in endpoints: + d = distributions[endpoint] + # Generate log-normal like distribution for realistic response times + values = np.random.exponential(d["scale"], n_per_group) + d["base"] + # Add occasional slow requests (tail) + slow_idx = np.random.choice(n_per_group, size=int(n_per_group * 0.05), replace=False) + values[slow_idx] = values[slow_idx] * np.random.uniform(2, 5, len(slow_idx)) + data.extend([(endpoint, v) for v in values]) + +df = pd.DataFrame(data, columns=["endpoint", "response_time"]) + + +# Letter value names for legend +level_names = ["50%", "75%", "87.5%", "93.75%", "96.875%", "98.4%", "99.2%", "99.6%"] +level_colors = ["#306998", "#4A7FA8", "#6490B8", "#7EA1C8", "#98B2D8", "#B2C3E8", "#CCD4F8", "#E6E5FF"] + + +# Calculate letter values for boxen plot 
def compute_letter_values(values, k=None):
    """Compute the letter values, median and outliers of one group for a boxen plot.

    Level ``i`` (0-based) spans the quantiles ``0.5 ** (i + 1)`` and
    ``1 - 0.5 ** (i + 1)`` of ``values`` and is labelled ``level_names[i]``.

    Args:
        values: Non-empty one-dimensional sequence or array of numbers.
        k: Number of levels to compute. When ``None`` it is derived from the
            sample size as ``int(log2(n)) - 1`` and raised to at least 2.
            It is always capped at ``len(level_names)`` because every level
            needs a label.

    Returns:
        A tuple ``(letter_values, median, outliers, k)``. ``letter_values`` is
        a list of ``(lower, upper, level_name)`` tuples ordered from the
        innermost level outwards, ``outliers`` is the sorted array of
        observations strictly outside the outermost level, and ``k`` is the
        number of levels actually computed.

    Raises:
        ValueError: If ``values`` is empty or ``k`` is smaller than 1.
    """
    n = len(values)
    if n == 0:
        # log2(0) is -inf and int(-inf) would raise an opaque OverflowError.
        raise ValueError("values must contain at least one observation")

    if k is None:
        # Number of letter values based on data size
        k = max(2, int(np.log2(n)) - 1)
    elif k < 1:
        raise ValueError("k must be at least 1")
    # Every level is labelled from level_names, so never ask for more levels
    # than there are labels.
    k = min(k, len(level_names))

    sorted_vals = np.sort(values)

    # NOTE(review): level 0 has depth 0.5, so its lower and upper bounds are
    # both the median and its rectangle has zero height. Confirm whether the
    # first level was meant to be the fourths (25%-75%) instead.
    depths = [0.5 ** (level + 1) for level in range(k)]
    lower_bounds = np.percentile(sorted_vals, [depth * 100 for depth in depths])
    upper_bounds = np.percentile(sorted_vals, [(1 - depth) * 100 for depth in depths])
    letter_values = list(zip(lower_bounds, upper_bounds, level_names[:k]))

    # Calculate outlier bounds (beyond deepest letter value)
    deepest_lower, deepest_upper, _ = letter_values[-1]
    outliers = sorted_vals[(sorted_vals < deepest_lower) | (sorted_vals > deepest_upper)]

    return letter_values, np.median(sorted_vals), outliers, k


# Compute letter values for each endpoint
box_data = []  # one row per (endpoint, level) rectangle
median_data = []  # one row per endpoint: horizontal median segment
outlier_data = []  # one row per observation beyond the outermost level
max_k = 0  # largest level count over all endpoints; sizes the fill scale

# Endpoints are drawn on a continuous x axis at integer positions 0..len-1.
x_positions = {endpoint: i for i, endpoint in enumerate(endpoints)}

for endpoint in endpoints:
    group_data = df[df["endpoint"] == endpoint]["response_time"].values
    letter_vals, median, outliers, k = compute_letter_values(group_data)
    max_k = max(max_k, k)

    x_pos = x_positions[endpoint]

    for idx, (lower, upper, level_name) in enumerate(letter_vals):
        # Width decreases with depth
        half_width = 0.4 * (0.85**idx)
        box_data.append(
            {
                "x_min": x_pos - half_width,
                "x_max": x_pos + half_width,
                "y_min": lower,
                "y_max": upper,
                "level": level_name,
                "endpoint": endpoint,
            }
        )

    median_data.append({"x": x_pos - 0.38, "xend": x_pos + 0.38, "y": median, "endpoint": endpoint})

    outlier_data.extend({"x": x_pos, "y": o, "endpoint": endpoint} for o in outliers)

box_df = pd.DataFrame(box_data)
median_df = pd.DataFrame(median_data)
# Keep the expected columns even when no group has outliers.
outlier_df = pd.DataFrame(outlier_data) if outlier_data else pd.DataFrame(columns=["x", "y", "endpoint"])

# Plot using
lets-plot +plot = ( + ggplot() + + geom_rect( + aes(xmin="x_min", xmax="x_max", ymin="y_min", ymax="y_max", fill="level"), + data=box_df, + alpha=0.9, + color="#1a1a1a", + size=0.5, + ) + + geom_segment(aes(x="x", xend="xend", y="y", yend="y"), data=median_df, color="#FFD43B", size=3) + + scale_fill_manual( + values=dict(zip(level_names[:max_k], level_colors[:max_k], strict=False)), name="Quantile Range" + ) + + scale_x_continuous(breaks=[0, 1, 2, 3], labels=endpoints) + + labs(x="Server Endpoint", y="Response Time (ms)", title="boxen-basic \u00b7 letsplot \u00b7 pyplots.ai") + + theme_minimal() + + theme( + axis_title=element_text(size=20), + axis_text=element_text(size=16), + plot_title=element_text(size=24), + legend_title=element_text(size=18), + legend_text=element_text(size=14), + ) + + ggsize(1600, 900) +) + +# Add outliers if present +if not outlier_df.empty: + plot = plot + geom_point(aes(x="x", y="y"), data=outlier_df, color="#DC2626", size=2, alpha=0.6) + +# Save +ggsave(plot, "plot.png", path=".", scale=3) +ggsave(plot, "plot.html", path=".") diff --git a/plots/boxen-basic/metadata/letsplot.yaml b/plots/boxen-basic/metadata/letsplot.yaml new file mode 100644 index 0000000000..afc9912dc9 --- /dev/null +++ b/plots/boxen-basic/metadata/letsplot.yaml @@ -0,0 +1,215 @@ +library: letsplot +specification_id: boxen-basic +created: '2026-01-09T08:12:01Z' +updated: '2026-01-09T08:14:47Z' +generated_by: claude-opus-4-5-20251101 +workflow_run: 20845378914 +issue: 3414 +python_version: 3.13.11 +library_version: 4.8.2 +preview_url: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot.png +preview_thumb: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot_thumb.png +preview_html: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot.html +quality_score: 91 +review: + strengths: + - Excellent visual representation of letter-value plot with clear nested box structure + - Realistic server response 
time scenario with appropriate data characteristics + (skewed distributions, tail behavior) + - Good color gradient from dark blue to light lavender that clearly shows quantile + depth + - Yellow median lines provide excellent contrast and visibility + - Legend clearly explains the quantile ranges + - Proper handling of outliers as distinct red points + weaknesses: + - Uses a helper function compute_letter_values() which violates the KISS principle + (imports → data → plot → save, no functions) + - Legend order shows 50% at top and 99.6% at bottom, which is counterintuitive to + visual interpretation + image_description: 'The plot displays a letter-value (boxen) plot comparing response + times across four server endpoints: API Gateway, Auth Service, Database, and Cache + Layer. Each endpoint shows nested rectangular boxes representing quantile ranges + from 50% (innermost, dark blue #306998) to 99.6% (outermost, light lavender). + The boxes decrease in width for deeper quantiles, creating the characteristic + boxen plot shape. Yellow/gold median lines are prominently displayed across each + distribution. Red dots mark outliers beyond the 99.6% quantile. The Database endpoint + shows the widest distribution and most outliers, while Cache Layer shows the tightest + distribution with lowest response times. The plot uses a minimal theme with subtle + grid lines and a clean legend on the right explaining the quantile ranges.' 
+ criteria_checklist: + visual_quality: + score: 37 + max: 40 + items: + - id: VQ-01 + name: Text Legibility + score: 10 + max: 10 + passed: true + comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly + readable + - id: VQ-02 + name: No Overlap + score: 8 + max: 8 + passed: true + comment: No overlapping text elements, endpoint labels are well-spaced + - id: VQ-03 + name: Element Visibility + score: 7 + max: 8 + passed: true + comment: Boxes are clearly visible with good sizing; outlier points could + be slightly larger + - id: VQ-04 + name: Color Accessibility + score: 5 + max: 5 + passed: true + comment: Blue gradient palette is colorblind-safe; yellow median line provides + good contrast + - id: VQ-05 + name: Layout Balance + score: 5 + max: 5 + passed: true + comment: Plot fills canvas appropriately with balanced margins; legend well-positioned + - id: VQ-06 + name: Axis Labels + score: 2 + max: 2 + passed: true + comment: 'Descriptive labels with units: Response Time (ms) and Server Endpoint' + - id: VQ-07 + name: Grid & Legend + score: 0 + max: 2 + passed: false + comment: Legend quantile ordering is counterintuitive (50% at top, 99.6% at + bottom) + spec_compliance: + score: 24 + max: 25 + items: + - id: SC-01 + name: Plot Type + score: 8 + max: 8 + passed: true + comment: Correct boxen/letter-value plot with nested boxes + - id: SC-02 + name: Data Mapping + score: 5 + max: 5 + passed: true + comment: Categories on X-axis, values on Y-axis + - id: SC-03 + name: Required Features + score: 5 + max: 5 + passed: true + comment: Nested boxes, decreasing widths, outliers as points, legend explaining + quantile levels + - id: SC-04 + name: Data Range + score: 3 + max: 3 + passed: true + comment: All data visible including outliers up to ~1400ms + - id: SC-05 + name: Legend Accuracy + score: 2 + max: 2 + passed: true + comment: Legend correctly shows quantile range names + - id: SC-06 + name: Title Format + score: 1 + max: 2 + passed: 
true + comment: Uses correct format but with Unicode middot character + data_quality: + score: 20 + max: 20 + items: + - id: DQ-01 + name: Feature Coverage + score: 8 + max: 8 + passed: true + comment: 'Shows all aspects: different distribution shapes, varying spreads, + outliers, tail behavior' + - id: DQ-02 + name: Realistic Context + score: 7 + max: 7 + passed: true + comment: Server response times is a real, neutral scenario perfectly suited + for large dataset visualization + - id: DQ-03 + name: Appropriate Scale + score: 5 + max: 5 + passed: true + comment: Response times in realistic ranges (8-500ms base with occasional + slow requests up to 1400ms) + code_quality: + score: 7 + max: 10 + items: + - id: CQ-01 + name: KISS Structure + score: 0 + max: 3 + passed: false + comment: Uses a function compute_letter_values() which violates KISS principle + - id: CQ-02 + name: Reproducibility + score: 3 + max: 3 + passed: true + comment: Uses np.random.seed(42) for reproducibility + - id: CQ-03 + name: Clean Imports + score: 2 + max: 2 + passed: true + comment: All imports are used + - id: CQ-04 + name: No Deprecated API + score: 1 + max: 1 + passed: true + comment: Modern lets-plot API + - id: CQ-05 + name: Output Correct + score: 1 + max: 1 + passed: true + comment: Saves as plot.png and plot.html + library_features: + score: 3 + max: 5 + items: + - id: LF-01 + name: Distinctive Features + score: 3 + max: 5 + passed: true + comment: Uses ggplot2 grammar with geom_rect, geom_segment, geom_point. Manual + construction necessary as lets-plot has no native boxen geom. + verdict: APPROVED +impl_tags: + dependencies: [] + techniques: + - layer-composition + - manual-ticks + - html-export + patterns: + - data-generation + - iteration-over-groups + dataprep: + - binning + styling: + - alpha-blending + - edge-highlighting