Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions plots/boxen-basic/implementations/letsplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
""" pyplots.ai
boxen-basic: Basic Boxen Plot (Letter-Value Plot)
Library: letsplot 4.8.2 | Python 3.13.11
Quality: 91/100 | Created: 2026-01-09
"""

import numpy as np
import pandas as pd
from lets_plot import (
LetsPlot,
aes,
element_text,
geom_point,
geom_rect,
geom_segment,
ggplot,
ggsave,
ggsize,
labs,
scale_fill_manual,
scale_x_continuous,
theme,
theme_minimal,
)


# Initialise lets-plot's HTML output mode before any plot is built.
# NOTE(review): presumably only required for notebook/HTML rendering; confirm whether ggsave() depends on it.
LetsPlot.setup_html()

# Data - synthetic response times (ms) for four server endpoints
np.random.seed(42)  # fixed seed so every run produces the same sample
endpoints = ["API Gateway", "Auth Service", "Database", "Cache Layer"]
n_per_group = 2000

# Per-endpoint shape parameters. Only "base" (offset) and "scale" (mean of the
# exponential draw) are read below; "skew" is carried along but never used.
distributions = {
    "API Gateway": {"base": 45, "scale": 20, "skew": 0.5},
    "Auth Service": {"base": 80, "scale": 35, "skew": 0.8},
    "Database": {"base": 120, "scale": 50, "skew": 1.2},
    "Cache Layer": {"base": 8, "scale": 5, "skew": 0.3},
}

# 5% of the requests in every group are inflated to form a slow tail
n_slow = int(n_per_group * 0.05)

data = []
for endpoint in endpoints:
    params = distributions[endpoint]
    # Right-skewed sample: exponential draw shifted by the endpoint's base latency
    values = params["base"] + np.random.exponential(params["scale"], n_per_group)
    # Multiply a random subset by a factor in [2, 5) to mimic occasional slow requests
    slow_idx = np.random.choice(n_per_group, size=n_slow, replace=False)
    values[slow_idx] *= np.random.uniform(2, 5, n_slow)
    data += [(endpoint, value) for value in values]

# Long-format frame: one row per simulated request
df = pd.DataFrame(data, columns=["endpoint", "response_time"])


# Legend labels: share of the data enclosed by each nested box, innermost first
# (central 50%, central 75%, ...). Positions must line up with level_colors.
level_names = ["50%", "75%", "87.5%", "93.75%", "96.875%", "98.4%", "99.2%", "99.6%"]
# Fill colours, one per entry of level_names, in the same order
level_colors = ["#306998", "#4A7FA8", "#6490B8", "#7EA1C8", "#98B2D8", "#B2C3E8", "#CCD4F8", "#E6E5FF"]


# Calculate letter values for boxen plot
def compute_letter_values(values, k=None):
    """Compute the nested quantile boxes of a letter-value (boxen) plot.

    Box ``i`` (0-based) spans the quantiles ``0.5 ** (i + 2)`` and
    ``1 - 0.5 ** (i + 2)``. The first box is therefore the interquartile
    range (central 50% of the data), the second the central 75%, and so on,
    which is exactly the share named by ``level_names[i]``.

    Args:
        values: Non-empty 1-D array-like of numeric observations.
        k: Number of boxes to compute. When ``None`` it is derived from the
            sample size as ``int(log2(n)) - 1``. In either case the count is
            limited to the range ``2 .. len(level_names)`` so that every box
            has a label.

    Returns:
        A tuple ``(letter_values, median, outliers, k)`` where
        ``letter_values`` is a list of ``(lower, upper, label)`` tuples ordered
        from the innermost box outward, ``median`` is the sample median,
        ``outliers`` is the sorted array of observations lying strictly
        outside the outermost box, and ``k`` is the number of boxes produced.

    Raises:
        ValueError: If ``values`` is empty.
    """
    n = len(values)
    if n == 0:
        raise ValueError("compute_letter_values() requires at least one value")

    if k is None:
        # Number of letter values based on data size
        k = int(np.log2(n)) - 1
    # Bound the count by the labels that actually exist; an unbounded k would
    # index past the end of level_names.
    k = max(2, min(k, len(level_names)))

    sorted_vals = np.sort(values)
    letter_values = []

    for i in range(k):
        # Tail mass cut off on each side. Starting at 1/4 (not 1/2) makes the
        # innermost box the interquartile range rather than a zero-height box
        # collapsed onto the median.
        tail = 0.5 ** (i + 2)
        lower_val = np.percentile(sorted_vals, tail * 100)
        upper_val = np.percentile(sorted_vals, (1 - tail) * 100)
        letter_values.append((lower_val, upper_val, level_names[i]))

    # Anything strictly outside the outermost box is reported as an outlier
    deepest_lower, deepest_upper, _ = letter_values[-1]
    outliers = sorted_vals[(sorted_vals < deepest_lower) | (sorted_vals > deepest_upper)]

    return letter_values, np.median(sorted_vals), outliers, k


# Compute letter values for each endpoint and flatten them into plot-ready rows
box_data = []  # one row per (endpoint, letter-value box)
median_data = []  # one row per endpoint: horizontal median segment
outlier_data = []  # one row per observation outside the outermost box
max_k = 0  # largest number of boxes used by any endpoint; sizes the fill scale

# Endpoints are placed on a numeric x axis at 0, 1, 2, ... in list order
x_positions = {endpoint: i for i, endpoint in enumerate(endpoints)}

for endpoint in endpoints:
    group_data = df.loc[df["endpoint"] == endpoint, "response_time"].to_numpy()
    letter_vals, median, outliers, k = compute_letter_values(group_data)
    max_k = max(max_k, k)

    x_pos = x_positions[endpoint]

    # Emit the outermost (tallest, narrowest) box first and the innermost
    # (shortest, widest) box last, so that a wide inner box is not painted over
    # by the narrower boxes that enclose it vertically.
    # NOTE(review): assumes geom_rect paints rows in data order - confirm on the rendered output.
    for idx, (lower, upper, level_name) in reversed(list(enumerate(letter_vals))):
        # Width decreases with depth
        half_width = 0.4 * (0.85**idx)
        box_data.append(
            {
                "x_min": x_pos - half_width,
                "x_max": x_pos + half_width,
                "y_min": lower,
                "y_max": upper,
                "level": level_name,
                "endpoint": endpoint,
            }
        )

    # Median marker is slightly narrower (0.38) than the widest box (0.4)
    median_data.append({"x": x_pos - 0.38, "xend": x_pos + 0.38, "y": median, "endpoint": endpoint})

    outlier_data.extend({"x": x_pos, "y": o, "endpoint": endpoint} for o in outliers)

box_df = pd.DataFrame(box_data)
median_df = pd.DataFrame(median_data)
# Keep the expected columns even when no endpoint has outliers
outlier_df = pd.DataFrame(outlier_data) if outlier_data else pd.DataFrame(columns=["x", "y", "endpoint"])

# Plot using lets-plot: nested rectangles per endpoint, a median segment on
# top of them, and (optionally) outlier points.
plot = (
    ggplot()
    + geom_rect(
        aes(xmin="x_min", xmax="x_max", ymin="y_min", ymax="y_max", fill="level"),
        data=box_df,
        alpha=0.9,
        color="#1a1a1a",
        size=0.5,
    )
    + geom_segment(aes(x="x", xend="xend", y="y", yend="y"), data=median_df, color="#FFD43B", size=3)
    # Only the levels that were actually computed get a colour / legend entry
    + scale_fill_manual(
        values=dict(zip(level_names[:max_k], level_colors[:max_k], strict=False)), name="Quantile Range"
    )
    # Tick positions are derived from the endpoint list (0, 1, 2, ...) so the
    # breaks and their labels always have the same length.
    + scale_x_continuous(breaks=list(range(len(endpoints))), labels=endpoints)
    + labs(x="Server Endpoint", y="Response Time (ms)", title="boxen-basic \u00b7 letsplot \u00b7 pyplots.ai")
    + theme_minimal()
    + theme(
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        plot_title=element_text(size=24),
        legend_title=element_text(size=18),
        legend_text=element_text(size=14),
    )
    + ggsize(1600, 900)
)

# Add outliers if present (skipped entirely when the frame has no rows)
if not outlier_df.empty:
    plot = plot + geom_point(aes(x="x", y="y"), data=outlier_df, color="#DC2626", size=2, alpha=0.6)

# Save a raster image (upscaled by a factor of 3) and an HTML version to the current directory
ggsave(plot, "plot.png", path=".", scale=3)
ggsave(plot, "plot.html", path=".")
215 changes: 215 additions & 0 deletions plots/boxen-basic/metadata/letsplot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
library: letsplot
specification_id: boxen-basic
created: '2026-01-09T08:12:01Z'
updated: '2026-01-09T08:14:47Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20845378914
issue: 3414
python_version: 3.13.11
library_version: 4.8.2
preview_url: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot_thumb.png
preview_html: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot.html
quality_score: 91
review:
strengths:
- Excellent visual representation of letter-value plot with clear nested box structure
- Realistic server response time scenario with appropriate data characteristics
(skewed distributions, tail behavior)
- Good color gradient from dark blue to light lavender that clearly shows quantile
depth
- Yellow median lines provide excellent contrast and visibility
- Legend clearly explains the quantile ranges
- Proper handling of outliers as distinct red points
weaknesses:
- Uses a helper function compute_letter_values() which violates the KISS principle
(imports → data → plot → save, no functions)
- Legend order shows 50% at top and 99.6% at bottom, which is counterintuitive to
visual interpretation
image_description: 'The plot displays a letter-value (boxen) plot comparing response
times across four server endpoints: API Gateway, Auth Service, Database, and Cache
Layer. Each endpoint shows nested rectangular boxes representing quantile ranges
from 50% (innermost, dark blue #306998) to 99.6% (outermost, light lavender).
The boxes decrease in width for deeper quantiles, creating the characteristic
boxen plot shape. Yellow/gold median lines are prominently displayed across each
distribution. Red dots mark outliers beyond the 99.6% quantile. The Database endpoint
shows the widest distribution and most outliers, while Cache Layer shows the tightest
distribution with lowest response times. The plot uses a minimal theme with subtle
grid lines and a clean legend on the right explaining the quantile ranges.'
criteria_checklist:
visual_quality:
score: 37
max: 40
items:
- id: VQ-01
name: Text Legibility
score: 10
max: 10
passed: true
comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
readable
- id: VQ-02
name: No Overlap
score: 8
max: 8
passed: true
comment: No overlapping text elements, endpoint labels are well-spaced
- id: VQ-03
name: Element Visibility
score: 7
max: 8
passed: true
comment: Boxes are clearly visible with good sizing; outlier points could
be slightly larger
- id: VQ-04
name: Color Accessibility
score: 5
max: 5
passed: true
comment: Blue gradient palette is colorblind-safe; yellow median line provides
good contrast
- id: VQ-05
name: Layout Balance
score: 5
max: 5
passed: true
comment: Plot fills canvas appropriately with balanced margins; legend well-positioned
- id: VQ-06
name: Axis Labels
score: 2
max: 2
passed: true
comment: 'Descriptive labels with units: Response Time (ms) and Server Endpoint'
- id: VQ-07
name: Grid & Legend
score: 0
max: 2
passed: false
comment: Legend quantile ordering is counterintuitive (50% at top, 99.6% at
bottom)
spec_compliance:
score: 24
max: 25
items:
- id: SC-01
name: Plot Type
score: 8
max: 8
passed: true
comment: Correct boxen/letter-value plot with nested boxes
- id: SC-02
name: Data Mapping
score: 5
max: 5
passed: true
comment: Categories on X-axis, values on Y-axis
- id: SC-03
name: Required Features
score: 5
max: 5
passed: true
comment: Nested boxes, decreasing widths, outliers as points, legend explaining
quantile levels
- id: SC-04
name: Data Range
score: 3
max: 3
passed: true
comment: All data visible including outliers up to ~1400ms
- id: SC-05
name: Legend Accuracy
score: 2
max: 2
passed: true
comment: Legend correctly shows quantile range names
- id: SC-06
name: Title Format
score: 1
max: 2
passed: true
comment: Uses correct format but with Unicode middot character
data_quality:
score: 20
max: 20
items:
- id: DQ-01
name: Feature Coverage
score: 8
max: 8
passed: true
comment: 'Shows all aspects: different distribution shapes, varying spreads,
outliers, tail behavior'
- id: DQ-02
name: Realistic Context
score: 7
max: 7
passed: true
comment: Server response times is a real, neutral scenario perfectly suited
for large dataset visualization
- id: DQ-03
name: Appropriate Scale
score: 5
max: 5
passed: true
comment: Response times in realistic ranges (8-500ms base with occasional
slow requests up to 1400ms)
code_quality:
score: 7
max: 10
items:
- id: CQ-01
name: KISS Structure
score: 0
max: 3
passed: false
comment: Uses a function compute_letter_values() which violates KISS principle
- id: CQ-02
name: Reproducibility
score: 3
max: 3
passed: true
comment: Uses np.random.seed(42) for reproducibility
- id: CQ-03
name: Clean Imports
score: 2
max: 2
passed: true
comment: All imports are used
- id: CQ-04
name: No Deprecated API
score: 1
max: 1
passed: true
comment: Modern lets-plot API
- id: CQ-05
name: Output Correct
score: 1
max: 1
passed: true
comment: Saves as plot.png and plot.html
library_features:
score: 3
max: 5
items:
- id: LF-01
name: Distinctive Features
score: 3
max: 5
passed: true
comment: Uses ggplot2 grammar with geom_rect, geom_segment, geom_point. Manual
construction necessary as lets-plot has no native boxen geom.
verdict: APPROVED
impl_tags:
dependencies: []
techniques:
- layer-composition
- manual-ticks
- html-export
patterns:
- data-generation
- iteration-over-groups
dataprep:
- binning
styling:
- alpha-blending
- edge-highlighting