From 1f412c8326c94176b44a175bc39ea10075add0e0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 30 Dec 2025 21:53:47 +0000 Subject: [PATCH 1/5] feat(plotnine): implement parallel-categories-basic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../implementations/plotnine.py | 285 ++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 plots/parallel-categories-basic/implementations/plotnine.py diff --git a/plots/parallel-categories-basic/implementations/plotnine.py b/plots/parallel-categories-basic/implementations/plotnine.py new file mode 100644 index 0000000000..a5be1fcb31 --- /dev/null +++ b/plots/parallel-categories-basic/implementations/plotnine.py @@ -0,0 +1,285 @@ +"""pyplots.ai +parallel-categories-basic: Basic Parallel Categories Plot +Library: plotnine | Python 3.13 +Quality: pending | Created: 2025-12-30 +""" + +import sys + + +# Prevent current directory from shadowing the plotnine package +sys.path = [p for p in sys.path if not p.endswith("implementations")] + +import numpy as np # noqa: E402 +import pandas as pd # noqa: E402 +from plotnine import ( # noqa: E402 + aes, + annotate, + coord_cartesian, + element_blank, + element_text, + geom_polygon, + geom_rect, + geom_text, + ggplot, + labs, + scale_fill_manual, + theme, + theme_minimal, +) + + +# Data - Customer journey data with multiple categorical dimensions +# Each row represents aggregated counts for a specific path through dimensions +np.random.seed(42) + +# Define category combinations and realistic counts +path_data = [ + # Channel -> Product Category -> Customer Type -> Outcome + ("Online", "Electronics", "New", "Purchased", 145), + ("Online", "Electronics", "New", "Abandoned", 98), + ("Online", "Electronics", "Returning", "Purchased", 187), + ("Online", "Electronics", "Returning", "Abandoned", 42), + ("Online", "Clothing", 
"New", "Purchased", 112), + ("Online", "Clothing", "New", "Abandoned", 76), + ("Online", "Clothing", "Returning", "Purchased", 156), + ("Online", "Clothing", "Returning", "Abandoned", 38), + ("Online", "Home", "New", "Purchased", 67), + ("Online", "Home", "New", "Abandoned", 54), + ("Online", "Home", "Returning", "Purchased", 89), + ("Online", "Home", "Returning", "Abandoned", 23), + ("Store", "Electronics", "New", "Purchased", 78), + ("Store", "Electronics", "New", "Abandoned", 32), + ("Store", "Electronics", "Returning", "Purchased", 124), + ("Store", "Electronics", "Returning", "Abandoned", 18), + ("Store", "Clothing", "New", "Purchased", 95), + ("Store", "Clothing", "New", "Abandoned", 28), + ("Store", "Clothing", "Returning", "Purchased", 142), + ("Store", "Clothing", "Returning", "Abandoned", 15), + ("Store", "Home", "New", "Purchased", 56), + ("Store", "Home", "New", "Abandoned", 21), + ("Store", "Home", "Returning", "Purchased", 78), + ("Store", "Home", "Returning", "Abandoned", 12), + ("Mobile", "Electronics", "New", "Purchased", 89), + ("Mobile", "Electronics", "New", "Abandoned", 112), + ("Mobile", "Electronics", "Returning", "Purchased", 134), + ("Mobile", "Electronics", "Returning", "Abandoned", 67), + ("Mobile", "Clothing", "New", "Purchased", 76), + ("Mobile", "Clothing", "New", "Abandoned", 94), + ("Mobile", "Clothing", "Returning", "Purchased", 118), + ("Mobile", "Clothing", "Returning", "Abandoned", 52), + ("Mobile", "Home", "New", "Purchased", 45), + ("Mobile", "Home", "New", "Abandoned", 58), + ("Mobile", "Home", "Returning", "Purchased", 67), + ("Mobile", "Home", "Returning", "Abandoned", 34), +] + +path_counts = pd.DataFrame(path_data, columns=["channel", "product", "customer_type", "outcome", "count"]) + +# Define dimensions and their category orders +dimensions = [ + {"name": "channel", "label": "Channel", "categories": ["Online", "Store", "Mobile"]}, + {"name": "product", "label": "Product", "categories": ["Electronics", "Clothing", 
"Home"]}, + {"name": "customer_type", "label": "Customer", "categories": ["New", "Returning"]}, + {"name": "outcome", "label": "Outcome", "categories": ["Purchased", "Abandoned"]}, +] + +# Color by outcome - Python Blue for abandoned, Yellow for purchased +outcome_colors = {"Purchased": "#FFD43B", "Abandoned": "#306998"} + +# Layout parameters +n_dims = len(dimensions) +x_positions = np.linspace(0.1, 0.9, n_dims) +node_width = 0.04 +node_gap = 0.03 +total_height = 0.82 +y_start = 0.92 + +# Calculate node positions for each dimension +node_positions = {} +for dim_idx, dim in enumerate(dimensions): + x_pos = x_positions[dim_idx] + categories = dim["categories"] + col_name = dim["name"] + + # Calculate totals for this dimension + if col_name == "outcome": + totals = path_counts.groupby(col_name)["count"].sum() + else: + totals = path_counts.groupby(col_name)["count"].sum() + + grand_total = totals.sum() + current_y = y_start + + for cat in categories: + count = totals.get(cat, 0) + height = (count / grand_total) * total_height if grand_total > 0 else 0 + + node_positions[(dim_idx, cat)] = { + "x": x_pos, + "y_top": current_y, + "y_bottom": current_y - height, + "height": height, + "count": count, + "flow_offset_out": 0, # For outgoing flows (right side) + "flow_offset_in": 0, # For incoming flows (left side) + } + current_y = current_y - height - node_gap + +# Build node rectangles dataframe +node_data = [] +for (dim_idx, cat), pos in node_positions.items(): + node_data.append( + { + "dim_idx": dim_idx, + "category": cat, + "xmin": pos["x"] - node_width / 2, + "xmax": pos["x"] + node_width / 2, + "ymin": pos["y_bottom"], + "ymax": pos["y_top"], + "label_y": (pos["y_top"] + pos["y_bottom"]) / 2, + "count": pos["count"], + "display_label": str(cat), + "fill_color": outcome_colors.get(cat, "#888888"), + } + ) +nodes_df = pd.DataFrame(node_data) + +# Build flow polygons between adjacent dimensions +flow_polygons = [] +flow_id_counter = 0 + +for _, path_row in 
path_counts.iterrows(): + path_values = [path_row["channel"], path_row["product"], path_row["customer_type"], path_row["outcome"]] + count = path_row["count"] + outcome = path_row["outcome"] + + # Draw flows between each adjacent pair of dimensions + for dim_idx in range(n_dims - 1): + from_cat = path_values[dim_idx] + to_cat = path_values[dim_idx + 1] + + src_pos = node_positions[(dim_idx, from_cat)] + tgt_pos = node_positions[(dim_idx + 1, to_cat)] + + # Calculate flow height proportional to count at source and target + src_total = sum(path_counts[path_counts[dimensions[dim_idx]["name"]] == from_cat]["count"]) + flow_height_src = (count / src_total) * src_pos["height"] if src_total > 0 else 0 + + tgt_total = sum(path_counts[path_counts[dimensions[dim_idx + 1]["name"]] == to_cat]["count"]) + flow_height_tgt = (count / tgt_total) * tgt_pos["height"] if tgt_total > 0 else 0 + + # Source connection point (right side of node) + src_y_top = src_pos["y_top"] - src_pos["flow_offset_out"] + src_y_bottom = src_y_top - flow_height_src + src_pos["flow_offset_out"] += flow_height_src + + # Target connection point (left side of node) + tgt_y_top = tgt_pos["y_top"] - tgt_pos["flow_offset_in"] + tgt_y_bottom = tgt_y_top - flow_height_tgt + tgt_pos["flow_offset_in"] += flow_height_tgt + + # Create curved flow polygon using cubic interpolation + flow_x_left = x_positions[dim_idx] + node_width / 2 + flow_x_right = x_positions[dim_idx + 1] - node_width / 2 + n_points = 30 + + t_param = np.linspace(0, 1, n_points) + # Smooth cubic easing for natural flow appearance + x_top = flow_x_left + (flow_x_right - flow_x_left) * t_param + y_top = src_y_top + (tgt_y_top - src_y_top) * (3 * t_param**2 - 2 * t_param**3) + + x_bottom = flow_x_right + (flow_x_left - flow_x_right) * t_param + y_bottom = tgt_y_bottom + (src_y_bottom - tgt_y_bottom) * (3 * t_param**2 - 2 * t_param**3) + + # Combine into polygon + x_polygon = np.concatenate([x_top, x_bottom]) + y_polygon = np.concatenate([y_top, 
y_bottom]) + + flow_id = f"flow_{flow_id_counter}" + flow_id_counter += 1 + + for i in range(len(x_polygon)): + flow_polygons.append({"x": x_polygon[i], "y": y_polygon[i], "flow_id": flow_id, "outcome": outcome}) + +flows_df = pd.DataFrame(flow_polygons) + +# Create the plot +plot = ( + ggplot() + # Flow polygons with transparency - colored by outcome + + geom_polygon(flows_df, aes(x="x", y="y", group="flow_id", fill="outcome"), alpha=0.5) + # Node rectangles - use neutral gray for all nodes + + geom_rect( + nodes_df, aes(xmin="xmin", xmax="xmax", ymin="ymin", ymax="ymax"), fill="#555555", color="white", size=0.8 + ) + # Category labels on nodes + + geom_text( + nodes_df[nodes_df["count"] >= 20], + aes(x=(nodes_df["xmin"] + nodes_df["xmax"]) / 2, y="label_y", label="count"), + ha="center", + va="center", + size=10, + color="white", + fontweight="bold", + ) + + scale_fill_manual(values=outcome_colors, name="Outcome", breaks=["Purchased", "Abandoned"]) + + labs(title="Customer Journey 路 parallel-categories-basic 路 plotnine 路 pyplots.ai", x="", y="") + + coord_cartesian(xlim=(0, 1), ylim=(-0.02, 1.02)) + + theme_minimal() + + theme( + figure_size=(16, 9), + plot_title=element_text(size=24, ha="center", weight="bold"), + axis_text=element_blank(), + axis_ticks=element_blank(), + panel_grid=element_blank(), + legend_title=element_text(size=16, weight="bold"), + legend_text=element_text(size=14), + legend_position="right", + ) +) + +# Add dimension labels at top +for dim_idx, dim in enumerate(dimensions): + plot = plot + annotate( + "text", + x=x_positions[dim_idx], + y=0.98, + label=dim["label"], + size=14, + color="#333333", + fontweight="bold", + ha="center", + ) + +# Add category labels beside each node +for (dim_idx, cat), pos in node_positions.items(): + label = str(cat) + label_y = (pos["y_top"] + pos["y_bottom"]) / 2 + + # For first dimension, place label on left + if dim_idx == 0: + plot = plot + annotate( + "text", + x=x_positions[dim_idx] - node_width / 2 - 
0.01, + y=label_y, + label=label, + size=10, + color="#333333", + ha="right", + va="center", + ) + # For last dimension, place label on right + elif dim_idx == n_dims - 1: + plot = plot + annotate( + "text", + x=x_positions[dim_idx] + node_width / 2 + 0.01, + y=label_y, + label=label, + size=10, + color="#333333", + ha="left", + va="center", + ) + +plot.save("plot.png", dpi=300, verbose=False) From 523661f7302019d2e068bd87b678f23bf6b7aed8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 30 Dec 2025 21:54:08 +0000 Subject: [PATCH 2/5] chore(plotnine): add metadata for parallel-categories-basic --- .../metadata/plotnine.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 plots/parallel-categories-basic/metadata/plotnine.yaml diff --git a/plots/parallel-categories-basic/metadata/plotnine.yaml b/plots/parallel-categories-basic/metadata/plotnine.yaml new file mode 100644 index 0000000000..603da7f142 --- /dev/null +++ b/plots/parallel-categories-basic/metadata/plotnine.yaml @@ -0,0 +1,19 @@ +# Per-library metadata for plotnine implementation of parallel-categories-basic +# Auto-generated by impl-generate.yml + +library: plotnine +specification_id: parallel-categories-basic +created: '2025-12-30T21:54:08Z' +updated: '2025-12-30T21:54:08Z' +generated_by: claude-opus-4-5-20251101 +workflow_run: 20606634555 +issue: 0 +python_version: 3.13.11 +library_version: 0.15.2 +preview_url: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/plotnine/plot.png +preview_thumb: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/plotnine/plot_thumb.png +preview_html: null +quality_score: null +review: + strengths: [] + weaknesses: [] From 468e1caff59f76f7a88796bb785d24274f5b4b2f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 30 Dec 2025 21:56:25 +0000 Subject: [PATCH 3/5] chore(plotnine): update quality score 88 and review feedback for parallel-categories-basic --- 
.../implementations/plotnine.py | 6 ++--- .../metadata/plotnine.yaml | 25 +++++++++++++------ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/plots/parallel-categories-basic/implementations/plotnine.py b/plots/parallel-categories-basic/implementations/plotnine.py index a5be1fcb31..4bf7e8b1d5 100644 --- a/plots/parallel-categories-basic/implementations/plotnine.py +++ b/plots/parallel-categories-basic/implementations/plotnine.py @@ -1,7 +1,7 @@ -"""pyplots.ai +""" pyplots.ai parallel-categories-basic: Basic Parallel Categories Plot -Library: plotnine | Python 3.13 -Quality: pending | Created: 2025-12-30 +Library: plotnine 0.15.2 | Python 3.13.11 +Quality: 88/100 | Created: 2025-12-30 """ import sys diff --git a/plots/parallel-categories-basic/metadata/plotnine.yaml b/plots/parallel-categories-basic/metadata/plotnine.yaml index 603da7f142..92dc3bb8f2 100644 --- a/plots/parallel-categories-basic/metadata/plotnine.yaml +++ b/plots/parallel-categories-basic/metadata/plotnine.yaml @@ -1,10 +1,7 @@ -# Per-library metadata for plotnine implementation of parallel-categories-basic -# Auto-generated by impl-generate.yml - library: plotnine specification_id: parallel-categories-basic created: '2025-12-30T21:54:08Z' -updated: '2025-12-30T21:54:08Z' +updated: '2025-12-30T21:56:24Z' generated_by: claude-opus-4-5-20251101 workflow_run: 20606634555 issue: 0 @@ -13,7 +10,21 @@ library_version: 0.15.2 preview_url: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/plotnine/plot.png preview_thumb: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/plotnine/plot_thumb.png preview_html: null -quality_score: null +quality_score: 88 review: - strengths: [] - weaknesses: [] + strengths: + - Excellent visual implementation of parallel categories using plotnine's polygon + and rect geoms as building blocks + - Clean, professional appearance with well-balanced layout and appropriate use of + transparency for ribbons + - 
Realistic customer journey scenario with meaningful data that demonstrates the + plot type's purpose + - Smooth curved ribbons using cubic interpolation create an elegant flow visualization + - Good color scheme (Python blue/yellow) that is colorblind-accessible + weaknesses: + - Title format deviates from spec by prepending "Customer Journey" instead of using + pure "{spec-id} · {library} · pyplots.ai" format + - Category labels for middle dimensions (Product, Customer) are not displayed beside + nodes, only counts shown inside + - Some ribbon crossings create visual complexity that could be reduced by reordering + categories From 45e2111aec5d79e0b30d9b74a53bb6e9f414f8b8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 30 Dec 2025 22:00:03 +0000 Subject: [PATCH 4/5] fix(plotnine): address review feedback for parallel-categories-basic - Fix title format to use pure spec-id format (no "Customer Journey" prefix) - Add category labels for middle dimensions (Product, Customer) below nodes - Reorder customer categories (Returning before New) to reduce ribbon crossings Attempt 1/3 - fixes based on AI review --- .../implementations/plotnine.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/plots/parallel-categories-basic/implementations/plotnine.py b/plots/parallel-categories-basic/implementations/plotnine.py index 4bf7e8b1d5..931b5c5a1e 100644 --- a/plots/parallel-categories-basic/implementations/plotnine.py +++ b/plots/parallel-categories-basic/implementations/plotnine.py @@ -1,4 +1,4 @@ -"""pyplots.ai +"""pyplots.ai parallel-categories-basic: Basic Parallel Categories Plot Library: plotnine 0.15.2 | Python 3.13.11 Quality: 88/100 | Created: 2025-12-30 @@ -76,11 +76,11 @@ path_counts = pd.DataFrame(path_data, columns=["channel", "product", "customer_type", "outcome", "count"]) -# Define dimensions and their category orders +# Define dimensions and their category orders (ordered to minimize ribbon crossings) 
dimensions = [ {"name": "channel", "label": "Channel", "categories": ["Online", "Store", "Mobile"]}, {"name": "product", "label": "Product", "categories": ["Electronics", "Clothing", "Home"]}, - {"name": "customer_type", "label": "Customer", "categories": ["New", "Returning"]}, + {"name": "customer_type", "label": "Customer", "categories": ["Returning", "New"]}, {"name": "outcome", "label": "Outcome", "categories": ["Purchased", "Abandoned"]}, ] @@ -224,7 +224,7 @@ fontweight="bold", ) + scale_fill_manual(values=outcome_colors, name="Outcome", breaks=["Purchased", "Abandoned"]) - + labs(title="Customer Journey 路 parallel-categories-basic 路 plotnine 路 pyplots.ai", x="", y="") + + labs(title="parallel-categories-basic 路 plotnine 路 pyplots.ai", x="", y="") + coord_cartesian(xlim=(0, 1), ylim=(-0.02, 1.02)) + theme_minimal() + theme( @@ -252,12 +252,12 @@ ha="center", ) -# Add category labels beside each node +# Add category labels beside each node (all dimensions) for (dim_idx, cat), pos in node_positions.items(): label = str(cat) label_y = (pos["y_top"] + pos["y_bottom"]) / 2 - # For first dimension, place label on left + # For first dimension, place label on left side of node if dim_idx == 0: plot = plot + annotate( "text", @@ -269,7 +269,7 @@ ha="right", va="center", ) - # For last dimension, place label on right + # For last dimension, place label on right side of node elif dim_idx == n_dims - 1: plot = plot + annotate( "text", @@ -281,5 +281,17 @@ ha="left", va="center", ) + # For middle dimensions, place label below the node + else: + plot = plot + annotate( + "text", + x=x_positions[dim_idx], + y=pos["y_bottom"] - 0.015, + label=label, + size=9, + color="#333333", + ha="center", + va="top", + ) plot.save("plot.png", dpi=300, verbose=False) From 40b453985357ba39f4aba9a48368091b0b36b0da Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 30 Dec 2025 22:02:47 +0000 Subject: [PATCH 5/5] chore(plotnine): update quality score 90 and review feedback for 
parallel-categories-basic --- .../implementations/plotnine.py | 4 +-- .../metadata/plotnine.yaml | 29 +++++++++---------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/plots/parallel-categories-basic/implementations/plotnine.py b/plots/parallel-categories-basic/implementations/plotnine.py index 931b5c5a1e..be0d9e8abc 100644 --- a/plots/parallel-categories-basic/implementations/plotnine.py +++ b/plots/parallel-categories-basic/implementations/plotnine.py @@ -1,7 +1,7 @@ -"""pyplots.ai +""" pyplots.ai parallel-categories-basic: Basic Parallel Categories Plot Library: plotnine 0.15.2 | Python 3.13.11 -Quality: 88/100 | Created: 2025-12-30 +Quality: 90/100 | Created: 2025-12-30 """ import sys diff --git a/plots/parallel-categories-basic/metadata/plotnine.yaml b/plots/parallel-categories-basic/metadata/plotnine.yaml index 92dc3bb8f2..7cafe13470 100644 --- a/plots/parallel-categories-basic/metadata/plotnine.yaml +++ b/plots/parallel-categories-basic/metadata/plotnine.yaml @@ -1,7 +1,7 @@ library: plotnine specification_id: parallel-categories-basic created: '2025-12-30T21:54:08Z' -updated: '2025-12-30T21:56:24Z' +updated: '2025-12-30T22:02:46Z' generated_by: claude-opus-4-5-20251101 workflow_run: 20606634555 issue: 0 @@ -10,21 +10,18 @@ library_version: 0.15.2 preview_url: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/plotnine/plot.png preview_thumb: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/plotnine/plot_thumb.png preview_html: null -quality_score: 88 +quality_score: 90 review: strengths: - - Excellent visual implementation of parallel categories using plotnine's polygon - and rect geoms as building blocks - - Clean, professional appearance with well-balanced layout and appropriate use of - transparency for ribbons - - Realistic customer journey scenario with meaningful data that demonstrates the - plot type's purpose - - Smooth curved ribbons using cubic interpolation create an 
elegant flow visualization - - Good color scheme (Python blue/yellow) that is colorblind-accessible + - Creative implementation of parallel categories using plotnine basic geoms (geom_polygon, + geom_rect) + - Smooth cubic interpolation for ribbon curves creates professional appearance + - Clear visual distinction between outcomes with yellow/blue color scheme + - Effective use of transparency (alpha=0.5) for overlapping ribbons + - Well-organized data structure with explicit path counts + - Category labels positioned intelligently based on dimension position + - Counts displayed inside nodes for quantitative reference weaknesses: - - Title format deviates from spec by prepending "Customer Journey" instead of using - pure "{spec-id} 路 {library} 路 pyplots.ai" format - - Category labels for middle dimensions (Product, Customer) are not displayed beside - nodes, only counts shown inside - - Some ribbon crossings create visual complexity that could be reduced by reordering - categories + - Middle dimension category labels positioned below nodes are slightly smaller (9pt) + and could be harder to read + - Some imports may be unused (coord_cartesian could be replaced with theme settings)