From 0ca2764c42c64fc27c8fb892276585e9636638eb Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:20:04 +0000 Subject: [PATCH] feat(pygal): implement box-basic Simplify box plot implementation to follow KISS principle: - Remove functions/classes, use sequential code - Use project color palette - Set dimensions to 4800x2700 as per style guide - Increase font sizes for readability at target resolution --- plots/pygal/box/box-basic/default.py | 199 ++++++--------------------- 1 file changed, 42 insertions(+), 157 deletions(-) diff --git a/plots/pygal/box/box-basic/default.py b/plots/pygal/box/box-basic/default.py index a71dedf887..c6667171bc 100644 --- a/plots/pygal/box/box-basic/default.py +++ b/plots/pygal/box/box-basic/default.py @@ -1,166 +1,51 @@ """ box-basic: Basic Box Plot -Implementation for: pygal -Variant: default -Python: 3.10+ +Library: pygal """ -from typing import TYPE_CHECKING, Optional - import numpy as np -import pandas as pd import pygal from pygal.style import Style -if TYPE_CHECKING: - from pygal import Box - - -def create_plot( - data: pd.DataFrame, - values: str, - groups: str, - title: Optional[str] = None, - xlabel: Optional[str] = None, - ylabel: Optional[str] = None, - width: int = 1600, - height: int = 900, - show_legend: bool = True, - **kwargs, -) -> Box: - """ - Create a basic box plot showing statistical distribution of multiple groups using pygal. - - Args: - data: Input DataFrame with required columns - values: Column name containing numeric values - groups: Column name containing group categories - title: Plot title (optional) - xlabel: Custom x-axis label (optional, defaults to groups column name) - ylabel: Custom y-axis label (optional, defaults to values column name) - width: Figure width in pixels (default: 1600) - height: Figure height in pixels (default: 900) - show_legend: Whether to show legend (default: True) - **kwargs: Additional parameters for pygal configuration - - Returns: - pygal Box chart object - - Raises: - ValueError: If data is empty - KeyError: If required columns not found - - Example: - >>> data = pd.DataFrame({ - ... 'Group': ['A', 'A', 'B', 'B', 'C', 'C'], - ... 'Value': [1, 2, 2, 3, 3, 4] - ... }) - >>> chart = create_plot(data, values='Value', groups='Group') - """ - # Input validation - if data.empty: - raise ValueError("Data cannot be empty") - - # Check required columns - for col in [values, groups]: - if col not in data.columns: - available = ", ".join(data.columns) - raise KeyError(f"Column '{col}' not found. Available columns: {available}") - - # Create custom style - custom_style = Style( - background="white", - plot_background="white", - foreground="#333", - foreground_strong="#333", - foreground_subtle="#555", - opacity=0.7, - opacity_hover=0.9, - colors=("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3", "#a6d854", "#ffd92f", "#e5c494", "#b3b3b3"), - font_family="Arial, sans-serif", - major_guide_stroke_dasharray="3,3", - guide_stroke_dasharray="1,1", - ) - - # Create box plot - box_chart = pygal.Box( - title=title or "Box Plot Distribution", - x_title=xlabel or groups, - y_title=ylabel or values, - width=width, - height=height, - show_legend=show_legend, - style=custom_style, - box_mode="tukey", # Use Tukey method (1.5 * IQR for whiskers) - print_values=False, - print_zeroes=False, - **kwargs, - ) - - # Calculate box plot data for each group - group_names = sorted(data[groups].unique()) - - for group in group_names: - group_data = data[data[groups] == group][values].dropna() - - # Pygal's Box chart expects data in a specific format: - # [min, Q1, median, Q3, max] or the raw values (pygal will calculate) - # We'll provide the raw values and let pygal handle the calculations - values_list = group_data.tolist() - - # Add the series with label - box_chart.add(f"{group} (n={len(values_list)})", values_list) - - return box_chart - - -if __name__ == "__main__": - # Sample data for testing with different distributions per group - np.random.seed(42) # For reproducibility - - # Generate sample data with 4 groups - data_dict = {"Group": [], "Value": []} - - # Group A: Normal distribution, mean=50, std=10 - group_a_data = np.random.normal(50, 10, 40) - # Add some outliers - group_a_data = np.append(group_a_data, [80, 85, 15]) - - # Group B: Normal distribution, mean=60, std=15 - group_b_data = np.random.normal(60, 15, 35) - # Add outliers - group_b_data = np.append(group_b_data, [100, 10]) - - # Group C: Normal distribution, mean=45, std=8 - group_c_data = np.random.normal(45, 8, 45) - - # Group D: Skewed distribution - group_d_data = np.random.gamma(2, 2, 30) + 40 - # Add outliers - group_d_data = np.append(group_d_data, [75, 78, 20]) - - # Combine all data - for group, values in zip( - ["Group A", "Group B", "Group C", "Group D"], - [group_a_data, group_b_data, group_c_data, group_d_data], - strict=False, - ): - data_dict["Group"].extend([group] * len(values)) - data_dict["Value"].extend(values) - - data = pd.DataFrame(data_dict) - - # Create plot - chart = create_plot( - data, - values="Value", - groups="Group", - title="Statistical Distribution Comparison Across Groups", - ylabel="Measurement Value", - xlabel="Categories", - ) - - # Save as PNG - chart.render_to_png("plot.png") - print("Plot saved to plot.png") +# Data +np.random.seed(42) +group_a = np.random.normal(50, 10, 50).tolist() +group_b = np.random.normal(60, 15, 50).tolist() +group_c = np.random.normal(45, 8, 50).tolist() +group_d = np.random.normal(70, 20, 50).tolist() + +# Custom style using project palette +custom_style = Style( + background="white", + plot_background="white", + foreground="#333333", + foreground_strong="#333333", + foreground_subtle="#666666", + colors=("#306998", "#FFD43B", "#DC2626", "#059669", "#8B5CF6", "#F97316"), + title_font_size=48, + label_font_size=40, + legend_font_size=36, + value_font_size=32, +) + +# Create box plot +chart = pygal.Box( + width=4800, + height=2700, + title="Basic Box Plot", + x_title="Group", + y_title="Value", + style=custom_style, + show_legend=True, + legend_at_bottom=True, +) + +# Add data series +chart.add("A", group_a) +chart.add("B", group_b) +chart.add("C", group_c) +chart.add("D", group_d) + +# Save +chart.render_to_png("plot.png")