diff --git a/plots/altair/point/scatter-color-groups/default.py b/plots/altair/point/scatter-color-groups/default.py new file mode 100644 index 0000000000..9790cb2f1d --- /dev/null +++ b/plots/altair/point/scatter-color-groups/default.py @@ -0,0 +1,54 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: altair +""" + +import altair as alt +import numpy as np +import pandas as pd + + +# Data - create iris-like dataset with three species groups +np.random.seed(42) + +# Generate data for three groups with different cluster centers +n_per_group = 50 + +# Setosa: smaller sepal length, larger sepal width +setosa_x = np.random.normal(5.0, 0.4, n_per_group) +setosa_y = np.random.normal(3.4, 0.4, n_per_group) + +# Versicolor: medium values +versicolor_x = np.random.normal(6.0, 0.5, n_per_group) +versicolor_y = np.random.normal(2.8, 0.3, n_per_group) + +# Virginica: larger sepal length, medium sepal width +virginica_x = np.random.normal(6.6, 0.6, n_per_group) +virginica_y = np.random.normal(3.0, 0.35, n_per_group) + +data = pd.DataFrame( + { + "sepal_length": np.concatenate([setosa_x, versicolor_x, virginica_x]), + "sepal_width": np.concatenate([setosa_y, versicolor_y, virginica_y]), + "species": ["setosa"] * n_per_group + ["versicolor"] * n_per_group + ["virginica"] * n_per_group, + } +) + +# Define custom color palette (colorblind-safe) +color_scale = alt.Scale(domain=["setosa", "versicolor", "virginica"], range=["#306998", "#FFD43B", "#059669"]) + +# Create scatter plot with color groups +chart = ( + alt.Chart(data) + .mark_point(size=100, opacity=0.7) + .encode( + x=alt.X("sepal_length:Q", title="Sepal Length (cm)"), + y=alt.Y("sepal_width:Q", title="Sepal Width (cm)"), + color=alt.Color("species:N", title="Species", scale=color_scale), + tooltip=["species:N", "sepal_length:Q", "sepal_width:Q"], + ) + .properties(width=1600, height=900, title="Scatter Plot with Color Groups") +) + +# Save as PNG (1600 × 900 × 3 = 4800 × 2700 px) +chart.save("plot.png", scale_factor=3.0) diff --git a/plots/bokeh/scatter/scatter-color-groups/default.py b/plots/bokeh/scatter/scatter-color-groups/default.py new file mode 100644 index 0000000000..5b0fe00573 --- /dev/null +++ b/plots/bokeh/scatter/scatter-color-groups/default.py @@ -0,0 +1,65 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: bokeh +""" + +import numpy as np +import pandas as pd +from bokeh.io import export_png +from bokeh.models import ColumnDataSource +from bokeh.plotting import figure + + +# Data - Iris-like dataset +np.random.seed(42) +n_per_group = 50 + +data = pd.DataFrame({ + "sepal_length": np.concatenate([ + np.random.normal(5.0, 0.35, n_per_group), + np.random.normal(5.9, 0.50, n_per_group), + np.random.normal(6.6, 0.60, n_per_group), + ]), + "sepal_width": np.concatenate([ + np.random.normal(3.4, 0.38, n_per_group), + np.random.normal(2.8, 0.30, n_per_group), + np.random.normal(3.0, 0.30, n_per_group), + ]), + "species": ["setosa"] * n_per_group + ["versicolor"] * n_per_group + ["virginica"] * n_per_group, +}) + +# Color palette (from style guide) +colors = ["#306998", "#FFD43B", "#DC2626", "#059669", "#8B5CF6", "#F97316"] +species_list = data["species"].unique().tolist() +color_map = {species: colors[i] for i, species in enumerate(species_list)} +data["color"] = data["species"].map(color_map) + +# Create figure +p = figure( + width=4800, + height=2700, + title="Scatter Plot with Color Groups", + x_axis_label="Sepal Length (cm)", + y_axis_label="Sepal Width (cm)", +) + +# Plot each group separately for legend +for species in species_list: + species_data = data[data["species"] == species] + source = ColumnDataSource(data={"x": species_data["sepal_length"], "y": species_data["sepal_width"]}) + p.scatter( + x="x", y="y", source=source, size=12, alpha=0.7, color=color_map[species], legend_label=species.capitalize() + ) + +# Styling +p.title.text_font_size = "20pt" +p.xaxis.axis_label_text_font_size = "20pt" +p.yaxis.axis_label_text_font_size = "20pt" +p.xaxis.major_label_text_font_size = "16pt" +p.yaxis.major_label_text_font_size = "16pt" +p.legend.label_text_font_size = "16pt" +p.legend.location = "top_right" +p.grid.grid_line_alpha = 0.3 + +# Save +export_png(p, filename="plot.png") diff --git a/plots/highcharts/scatter/scatter-color-groups/default.py b/plots/highcharts/scatter/scatter-color-groups/default.py new file mode 100644 index 0000000000..de70319a52 --- /dev/null +++ b/plots/highcharts/scatter/scatter-color-groups/default.py @@ -0,0 +1,166 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: highcharts + +Note: Highcharts requires a license for commercial use. +""" + +import json +import tempfile +import time +import urllib.request +from pathlib import Path + +from highcharts_core.chart import Chart +from highcharts_core.options import HighchartsOptions +from highcharts_core.options.series.scatter import ScatterSeries +from selenium import webdriver +from selenium.webdriver.chrome.options import Options + + +# Color palette from style guide +COLORS = ["#306998", "#FFD43B", "#DC2626", "#059669", "#8B5CF6", "#F97316"] + +# Data - Iris dataset (sepal_length, sepal_width) by species +# fmt: off +iris_data = { + "setosa": [ + (5.1, 3.5), (4.9, 3.0), (4.7, 3.2), (4.6, 3.1), (5.0, 3.6), (5.4, 3.9), (4.6, 3.4), (5.0, 3.4), + (4.4, 2.9), (4.9, 3.1), (5.4, 3.7), (4.8, 3.4), (4.8, 3.0), (4.3, 3.0), (5.8, 4.0), (5.7, 4.4), + (5.4, 3.9), (5.1, 3.5), (5.7, 3.8), (5.1, 3.8), (5.4, 3.4), (5.1, 3.7), (4.6, 3.6), (5.1, 3.3), + (4.8, 3.4), (5.0, 3.0), (5.0, 3.4), (5.2, 3.5), (5.2, 3.4), (4.7, 3.2), (4.8, 3.1), (5.4, 3.4), + (5.2, 4.1), (5.5, 4.2), (4.9, 3.1), (5.0, 3.2), (5.5, 3.5), (4.9, 3.6), (4.4, 3.0), (5.1, 3.4), + (5.0, 3.5), (4.5, 2.3), (4.4, 3.2), (5.0, 3.5), (5.1, 3.8), (4.8, 3.0), (5.1, 3.8), (4.6, 3.2), + (5.3, 3.7), (5.0, 3.3), + ], + "versicolor": [ + (7.0, 3.2), (6.4, 3.2), (6.9, 3.1), (5.5, 2.3), (6.5, 2.8), (5.7, 2.8), (6.3, 3.3), (4.9, 2.4), + (6.6, 2.9), (5.2, 2.7), (5.0, 2.0), (5.9, 3.0), (6.0, 2.2), (6.1, 2.9), (5.6, 2.9), (6.7, 3.1), + (5.6, 3.0), (5.8, 2.7), (6.2, 2.2), (5.6, 2.5), (5.9, 3.2), (6.1, 2.8), (6.3, 2.5), (6.1, 2.8), + (6.4, 2.9), (6.6, 3.0), (6.8, 2.8), (6.7, 3.0), (6.0, 2.9), (5.7, 2.6), (5.5, 2.4), (5.5, 2.4), + (5.8, 2.7), (6.0, 2.7), (5.4, 3.0), (6.0, 3.4), (6.7, 3.1), (6.3, 2.3), (5.6, 3.0), (5.5, 2.5), + (5.5, 2.6), (6.1, 3.0), (5.8, 2.6), (5.0, 2.3), (5.6, 2.7), (5.7, 3.0), (5.7, 2.9), (6.2, 2.9), + (5.1, 2.5), (5.7, 2.8), + ], + "virginica": [ + (6.3, 3.3), (5.8, 2.7), (7.1, 3.0), (6.3, 2.9), (6.5, 3.0), (7.6, 3.0), (4.9, 2.5), (7.3, 2.9), + (6.7, 2.5), (7.2, 3.6), (6.5, 3.2), (6.4, 2.7), (6.8, 3.0), (5.7, 2.5), (5.8, 2.8), (6.4, 3.2), + (6.5, 3.0), (7.7, 3.8), (7.7, 2.6), (6.0, 2.2), (6.9, 3.2), (5.6, 2.8), (7.7, 2.8), (6.3, 2.7), + (6.7, 3.3), (7.2, 3.2), (6.2, 2.8), (6.1, 3.0), (6.4, 2.8), (7.2, 3.0), (7.4, 2.8), (7.9, 3.8), + (6.4, 2.8), (6.3, 2.8), (6.1, 2.6), (7.7, 3.0), (6.3, 3.4), (6.4, 3.1), (6.0, 3.0), (6.9, 3.1), + (6.7, 3.1), (6.9, 3.1), (5.8, 2.7), (6.8, 3.2), (6.7, 3.3), (6.7, 3.0), (6.3, 2.5), (6.5, 3.0), + (6.2, 3.4), (5.9, 3.0), + ], +} +# fmt: on +groups = list(iris_data.keys()) + +# Create chart with container ID for rendering +chart = Chart(container="container") +chart.options = HighchartsOptions() + +# Chart configuration - 4800 x 2700 px per style guide +chart.options.chart = {"type": "scatter", "width": 4800, "height": 2700, "backgroundColor": "#ffffff"} + +# Title +chart.options.title = { + "text": "Iris Dataset: Sepal Dimensions by Species", + "style": {"fontSize": "48px", "fontWeight": "bold"}, +} + +# X-axis configuration +chart.options.x_axis = { + "title": {"text": "Sepal Length (cm)", "style": {"fontSize": "36px"}}, + "labels": {"style": {"fontSize": "28px"}}, + "gridLineWidth": 1, + "gridLineDashStyle": "Dot", + "gridLineColor": "rgba(0, 0, 0, 0.15)", +} + +# Y-axis configuration +chart.options.y_axis = { + "title": {"text": "Sepal Width (cm)", "style": {"fontSize": "36px"}}, + "labels": {"style": {"fontSize": "28px"}}, + "gridLineWidth": 1, + "gridLineDashStyle": "Dot", + "gridLineColor": "rgba(0, 0, 0, 0.15)", +} + +# Plot options for scatter +chart.options.plot_options = { + "scatter": { + "marker": {"radius": 12, "states": {"hover": {"enabled": True, "lineColor": "rgb(100,100,100)"}}}, + "states": {"hover": {"marker": {"enabled": False}}}, + } +} + +# Add a series for each group with distinct colors +for i, group in enumerate(groups): + series = ScatterSeries() + series.name = group.capitalize() + series.data = iris_data[group] + series.color = COLORS[i % len(COLORS)] + chart.add_series(series) + +# Legend configuration +chart.options.legend = { + "enabled": True, + "align": "right", + "verticalAlign": "middle", + "layout": "vertical", + "itemStyle": {"fontSize": "28px"}, +} + +# Tooltip configuration +chart.options.tooltip = { + "headerFormat": "{series.name}
", + "pointFormat": "Sepal Length: {point.x} cm
Sepal Width: {point.y} cm", + "style": {"fontSize": "24px"}, +} + +# Disable credits +chart.options.credits = {"enabled": False} + +# Export to PNG via Selenium screenshot +# Download Highcharts JS (required for headless Chrome which can't load CDN) +highcharts_url = "https://code.highcharts.com/highcharts.js" +with urllib.request.urlopen(highcharts_url, timeout=30) as response: + highcharts_js = response.read().decode("utf-8") + +# Get chart options as JSON +opts_json = json.dumps(chart.options.to_dict()) + +html_content = f""" + + + + + + +
+ + +""" + +# Write temp HTML and take screenshot +with tempfile.NamedTemporaryFile(mode="w", suffix=".html", delete=False, encoding="utf-8") as f: + f.write(html_content) + temp_path = f.name + +chrome_options = Options() +chrome_options.add_argument("--headless") +chrome_options.add_argument("--no-sandbox") +chrome_options.add_argument("--disable-dev-shm-usage") +chrome_options.add_argument("--disable-gpu") +chrome_options.add_argument("--window-size=4800,2800") + +driver = webdriver.Chrome(options=chrome_options) +driver.get(f"file://{temp_path}") +time.sleep(5) # Wait for chart to render +driver.save_screenshot("plot.png") +driver.quit() + +Path(temp_path).unlink() # Clean up temp file +print("Plot saved to plot.png") diff --git a/plots/letsplot/point/scatter-color-groups/default.py b/plots/letsplot/point/scatter-color-groups/default.py new file mode 100644 index 0000000000..d3fc45a02d --- /dev/null +++ b/plots/letsplot/point/scatter-color-groups/default.py @@ -0,0 +1,70 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: letsplot +""" + +import numpy as np +import pandas as pd +from lets_plot import ( + LetsPlot, + aes, + element_text, + geom_point, + ggplot, + ggsave, + ggsize, + labs, + scale_color_manual, + theme, + theme_minimal, +) + + +LetsPlot.setup_html() + +# Data - Generate iris-like dataset +np.random.seed(42) +n_per_species = 50 + +# Setosa: smaller sepals +setosa_length = np.random.normal(5.0, 0.35, n_per_species) +setosa_width = np.random.normal(3.4, 0.38, n_per_species) + +# Versicolor: medium sepals +versicolor_length = np.random.normal(5.9, 0.52, n_per_species) +versicolor_width = np.random.normal(2.8, 0.31, n_per_species) + +# Virginica: larger sepals +virginica_length = np.random.normal(6.6, 0.64, n_per_species) +virginica_width = np.random.normal(3.0, 0.32, n_per_species) + +data = pd.DataFrame( + { + "sepal_length": np.concatenate([setosa_length, versicolor_length, virginica_length]), + "sepal_width": np.concatenate([setosa_width, versicolor_width, virginica_width]), + "species": ["Setosa"] * n_per_species + ["Versicolor"] * n_per_species + ["Virginica"] * n_per_species, + } +) + +# Custom color palette (colorblind-safe) +colors = ["#306998", "#FFD43B", "#059669"] + +# Plot +plot = ( + ggplot(data, aes(x="sepal_length", y="sepal_width", color="species")) + + geom_point(size=4, alpha=0.7) + + scale_color_manual(values=colors) + + labs(x="Sepal Length (cm)", y="Sepal Width (cm)", title="Iris Sepal Dimensions by Species", color="Species") + + theme_minimal() + + theme( + plot_title=element_text(size=20), + axis_title=element_text(size=20), + axis_text=element_text(size=16), + legend_title=element_text(size=16), + legend_text=element_text(size=16), + ) + + ggsize(1600, 900) +) + +# Save (scale 3x to get 4800 x 2700 px) +ggsave(plot, "plot.png", path=".", scale=3) diff --git a/plots/matplotlib/scatter/scatter-color-groups/default.py b/plots/matplotlib/scatter/scatter-color-groups/default.py index 02fef6ecce..7f45e2c195 100644 --- a/plots/matplotlib/scatter/scatter-color-groups/default.py +++ b/plots/matplotlib/scatter/scatter-color-groups/default.py @@ -1,135 +1,59 @@ """ scatter-color-groups: Scatter Plot with Color Groups -Implementation for: matplotlib -Variant: default -Python: 3.10+ +Library: matplotlib """ -from typing import TYPE_CHECKING - import matplotlib.pyplot as plt +import numpy as np import pandas as pd -if TYPE_CHECKING: - from matplotlib.figure import Figure - - -def create_plot( - data: pd.DataFrame, - x: str, - y: str, - group: str, - figsize: tuple[float, float] = (16, 9), - alpha: float = 0.7, - size: float = 50, - title: str | None = None, - xlabel: str | None = None, - ylabel: str | None = None, - palette: str = "Set1", - **kwargs, -) -> "Figure": - """ - Create a scatter plot with points colored by categorical groups. - - Visualizes data points in a 2D x-y space with distinct colors for each - categorical group, showing separate "color clouds" for different categories. - - Args: - data: Input DataFrame with required columns - x: Column name for x-axis values - y: Column name for y-axis values - group: Column name for categorical grouping and coloring - figsize: Figure size as (width, height) tuple (default: (16, 9)) - alpha: Transparency level for points (default: 0.7) - size: Point size (default: 50) - title: Plot title (default: None) - xlabel: Custom x-axis label (default: uses column name) - ylabel: Custom y-axis label (default: uses column name) - palette: Matplotlib colormap or seaborn palette name (default: "Set1") - **kwargs: Additional parameters passed to scatter plot - - Returns: - Matplotlib Figure object - - Raises: - ValueError: If data is empty - KeyError: If required columns not found in data - - Example: - >>> import pandas as pd - >>> data = pd.DataFrame({ - ... 'x': [1, 2, 3, 4, 5, 6], - ... 'y': [2, 4, 3, 5, 6, 4], - ... 'group': ['A', 'A', 'B', 'B', 'C', 'C'] - ... }) - >>> fig = create_plot(data, 'x', 'y', 'group') - >>> plt.savefig('plot.png') - """ - # Input validation - if data.empty: - raise ValueError("Data cannot be empty") - - # Check required columns - required_cols = [x, y, group] - for col in required_cols: - if col not in data.columns: - available = ", ".join(data.columns) - raise KeyError(f"Column '{col}' not found in data. Available columns: {available}") - - # Create figure and axis - fig, ax = plt.subplots(figsize=figsize) - - # Get unique groups and create color mapping - groups = data[group].unique() - - # Get colors from palette - try: - cmap = plt.get_cmap(palette) - colors = [cmap(i / max(len(groups) - 1, 1)) for i in range(len(groups))] - except (ValueError, AttributeError): - # Fallback to tab10 if palette not found - cmap = plt.get_cmap("tab10") - colors = [cmap(i % 10) for i in range(len(groups))] - - # Plot each group with a different color - for idx, group_val in enumerate(groups): - group_data = data[data[group] == group_val] - ax.scatter(group_data[x], group_data[y], label=str(group_val), alpha=alpha, s=size, color=colors[idx], **kwargs) - - # Set labels - ax.set_xlabel(xlabel or x, fontsize=11) - ax.set_ylabel(ylabel or y, fontsize=11) - - # Add title if provided - if title: - ax.set_title(title, fontsize=12, fontweight="bold", pad=15) - - # Add legend - ax.legend(title=group, loc="best", framealpha=0.9) - - # Add subtle grid - ax.grid(True, alpha=0.3, linestyle="--") - - # Layout - plt.tight_layout() - - return fig - - -if __name__ == "__main__": - # Sample data for testing - data = pd.DataFrame( - { - "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5], - "y": [2, 4, 3, 5, 6, 4, 7, 8, 9, 10, 3, 5, 4, 6, 7, 5], - "group": ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C"], - } +# Data - Iris-like dataset +np.random.seed(42) +n_per_group = 50 + +data = pd.DataFrame({ + "sepal_length": np.concatenate([ + np.random.normal(5.0, 0.35, n_per_group), + np.random.normal(5.9, 0.50, n_per_group), + np.random.normal(6.6, 0.60, n_per_group), + ]), + "sepal_width": np.concatenate([ + np.random.normal(3.4, 0.38, n_per_group), + np.random.normal(2.8, 0.30, n_per_group), + np.random.normal(3.0, 0.30, n_per_group), + ]), + "species": ["setosa"] * n_per_group + ["versicolor"] * n_per_group + ["virginica"] * n_per_group, +}) + +# Color palette (colorblind safe from style guide) +colors = ["#306998", "#FFD43B", "#DC2626"] +species = data["species"].unique() +color_map = {sp: colors[i] for i, sp in enumerate(species)} + +# Create plot +fig, ax = plt.subplots(figsize=(16, 9)) + +for species_name in species: + subset = data[data["species"] == species_name] + ax.scatter( + subset["sepal_length"], + subset["sepal_width"], + c=color_map[species_name], + label=species_name.capitalize(), + alpha=0.7, + s=80, + edgecolors="white", + linewidths=0.5, ) - # Create plot - fig = create_plot(data, "x", "y", "group", title="Scatter Plot with Color Groups") +# Labels and styling +ax.set_xlabel("Sepal Length (cm)", fontsize=20) +ax.set_ylabel("Sepal Width (cm)", fontsize=20) +ax.set_title("Iris Species by Sepal Dimensions", fontsize=20) +ax.tick_params(axis="both", labelsize=16) +ax.legend(title="Species", fontsize=16, title_fontsize=16) +ax.grid(True, alpha=0.3) - # Save for inspection - plt.savefig("plot.png", dpi=300, bbox_inches="tight") - print("Plot saved to plot.png") +plt.tight_layout() +plt.savefig("plot.png", dpi=300, bbox_inches="tight") diff --git a/plots/plotly/scatter/scatter-color-groups/default.py b/plots/plotly/scatter/scatter-color-groups/default.py new file mode 100644 index 0000000000..34725118d4 --- /dev/null +++ b/plots/plotly/scatter/scatter-color-groups/default.py @@ -0,0 +1,70 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: plotly +""" + +import numpy as np +import pandas as pd +import plotly.express as px + + +# Data - iris-like dataset with three species groups +np.random.seed(42) +n_per_group = 50 + +# Generate data for three species groups with distinct cluster patterns +setosa = pd.DataFrame( + { + "sepal_length": np.random.normal(5.0, 0.35, n_per_group), + "sepal_width": np.random.normal(3.4, 0.38, n_per_group), + "species": "setosa", + } +) +versicolor = pd.DataFrame( + { + "sepal_length": np.random.normal(5.9, 0.52, n_per_group), + "sepal_width": np.random.normal(2.8, 0.31, n_per_group), + "species": "versicolor", + } +) +virginica = pd.DataFrame( + { + "sepal_length": np.random.normal(6.6, 0.64, n_per_group), + "sepal_width": np.random.normal(3.0, 0.32, n_per_group), + "species": "virginica", + } +) +data = pd.concat([setosa, versicolor, virginica], ignore_index=True) + +# Color palette matching style guide +colors = ["#306998", "#FFD43B", "#DC2626", "#059669", "#8B5CF6", "#F97316"] + +# Create plot +fig = px.scatter( + data, + x="sepal_length", + y="sepal_width", + color="species", + color_discrete_sequence=colors, + title="Scatter Plot with Color Groups", +) + +# Update layout for styling +fig.update_layout( + template="plotly_white", + xaxis_title="Sepal Length (cm)", + yaxis_title="Sepal Width (cm)", + title={"font": {"size": 40}, "x": 0.5, "xanchor": "center"}, + font={"size": 32}, + legend={"title": {"text": "Species", "font": {"size": 32}}, "font": {"size": 28}, "itemsizing": "constant"}, +) + +# Update axes for readability +fig.update_xaxes(title_font={"size": 40}, tickfont={"size": 32}, gridcolor="#E5E5E5") +fig.update_yaxes(title_font={"size": 40}, tickfont={"size": 32}, gridcolor="#E5E5E5") + +# Update markers for better visibility +fig.update_traces(marker={"size": 16, "opacity": 0.8, "line": {"width": 1, "color": "white"}}) + +# Save as PNG (4800 x 2700 px) +fig.write_image("plot.png", width=1600, height=900, scale=3) diff --git a/plots/plotnine/point/scatter-color-groups/default.py b/plots/plotnine/point/scatter-color-groups/default.py new file mode 100644 index 0000000000..5c63833376 --- /dev/null +++ b/plots/plotnine/point/scatter-color-groups/default.py @@ -0,0 +1,43 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: plotnine +""" + +import numpy as np +import pandas as pd +from plotnine import aes, geom_point, ggplot, labs, scale_color_manual, theme, theme_minimal + + +# Data - Iris-like dataset +np.random.seed(42) +n_per_group = 50 + +data = pd.DataFrame({ + "sepal_length": np.concatenate([ + np.random.normal(5.0, 0.35, n_per_group), + np.random.normal(5.9, 0.50, n_per_group), + np.random.normal(6.6, 0.60, n_per_group), + ]), + "sepal_width": np.concatenate([ + np.random.normal(3.4, 0.38, n_per_group), + np.random.normal(2.8, 0.30, n_per_group), + np.random.normal(3.0, 0.30, n_per_group), + ]), + "species": ["setosa"] * n_per_group + ["versicolor"] * n_per_group + ["virginica"] * n_per_group, +}) + +# Color palette (from style guide) +colors = ["#306998", "#FFD43B", "#DC2626"] + +# Create plot +plot = ( + ggplot(data, aes(x="sepal_length", y="sepal_width", color="species")) + + geom_point(size=3, alpha=0.7) + + labs(x="Sepal Length (cm)", y="Sepal Width (cm)", title="Scatter Plot with Color Groups", color="Species") + + scale_color_manual(values=colors) + + theme_minimal() + + theme(figure_size=(16, 9)) +) + +# Save +plot.save("plot.png", dpi=300) diff --git a/plots/pygal/xy/scatter-color-groups/default.py b/plots/pygal/xy/scatter-color-groups/default.py new file mode 100644 index 0000000000..1fc461b399 --- /dev/null +++ b/plots/pygal/xy/scatter-color-groups/default.py @@ -0,0 +1,65 @@ +""" +scatter-color-groups: Scatter Plot with Color Groups +Library: pygal +""" + +import numpy as np +import pygal +from pygal.style import Style + + +# Generate iris-like sample data +np.random.seed(42) + +# Setosa cluster (short sepal length, wide sepal width) +setosa_x = np.random.normal(5.0, 0.35, 50) +setosa_y = np.random.normal(3.4, 0.38, 50) + +# Versicolor cluster (medium sepal length, medium sepal width) +versicolor_x = np.random.normal(5.9, 0.52, 50) +versicolor_y = np.random.normal(2.8, 0.31, 50) + +# Virginica cluster (long sepal length, medium-wide sepal width) +virginica_x = np.random.normal(6.6, 0.64, 50) +virginica_y = np.random.normal(3.0, 0.32, 50) + +groups = { + "Setosa": list(zip(setosa_x, setosa_y, strict=True)), + "Versicolor": list(zip(versicolor_x, versicolor_y, strict=True)), + "Virginica": list(zip(virginica_x, virginica_y, strict=True)), +} + +# Style (using PyPlots.ai palette) +custom_style = Style( + background="white", + plot_background="white", + foreground="#333333", + foreground_strong="#333333", + foreground_subtle="#666666", + colors=("#306998", "#FFD43B", "#DC2626", "#059669", "#8B5CF6", "#F97316"), + title_font_size=40, + legend_font_size=32, + label_font_size=32, + major_label_font_size=32, +) + +# Create XY scatter chart +chart = pygal.XY( + width=4800, + height=2700, + style=custom_style, + title="Iris Sepal Dimensions by Species", + x_title="Sepal Length (cm)", + y_title="Sepal Width (cm)", + stroke=False, + show_x_guides=True, + show_y_guides=True, + dots_size=8, +) + +# Add data by group +for group_name, points in groups.items(): + chart.add(group_name, points) + +# Save +chart.render_to_png("plot.png") diff --git a/plots/seaborn/scatterplot/scatter-color-groups/default.py b/plots/seaborn/scatterplot/scatter-color-groups/default.py index ee868e43bf..222001e1ef 100644 --- a/plots/seaborn/scatterplot/scatter-color-groups/default.py +++ b/plots/seaborn/scatterplot/scatter-color-groups/default.py @@ -1,122 +1,39 @@ """ scatter-color-groups: Scatter Plot with Color Groups -Implementation for: seaborn -Variant: default -Python: 3.10+ +Library: seaborn """ -from typing import TYPE_CHECKING - import matplotlib.pyplot as plt -import pandas as pd import seaborn as sns -if TYPE_CHECKING: - from matplotlib.figure import Figure - - -def create_plot( - data: pd.DataFrame, - x: str, - y: str, - group: str, - figsize: tuple[float, float] = (16, 9), - alpha: float = 0.7, - size: float = 100, - title: str | None = None, - xlabel: str | None = None, - ylabel: str | None = None, - palette: str = "Set1", - **kwargs, -) -> "Figure": - """ - Create a scatter plot with points colored by categorical groups using seaborn. - - Visualizes data points in a 2D x-y space with distinct colors for each - categorical group, showing separate "color clouds" for different categories. - - Args: - data: Input DataFrame with required columns - x: Column name for x-axis values - y: Column name for y-axis values - group: Column name for categorical grouping and coloring - figsize: Figure size as (width, height) tuple (default: (16, 9)) - alpha: Transparency level for points (default: 0.7) - size: Point size (default: 100) - title: Plot title (default: None) - xlabel: Custom x-axis label (default: uses column name) - ylabel: Custom y-axis label (default: uses column name) - palette: Seaborn palette name (default: "Set1") - **kwargs: Additional parameters passed to scatterplot - - Returns: - Matplotlib Figure object - - Raises: - ValueError: If data is empty - KeyError: If required columns not found in data - - Example: - >>> import pandas as pd - >>> data = pd.DataFrame({ - ... 'x': [1, 2, 3, 4, 5, 6], - ... 'y': [2, 4, 3, 5, 6, 4], - ... 'group': ['A', 'A', 'B', 'B', 'C', 'C'] - ... }) - >>> fig = create_plot(data, 'x', 'y', 'group') - >>> plt.savefig('plot.png') - """ - # Input validation - if data.empty: - raise ValueError("Data cannot be empty") - - # Check required columns - required_cols = [x, y, group] - for col in required_cols: - if col not in data.columns: - available = ", ".join(data.columns) - raise KeyError(f"Column '{col}' not found in data. Available columns: {available}") - - # Create figure - fig, ax = plt.subplots(figsize=figsize) - - # Create scatter plot with hue for grouping - sns.scatterplot(data=data, x=x, y=y, hue=group, palette=palette, alpha=alpha, s=size, ax=ax, **kwargs) - - # Set labels - ax.set_xlabel(xlabel or x, fontsize=11) - ax.set_ylabel(ylabel or y, fontsize=11) - - # Add title if provided - if title: - ax.set_title(title, fontsize=12, fontweight="bold", pad=15) - - # Customize legend - ax.legend(title=group, loc="best", framealpha=0.9) - - # Add subtle grid - ax.grid(True, alpha=0.3, linestyle="--") - - # Layout - plt.tight_layout() - - return fig - - -if __name__ == "__main__": - # Sample data for testing - data = pd.DataFrame( - { - "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5], - "y": [2, 4, 3, 5, 6, 4, 7, 8, 9, 10, 3, 5, 4, 6, 7, 5], - "group": ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C"], - } - ) - - # Create plot - fig = create_plot(data, "x", "y", "group", title="Scatter Plot with Color Groups") - - # Save for inspection - plt.savefig("plot.png", dpi=300, bbox_inches="tight") - print("Plot saved to plot.png") +# Data - use iris dataset as specified in the spec +data = sns.load_dataset("iris") + +# Create figure +fig, ax = plt.subplots(figsize=(16, 9)) + +# Plot - scatter with color by species (group) +sns.scatterplot( + data=data, + x="sepal_length", + y="sepal_width", + hue="species", + palette=["#306998", "#FFD43B", "#DC2626"], + s=100, + alpha=0.7, + ax=ax, +) + +# Labels and styling +ax.set_xlabel("Sepal Length (cm)", fontsize=20) +ax.set_ylabel("Sepal Width (cm)", fontsize=20) +ax.set_title("Iris Species by Sepal Dimensions", fontsize=20) +ax.tick_params(axis="both", labelsize=16) +ax.grid(True, alpha=0.3) + +# Legend styling +ax.legend(title="Species", fontsize=16, title_fontsize=16) + +plt.tight_layout() +plt.savefig("plot.png", dpi=300, bbox_inches="tight")