Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 62 additions & 103 deletions plots/scatter-basic/implementations/python/pygal.py
Original file line number Diff line number Diff line change
@@ -1,141 +1,100 @@
""" pyplots.ai
""" anyplot.ai
scatter-basic: Basic Scatter Plot
Library: pygal 3.1.0 | Python 3.14
Quality: 87/100 | Created: 2025-12-22
Library: pygal 3.1.0 | Python 3.14.4
Quality: 87/100 | Created: 2026-04-23
"""

import numpy as np
import pygal
from pygal.style import Style
import os
import sys


# Data — study hours vs exam scores with realistic positive correlation
np.random.seed(42)
n = 115
study_hours = np.random.uniform(2, 14, n)
exam_scores = study_hours * 4.5 + np.random.normal(0, 5.5, n) + 25
exam_scores = np.clip(exam_scores, 15, 100)
# This script is named `pygal.py`, so when it is run as `python pygal.py` its own
# directory (which Python puts at sys.path[0] by default) shadows the installed
# `pygal` package; removing that first entry lets the real package resolve.
sys.path.pop(0)

import numpy as np # noqa: E402
import pygal # noqa: E402
from pygal.style import Style # noqa: E402


# Add deliberate outliers showcasing scatter diversity (high/low performers)
outlier_hours = np.array([3.0, 12.5, 7.0, 11.0, 4.5])
outlier_scores = np.array([82.0, 42.0, 95.0, 48.0, 78.0])
study_hours = np.concatenate([study_hours, outlier_hours])
exam_scores = np.concatenate([exam_scores, outlier_scores])
# Theme tokens
THEME = os.getenv("ANYPLOT_THEME", "light")
PAGE_BG = "#FAF8F1" if THEME == "light" else "#1A1A17"
INK = "#1A1A17" if THEME == "light" else "#F0EFE8"
INK_SOFT = "#4A4A44" if THEME == "light" else "#B8B7B0"
INK_MUTED = "#8A8A82" if THEME == "light" else "#6E6D66"

# Compute trend line (linear regression) for data storytelling
coeffs = np.polyfit(study_hours, exam_scores, 1)
slope, intercept = coeffs
r = np.corrcoef(study_hours, exam_scores)[0, 1]
trend_x = np.linspace(study_hours.min(), study_hours.max(), 50)
trend_y = slope * trend_x + intercept
OKABE_ITO = ("#009E73", "#D55E00", "#0072B2", "#CC79A7", "#E69F00", "#56B4E9", "#F0E442")

# Data — study hours vs exam scores, moderate positive correlation
np.random.seed(42)
n = 180
study_hours = np.random.uniform(1.5, 13.5, n)
exam_scores = study_hours * 4.8 + np.random.normal(0, 6.5, n) + 26
exam_scores = np.clip(exam_scores, 20, 100)

# Identify notable outliers for annotation
residuals = exam_scores - (slope * study_hours + intercept)
top_outlier_idx = int(np.argmax(residuals))
bottom_outlier_idx = int(np.argmin(residuals))
# Visual hierarchy: split the points at the conventional 70% passing threshold to guide
# the viewer's eye and convey the "hours → outcome" narrative beyond a raw point cloud.
PASSING = 70.0
above = [(float(h), float(s)) for h, s in zip(study_hours, exam_scores, strict=True) if s >= PASSING]
below = [(float(h), float(s)) for h, s in zip(study_hours, exam_scores, strict=True) if s < PASSING]

# Shared font family
font = "DejaVu Sans, Helvetica, Arial, sans-serif"

# Refined style for 4800x2700 px canvas — subtle, professional palette
custom_style = Style(
background="white",
plot_background="#f7f7f7",
foreground="#2a2a2a",
foreground_strong="#2a2a2a",
foreground_subtle="#e0e0e0",
guide_stroke_color="#e0e0e0",
guide_stroke_dasharray="4, 4",
colors=("#306998", "#d64541", "#e8a838"),
background=PAGE_BG,
plot_background=PAGE_BG,
foreground=INK_SOFT,
foreground_strong=INK,
foreground_subtle=INK_MUTED,
colors=OKABE_ITO,
font_family=font,
title_font_family=font,
title_font_size=56,
label_font_size=42,
major_label_font_size=38,
legend_font_size=34,
label_font_family=font,
major_label_font_family=font,
legend_font_family=font,
value_font_size=28,
tooltip_font_size=28,
tooltip_font_family=font,
opacity=0.65,
title_font_size=52,
label_font_size=40,
major_label_font_size=36,
legend_font_size=34,
tooltip_font_size=28,
value_font_size=26,
opacity=0.7,
opacity_hover=0.95,
stroke_opacity=1,
stroke_opacity_hover=1,
)

# Axis range tightened to data bounds for better canvas utilization
x_min, x_max = float(np.floor(study_hours.min())), float(np.ceil(study_hours.max()))
y_min = float(max(0, np.floor(exam_scores.min() / 5) * 5))
y_max = float(min(100, np.ceil(exam_scores.max() / 5) * 5 + 5))

# Create XY chart
chart = pygal.XY(
width=4800,
height=2700,
style=custom_style,
title="scatter-basic \u00b7 pygal \u00b7 pyplots.ai",
title="scatter-basic · pygal · anyplot.ai",
x_title="Study Hours per Week (hrs)",
y_title="Exam Score (%)",
stroke=False,
dots_size=17,
show_legend=True,
legend_at_bottom=True,
legend_at_bottom_columns=3,
legend_box_size=24,
stroke=False,
dots_size=9,
legend_at_bottom_columns=2,
legend_box_size=32,
show_x_guides=True,
show_y_guides=True,
x_value_formatter=lambda x: f"{x:.0f}",
value_formatter=lambda y: f"{y:.0f}%",
margin_bottom=100,
margin_left=60,
margin_right=40,
margin_top=50,
x_label_rotation=0,
truncate_legend=-1,
range=(y_min, y_max),
xrange=(x_min, x_max),
x_value_formatter=lambda v: f"{v:.0f}",
value_formatter=lambda v: f"{v:.0f}%",
range=(15, 100),
xrange=(1, 14),
x_labels_major_count=7,
y_labels_major_count=9,
margin=60,
print_values=False,
print_zeroes=False,
js=[],
)

# Add scatter data as list of (x, y) tuples
points = [(float(h), float(s)) for h, s in zip(study_hours, exam_scores, strict=True)]
chart.add(
f"Students (n={len(points)})",
points,
stroke=False,
formatter=lambda x: f"({x[0]:.1f} hrs, {x[1]:.0f}%)" if isinstance(x, (tuple, list)) else f"{x:.0f}",
)

# Add trend line — dashed stroke for visual contrast
trend_points = [(float(x), float(y)) for x, y in zip(trend_x, trend_y, strict=True)]
chart.add(
f"Trend (r = {r:.2f})",
trend_points,
stroke=True,
show_dots=False,
stroke_style={"width": 14, "dasharray": "32, 14", "linecap": "round", "linejoin": "round"},
)

# Annotate notable outliers — pygal per-point metadata with label styling
oh = float(study_hours[top_outlier_idx])
os_ = float(exam_scores[top_outlier_idx])
bh = float(study_hours[bottom_outlier_idx])
bs = float(exam_scores[bottom_outlier_idx])
chart.add(
"Outliers",
[
{"value": (oh, os_), "label": f"High performer ({oh:.0f}h \u2192 {os_:.0f}%)"},
{"value": (bh, bs), "label": f"Low performer ({bh:.0f}h \u2192 {bs:.0f}%)"},
],
stroke=False,
dots_size=16,
formatter=lambda x: f"{x[1]:.0f}%" if isinstance(x, (tuple, list)) else f"{x:.0f}",
)
chart.add("Passing (≥ 70%)", above)
chart.add("Below 70%", below)

# Save outputs in two formats: a PNG raster and pygal's native SVG markup (written to an .html file)
chart.render_to_png("plot.png")
chart.render_to_file("plot.html")
chart.render_to_png(f"plot-{THEME}.png")
with open(f"plot-{THEME}.html", "wb") as f:
f.write(chart.render())
Loading
Loading