Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions plots/scatter-matrix-interactive/implementations/plotnine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
""" pyplots.ai
scatter-matrix-interactive: Interactive Scatter Plot Matrix (SPLOM)
Library: plotnine 0.15.2 | Python 3.13.11
Quality: 72/100 | Created: 2026-01-10
"""

import numpy as np
import pandas as pd
from plotnine import (
aes,
element_line,
element_rect,
element_text,
facet_grid,
geom_point,
geom_ribbon,
ggplot,
labs,
scale_color_manual,
scale_fill_manual,
theme,
theme_minimal,
)
from sklearn.datasets import load_iris


# Data: Iris dataset for multivariate analysis
np.random.seed(42)  # fixed seed; no call in this script currently draws random numbers

# Variables for the matrix (names already include units); also used as the
# column labels of the measurement table below.
variables = [
    "Sepal Length (cm)",
    "Sepal Width (cm)",
    "Petal Length (cm)",
    "Petal Width (cm)",
]

iris = load_iris()
df = pd.DataFrame(iris.data, columns=variables)
# Translate the integer targets into species names, one label per sample.
df["Species"] = pd.Categorical(list(iris.target_names[iris.target]))

# Colorblind-safe palette (Dark2 inspired - teal, orange, purple)
colors = ["#1B9E77", "#D95F02", "#7570B3"]

# Create long-form data for the scatter matrix.
# Each (var_y, var_x) cell contributes one small frame; the frames are
# concatenated once at the end instead of appending one dict per row.
scatter_frames = []
density_frames = []

# Loop-invariant lookups, computed once:
# - species labels as plain strings, one per sample (what a row-wise walk of
#   the table would yield for the "Species" field)
# - the distinct species present in the data
species_labels = df["Species"].astype(str).to_numpy()
species_levels = df["Species"].unique()

for var_y in variables:
    for var_x in variables:
        if var_x == var_y:
            # Diagonal: per-species histogram rescaled to sit inside the
            # variable's own value range, so it can share the cell's y-axis.
            var_min, var_max = df[var_x].min(), df[var_x].max()
            var_range = var_max - var_min
            # Ribbons start exactly at the variable's minimum.
            baseline = var_min

            for species in species_levels:
                species_vals = df.loc[df["Species"] == species, var_x].to_numpy()
                # Simple histogram-based density over the full variable range
                hist, edges = np.histogram(species_vals, bins=20, range=(var_min, var_max), density=True)
                # Tallest bin reaches half the variable range above the
                # baseline; guard against an all-zero histogram.
                peak = hist.max()
                max_density = peak if peak > 0 else 1
                hist_scaled = hist / max_density * var_range * 0.5 + baseline
                bin_centers = (edges[:-1] + edges[1:]) / 2

                # Scalars broadcast across the 20 bin rows.
                density_frames.append(
                    pd.DataFrame(
                        {
                            "x": bin_centers,
                            "ymin": baseline,
                            "ymax": hist_scaled,
                            "Species": species,
                            "var_x": var_x,
                            "var_y": var_y,
                        }
                    )
                )
        else:
            # Off-diagonal: one point per sample, built column-wise.
            scatter_frames.append(
                pd.DataFrame(
                    {
                        "x": df[var_x].to_numpy(),
                        "y": df[var_y].to_numpy(),
                        "Species": species_labels,
                        "var_x": var_x,
                        "var_y": var_y,
                    }
                )
            )

scatter_df = pd.concat(scatter_frames, ignore_index=True)
density_df = pd.concat(density_frames, ignore_index=True)

# Set factor levels for facet ordering: var_x follows `variables`,
# var_y uses the reversed order.
scatter_df["var_x"] = pd.Categorical(scatter_df["var_x"], categories=variables, ordered=True)
scatter_df["var_y"] = pd.Categorical(scatter_df["var_y"], categories=variables[::-1], ordered=True)
density_df["var_x"] = pd.Categorical(density_df["var_x"], categories=variables, ordered=True)
density_df["var_y"] = pd.Categorical(density_df["var_y"], categories=variables[::-1], ordered=True)

# Sort density data so each ribbon's x values are in ascending order within
# its cell and species
density_df = density_df.sort_values(["var_x", "var_y", "Species", "x"])

# Create scatter plot matrix with density ribbons on diagonal.
# Each component is named first, then added to the base plot in a fixed order.
base = ggplot(mapping=aes(x="x"))

# Off-diagonal cells: points from scatter_df; diagonal cells: ribbons from density_df.
point_layer = geom_point(data=scatter_df, mapping=aes(y="y", color="Species"), size=3.5, alpha=0.7)
ribbon_layer = geom_ribbon(data=density_df, mapping=aes(ymin="ymin", ymax="ymax", fill="Species"), alpha=0.5)

# One facet row per var_y level and one facet column per var_x level.
matrix_facets = facet_grid("var_y ~ var_x", scales="free")

# Points and ribbons share the same species palette.
color_scale = scale_color_manual(values=colors)
fill_scale = scale_fill_manual(values=colors)

# Axis titles are blank; the variable names appear in the facet strips.
titles = labs(title="scatter-matrix-interactive · plotnine · pyplots.ai", x="", y="")

# Overrides applied on top of theme_minimal().
theme_options = {
    "figure_size": (16, 16),
    "plot_title": element_text(size=24, weight="bold", ha="left"),
    "strip_text_x": element_text(size=14),
    "strip_text_y": element_text(size=14, angle=0),
    "axis_text": element_text(size=11),
    "axis_title_x": element_text(size=16),
    "axis_title_y": element_text(size=16),
    "legend_title": element_text(size=16),
    "legend_text": element_text(size=14),
    "legend_position": "bottom",
    "legend_background": element_rect(fill="white", alpha=0.9),
    "panel_spacing": 0.03,
    "panel_grid_major": element_line(color="#cccccc", alpha=0.3),
    "panel_grid_minor": element_line(color="#eeeeee", alpha=0.2),
    "panel_background": element_rect(fill="white"),
}
custom_theme = theme(**theme_options)

plot = (
    base
    + point_layer
    + ribbon_layer
    + matrix_facets
    + color_scale
    + fill_scale
    + titles
    + theme_minimal()
    + custom_theme
)

# Save plot: 16 x 16 at 300 dpi
plot.save("plot.png", width=16, height=16, dpi=300, verbose=False)
212 changes: 212 additions & 0 deletions plots/scatter-matrix-interactive/metadata/plotnine.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
library: plotnine
specification_id: scatter-matrix-interactive
created: '2026-01-10T01:57:41Z'
updated: '2026-01-10T02:24:36Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20870868420
issue: 3604
python_version: 3.13.11
library_version: 0.15.2
preview_url: https://storage.googleapis.com/pyplots-images/plots/scatter-matrix-interactive/plotnine/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/scatter-matrix-interactive/plotnine/plot_thumb.png
preview_html: null
quality_score: 72
review:
strengths:
- Excellent use of plotnine grammar of graphics with facet_grid for matrix layout
- Creative solution using geom_ribbon for histogram-based density distributions
on diagonal
- Colorblind-safe palette (Dark2-inspired teal, orange, purple)
- Clean data transformation from wide to long format for faceting
- Good visual separation of the three Iris species clusters
- Proper title format following specification
weaknesses:
- Missing interactive features (brushing, linked selection, zoom/pan) - plotnine
is a static library
- Axis labels are empty strings - variable names only shown in strip text
- Y-axis scale of the diagonal density plots does not match the natural scale of the variable
image_description: 'The plot displays a 4×4 scatter plot matrix (SPLOM) using the
Iris dataset with four variables: Sepal Length (cm), Sepal Width (cm), Petal Length
(cm), and Petal Width (cm). The matrix uses a colorblind-safe Dark2-inspired palette
with teal (#1B9E77) for setosa, orange (#D95F02) for versicolor, and purple (#7570B3)
for virginica. Off-diagonal cells show scatter plots with points colored by species
at alpha=0.7. Diagonal cells display histogram-based density distributions as
filled ribbon areas with alpha=0.5, showing the univariate distribution of each
variable per species. The title "scatter-matrix-interactive · plotnine · pyplots.ai"
appears at top-left in bold. Strip labels on top show variable names for x-axis,
and strip labels on right show variable names for y-axis. A legend at the bottom
indicates species color mapping. The grid is subtle with light gray lines. The
layout is clean with minimal spacing between panels.'
criteria_checklist:
visual_quality:
score: 32
max: 40
items:
- id: VQ-01
name: Text Legibility
score: 8
max: 10
passed: true
comment: Title and labels readable, strip text slightly small but acceptable
- id: VQ-02
name: No Overlap
score: 8
max: 8
passed: true
comment: No overlapping text elements
- id: VQ-03
name: Element Visibility
score: 6
max: 8
passed: true
comment: Points visible with good alpha, density ribbons clear but could use
more contrast
- id: VQ-04
name: Color Accessibility
score: 5
max: 5
passed: true
comment: Colorblind-safe palette (Dark2 teal/orange/purple)
- id: VQ-05
name: Layout Balance
score: 3
max: 5
passed: true
comment: Good 4x4 grid but some wasted space around edges
- id: VQ-06
name: Axis Labels
score: 0
max: 2
passed: false
comment: Axis labels are empty (shown in strip text instead)
- id: VQ-07
name: Grid & Legend
score: 2
max: 2
passed: true
comment: Subtle grid, legend well-placed at bottom
spec_compliance:
score: 17
max: 25
items:
- id: SC-01
name: Plot Type
score: 8
max: 8
passed: true
comment: Correct scatter matrix with density diagonals
- id: SC-02
name: Data Mapping
score: 5
max: 5
passed: true
comment: All 4 variables correctly mapped to pairwise scatter
- id: SC-03
name: Required Features
score: 0
max: 5
passed: false
comment: 'Missing interactive features: no brushing/linked selection, no zoom/pan
(plotnine is static - spec notes limitations)'
- id: SC-04
name: Data Range
score: 3
max: 3
passed: true
comment: All data visible within axes
- id: SC-05
name: Legend Accuracy
score: 1
max: 2
passed: false
comment: Species legend correct but no interactive indicator
- id: SC-06
name: Title Format
score: 2
max: 2
passed: true
comment: 'Correct format: scatter-matrix-interactive · plotnine · pyplots.ai'
data_quality:
score: 18
max: 20
items:
- id: DQ-01
name: Feature Coverage
score: 7
max: 8
passed: true
comment: Shows correlations, visible cluster separation, and density distributions
on the diagonal
- id: DQ-02
name: Realistic Context
score: 7
max: 7
passed: true
comment: Classic Iris dataset - real botanical data
- id: DQ-03
name: Appropriate Scale
score: 5
max: 5
passed: true
comment: Real measurements in centimeters
code_quality:
score: 10
max: 10
items:
- id: CQ-01
name: KISS Structure
score: 3
max: 3
passed: true
comment: 'Linear flow: imports -> data -> plot -> save'
- id: CQ-02
name: Reproducibility
score: 3
max: 3
passed: true
comment: np.random.seed(42) set
- id: CQ-03
name: Clean Imports
score: 2
max: 2
passed: true
comment: All imports used
- id: CQ-04
name: No Deprecated API
score: 1
max: 1
passed: true
comment: Current plotnine syntax
- id: CQ-05
name: Output Correct
score: 1
max: 1
passed: true
comment: Saves as plot.png
library_features:
score: 5
max: 5
items:
- id: LF-01
name: Distinctive Features
score: 5
max: 5
passed: true
comment: 'Excellent use of ggplot2 grammar: facet_grid, geom_ribbon for density,
aes mapping, scale_color_manual, theme customization'
verdict: APPROVED
impl_tags:
dependencies:
- sklearn
techniques:
- faceting
- layer-composition
patterns:
- dataset-loading
- wide-to-long
- iteration-over-groups
dataprep:
- binning
styling:
- alpha-blending
- grid-styling