Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions plots/residual-plot/implementations/altair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
""" pyplots.ai
residual-plot: Residual Plot
Library: altair 6.0.0 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-26
"""

import altair as alt
import numpy as np
import pandas as pd


# Data: simulated house prices whose true relationship to size is slightly
# curved, so a straight-line model leaves a visible pattern in its residuals.
np.random.seed(42)
n = 150

x = np.linspace(1000, 3000, n)  # House size in sq ft
noise = np.random.randn(n) * 15000
y_true = 50000 + 150 * x + 0.02 * (x - 2000) ** 2 + noise  # Observed prices with slight curvature

# Fit the straight line by ordinary least squares instead of hard-coding its
# coefficients: only an actual fit yields residuals that average to zero and
# are uncorrelated with the fitted values, which is what a zero reference
# line and symmetric ±2σ bands assume.
slope, intercept = np.polyfit(x, y_true, 1)
y_pred = intercept + slope * x  # Fitted values of the linear model

residuals = y_true - y_pred
std_residual = np.std(residuals)

# Identify outliers (beyond ±2 standard deviations)
is_outlier = np.abs(residuals) > 2 * std_residual

# Create DataFrame
df = pd.DataFrame(
    {
        "Fitted Values ($)": y_pred,
        "Residuals ($)": residuals,
        "Outlier": np.where(is_outlier, "Outlier (>2σ)", "Normal"),
    }
)

# Scatter layer: one point per observation, coloured by its outlier label
outlier_palette = alt.Scale(domain=["Normal", "Outlier (>2σ)"], range=["#306998", "#FFD43B"])
outlier_legend = alt.Legend(title="Point Type", titleFontSize=18, labelFontSize=16)

fitted_axis = alt.X("Fitted Values ($):Q", title="Fitted Values ($)", scale=alt.Scale(nice=True))
residual_axis = alt.Y("Residuals ($):Q", title="Residuals ($)", scale=alt.Scale(nice=True))
outlier_color = alt.Color("Outlier:N", scale=outlier_palette, legend=outlier_legend)

point_marks = alt.Chart(df).mark_point(size=120, opacity=0.7)
scatter = point_marks.encode(
    x=fitted_axis,
    y=residual_axis,
    color=outlier_color,
    tooltip=["Fitted Values ($):Q", "Residuals ($):Q", "Outlier:N"],
)

# Zero reference line, drawn as a dark dashed horizontal rule
zero_df = pd.DataFrame({"y": [0]})
zero_line = alt.Chart(zero_df).mark_rule(color="#333333", strokeWidth=2, strokeDash=[8, 4]).encode(y="y:Q")

# Lighter dashed rules at ±2 standard deviations of the residuals
two_sigma = 2 * std_residual
bands_df = pd.DataFrame({"y": [two_sigma, -two_sigma], "label": ["+2σ", "-2σ"]})
band_rules = alt.Chart(bands_df).mark_rule(color="#888888", strokeWidth=1.5, strokeDash=[4, 4])
band_lines = band_rules.encode(y="y:Q")

# LOESS trend of the residuals, computed by Vega-Lite's loess transform
# (locally weighted regression, not a global polynomial fit). The transform
# runs on the plotted data directly, so the chart reuses `df` rather than a
# sorted copy; `bandwidth` controls the amount of smoothing.
loess_line = (
    alt.Chart(df)
    .transform_loess("Fitted Values ($)", "Residuals ($)", bandwidth=0.3)
    .mark_line(color="#E24A33", strokeWidth=3)
    .encode(x="Fitted Values ($):Q", y="Residuals ($):Q")
)

# Stack the layers in drawing order (reference rules first, trend line last),
# then apply the figure size, title, and global axis/view/legend styling
chart_title = alt.Title(text="residual-plot · altair · pyplots.ai", fontSize=28, anchor="middle")
layered = alt.layer(zero_line, band_lines, scatter, loess_line)
sized = layered.properties(width=1600, height=900, title=chart_title)
chart = (
    sized.configure_axis(labelFontSize=18, titleFontSize=22, gridOpacity=0.3)
    .configure_view(strokeWidth=0)
    .configure_legend(orient="right", padding=10)
)

# Save outputs: the PNG (rendered at 3x scale) first, then the HTML version
for output_path, save_options in (("plot.png", {"scale_factor": 3.0}), ("plot.html", {})):
    chart.save(output_path, **save_options)
29 changes: 29 additions & 0 deletions plots/residual-plot/metadata/altair.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
library: altair
specification_id: residual-plot
created: '2025-12-26T19:35:59Z'
updated: '2025-12-26T19:41:11Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20528204950
issue: 0
python_version: 3.13.11
library_version: 6.0.0
preview_url: https://storage.googleapis.com/pyplots-images/plots/residual-plot/altair/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/residual-plot/altair/plot_thumb.png
preview_html: https://storage.googleapis.com/pyplots-images/plots/residual-plot/altair/plot.html
quality_score: 91
review:
strengths:
- Excellent implementation of all specification requirements including zero reference
line, LOESS smoothing, ±2σ bands, and outlier highlighting
- Clear demonstration of non-linear residual pattern through LOESS line - educationally
valuable
- Strong colorblind-safe palette with Python logo colors (blue/yellow)
- Good use of Altair declarative layering for combining multiple visual elements
- Realistic housing price scenario makes the plot immediately comprehensible
- Proper title format and descriptive axis labels with units
weaknesses:
- Legend placement is slightly isolated from the main chart area - could be positioned
closer
- Points use outlined circles which can be slightly less visible than filled circles
at smaller sizes
- The grid lines behind the ±2σ dashed lines create slight visual noise