Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions plots/sequence-logo-basic/implementations/seaborn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
""" pyplots.ai
sequence-logo-basic: Sequence Logo for Motif Visualization
Library: seaborn 0.13.2 | Python 3.14.3
Quality: 92/100 | Created: 2026-03-06
"""

import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties
from matplotlib.patches import PathPatch
from matplotlib.textpath import TextPath


# Data - DNA transcription factor binding site motif (10 positions)
bases = ["A", "C", "G", "T"]

# One row per motif position, one column per base (same order as `bases`).
frequencies = np.array(
    [
        [0.05, 0.80, 0.10, 0.05],  # pos 1: strong C
        [0.70, 0.10, 0.10, 0.10],  # pos 2: strong A
        [0.05, 0.05, 0.85, 0.05],  # pos 3: strong G
        [0.10, 0.10, 0.10, 0.70],  # pos 4: strong T
        [0.25, 0.25, 0.25, 0.25],  # pos 5: no preference
        [0.60, 0.15, 0.15, 0.10],  # pos 6: moderate A
        [0.05, 0.05, 0.05, 0.85],  # pos 7: strong T
        [0.90, 0.03, 0.04, 0.03],  # pos 8: very strong A
        [0.10, 0.60, 0.20, 0.10],  # pos 9: moderate C
        [0.05, 0.05, 0.80, 0.10],  # pos 10: strong G
    ]
)

n_positions = frequencies.shape[0]

# Information content (bits) per position:
#   IC = log2(len(bases)) - H,  with Shannon entropy H = -sum(f * log2(f)).
# For the four-letter alphabet above the maximum is log2(4) = 2 bits.
# `where=` skips zero-frequency cells so log2(0) is never evaluated; those cells
# keep the 0 from `out`, i.e. they contribute nothing to the sum.
log2_freq = np.log2(frequencies, out=np.zeros_like(frequencies), where=frequencies > 0)
shannon_entropy = -(frequencies * log2_freq).sum(axis=1)
info_content = np.log2(len(bases)) - shannon_entropy

# Standard DNA color scheme, routed through seaborn's palette handling so the
# hex strings become RGB tuples. The list order follows `bases`.
base_color_list = ["#3AA655", "#4169E1", "#F5A623", "#E74C3C"]
base_palette = sns.color_palette(base_color_list)
# strict=True raises if the number of colors ever drifts from the number of bases.
base_colors = {letter: rgb for letter, rgb in zip(bases, base_palette, strict=True)}

# Frequency table for the heatmap: transposed so rows are bases and columns are
# 1-based motif positions.
position_labels = range(1, n_positions + 1)
freq_df = pd.DataFrame(data=frequencies.T, index=bases, columns=position_labels)

# Plot setup with seaborn context and style
sns.set_context("talk", font_scale=1.0)
sns.set_style("white")

# Two stacked panels: the logo on top gets 3.5x the height of the heatmap below.
fig, panels = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(16, 9),
    height_ratios=[3.5, 1],
    gridspec_kw={"hspace": 0.45},
)
ax_logo, ax_heat = panels

# --- Top panel: Sequence Logo ---
# Each base is drawn as a filled glyph outline stretched to `letter_width` data
# units wide and (frequency * information content) bits tall, stacked per position.
fp = FontProperties(family="monospace", weight="bold")
letter_width = 0.78
min_letter_height = 0.01  # bits; shorter letters are skipped

# A glyph outline depends only on the letter, so build each one once instead of
# once per (position, letter) pair. A glyph whose bounding box has zero width or
# height cannot be rescaled and is left out.
glyphs = {}
for letter in bases:
    outline = TextPath((0, 0), letter, size=1, prop=fp)
    extents = outline.get_extents()
    if extents.width == 0 or extents.height == 0:
        continue
    glyphs[letter] = (outline, extents)

for pos, (position_freqs, ic) in enumerate(zip(frequencies, info_content, strict=True)):
    letter_heights = position_freqs * ic
    x_left = pos - letter_width / 2
    y_offset = 0.0

    # Ascending order puts the tallest letter at the top of the stack.
    for idx in np.argsort(letter_heights):
        height = letter_heights[idx]
        letter = bases[idx]
        if height < min_letter_height or letter not in glyphs:
            continue

        outline, extents = glyphs[letter]
        scale_x = letter_width / extents.width
        scale_y = height / extents.height
        # After scaling, shift so the glyph's lower-left corner lands on (x_left, y_offset).
        tx = x_left - extents.x0 * scale_x
        ty = y_offset - extents.y0 * scale_y

        transform = mtransforms.Affine2D().scale(scale_x, scale_y).translate(tx, ty) + ax_logo.transData
        patch = PathPatch(outline, facecolor=base_colors[letter], edgecolor="none", transform=transform)
        ax_logo.add_patch(patch)
        y_offset += height

# Logo axis styling: letters are centered on integer x positions 0..n-1, while
# the tick labels show 1-based motif positions.
tick_positions = range(n_positions)
tick_names = range(1, n_positions + 1)
title_text = "sequence-logo-basic \u00b7 seaborn \u00b7 pyplots.ai"

ax_logo.set_xlim(-0.6, n_positions - 0.4)
ax_logo.set_ylim(0, 2.1)
ax_logo.set_xticks(tick_positions)
ax_logo.set_xticklabels(tick_names)
ax_logo.set_xlabel("Position", fontsize=20)
ax_logo.set_ylabel("Information content (bits)", fontsize=20)
ax_logo.set_title(title_text, fontsize=24, fontweight="medium", pad=15)
ax_logo.tick_params(axis="both", labelsize=16)
sns.despine(ax=ax_logo, top=True, right=True)
ax_logo.yaxis.grid(True, alpha=0.15, linewidth=0.5, color="#cccccc")
ax_logo.set_axisbelow(True)

# Highlight the most conserved position (highest information content) with a
# faint band drawn behind the letters.
max_ic_pos = int(np.argmax(info_content))
peak_bits = info_content[max_ic_pos]
band_half_width = 0.42
ax_logo.axvspan(
    max_ic_pos - band_half_width,
    max_ic_pos + band_half_width,
    color="#ffd700",
    alpha=0.12,
    zorder=0,
)

# Conservation annotation: the arrow ends at (position, information content) of
# the highlighted column; the label sits up and to the left of it.
arrow_style = {
    "arrowstyle": "->",
    "color": "#888888",
    "lw": 1.3,
    "connectionstyle": "arc3,rad=-0.2",
}
ax_logo.annotate(
    f"Most conserved\n({peak_bits:.1f} bits)",
    xy=(max_ic_pos, peak_bits),
    xytext=(max_ic_pos - 2.5, 1.90),
    fontsize=13,
    fontstyle="italic",
    color="#444444",
    arrowprops=arrow_style,
    ha="center",
    va="center",
)

# --- Bottom panel: Frequency heatmap using seaborn ---
# Shows the raw frequency table behind the logo, one annotated cell per
# (base, position) pair, on a fixed 0-1 color scale.
heat_cmap = sns.light_palette("#306998", as_cmap=True)
cell_text_style = {"fontsize": 12, "fontweight": "medium"}
colorbar_style = {"label": "Frequency", "shrink": 0.8, "aspect": 15, "pad": 0.02}

sns.heatmap(
    freq_df,
    ax=ax_heat,
    cmap=heat_cmap,
    annot=True,
    fmt=".2f",
    annot_kws=cell_text_style,
    linewidths=1.5,
    linecolor="white",
    cbar_kws=colorbar_style,
    vmin=0,
    vmax=1,
    square=False,
)
ax_heat.set_xlabel("Position", fontsize=16)
ax_heat.set_ylabel("", fontsize=16)
ax_heat.tick_params(axis="both", labelsize=14)
ax_heat.tick_params(axis="y", rotation=0)

# Color the y-axis base labels to match the logo colors; any label that is not
# a known base is left as is.
for tick_label in ax_heat.get_yticklabels():
    label_color = base_colors.get(tick_label.get_text())
    if label_color is None:
        continue
    tick_label.set_color(label_color)
    tick_label.set_fontweight("bold")

plt.savefig("plot.png", dpi=300, bbox_inches="tight")
234 changes: 234 additions & 0 deletions plots/sequence-logo-basic/metadata/seaborn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
library: seaborn
specification_id: sequence-logo-basic
created: '2026-03-06T20:26:36Z'
updated: '2026-03-06T20:58:49Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 22780524945
issue: 4421
python_version: 3.14.3
library_version: 0.13.2
preview_url: https://storage.googleapis.com/pyplots-images/plots/sequence-logo-basic/seaborn/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/sequence-logo-basic/seaborn/plot_thumb.png
preview_html: null
quality_score: 92
review:
strengths:
- Excellent two-panel design combining sequence logo with frequency heatmap for
complementary views
- Properly rendered letter glyphs using TextPath/PathPatch (correct technique for
sequence logos)
- Strong data storytelling with highlighted most-conserved position and annotation
arrow
- Colored heatmap y-axis labels matching logo colors — thoughtful design touch
- Well-chosen data showing full range of conservation levels including uniform position
weaknesses:
- Minor colorblind concern with green-red pairing (though this is the standard bioinformatics
convention)
image_description: 'The plot features a two-panel layout on a white background.
The top panel is a sequence logo showing 10 positions of a DNA transcription factor
binding site. Letters (A in green, C in blue, G in orange, T in red) are stacked
vertically at each position, with height proportional to information content in
bits (y-axis, 0–2). Position 8 has the tallest stack (~1.4 bits, dominated by
a large green "A") and is highlighted with a pale yellow vertical band and an
italic annotation "Most conserved (1.4 bits)" with a curved arrow. Position 5
is empty (uniform distribution, 0 bits of information). The title reads "sequence-logo-basic ·
seaborn · pyplots.ai" in medium weight at the top. Top and right spines are removed;
a very subtle horizontal grid is visible. The bottom panel is a seaborn heatmap
showing the raw frequency matrix (4 rows: A, C, G, T × 10 columns) with annotated
values (.2f format), colored from white to dark blue. Y-axis labels are colored
to match their respective base colors (green A, blue C, orange G, red T). A "Frequency"
colorbar appears on the right. Both panels share "Position" as the x-axis label.'
criteria_checklist:
visual_quality:
score: 29
max: 30
items:
- id: VQ-01
name: Text Legibility
score: 8
max: 8
passed: true
comment: 'All font sizes explicitly set: title 24pt, labels 20pt, ticks 16pt,
heatmap annotations 12pt'
- id: VQ-02
name: No Overlap
score: 6
max: 6
passed: true
comment: No overlapping text or elements; annotation well-positioned
- id: VQ-03
name: Element Visibility
score: 6
max: 6
passed: true
comment: Letter glyphs properly scaled and clearly visible at all positions
- id: VQ-04
name: Color Accessibility
score: 3
max: 4
passed: true
comment: Standard DNA colors; green-red pairing is mild colorblind concern
but established convention
- id: VQ-05
name: Layout & Canvas
score: 4
max: 4
passed: true
comment: Two-panel layout with 3.5:1 ratio gives proper emphasis; good use
of 16x9 canvas
- id: VQ-06
name: Axis Labels & Title
score: 2
max: 2
passed: true
comment: Position and Information content (bits) with units
design_excellence:
score: 16
max: 20
items:
- id: DE-01
name: Aesthetic Sophistication
score: 6
max: 8
passed: true
comment: Custom DNA palette, two-panel layout, colored heatmap y-labels, annotation
with curved arrow
- id: DE-02
name: Visual Refinement
score: 5
max: 6
passed: true
comment: Spines removed, subtle grid alpha=0.15, white linecolor in heatmap,
generous panel spacing
- id: DE-03
name: Data Storytelling
score: 5
max: 6
passed: true
comment: Yellow highlight on most conserved position with annotation; heatmap
provides complementary detail
spec_compliance:
score: 15
max: 15
items:
- id: SC-01
name: Plot Type
score: 5
max: 5
passed: true
comment: Correct sequence logo with vertically stacked letters scaled by information
content
- id: SC-02
name: Required Features
score: 4
max: 4
passed: true
comment: 'All spec features: stacked letters, IC scaling, standard colors,
scaled glyphs'
- id: SC-03
name: Data Mapping
score: 3
max: 3
passed: true
comment: X-axis positions 1-10, Y-axis IC in bits 0-2 range
- id: SC-04
name: Title & Legend
score: 3
max: 3
passed: true
comment: Correct title format; letters self-identify by color and shape
data_quality:
score: 15
max: 15
items:
- id: DQ-01
name: Feature Coverage
score: 6
max: 6
passed: true
comment: 'Full range: strong conservation (pos 3,7,8), moderate (pos 2,6,9),
none (pos 5)'
- id: DQ-02
name: Realistic Context
score: 5
max: 5
passed: true
comment: DNA transcription factor binding site motif — real bioinformatics
context
- id: DQ-03
name: Appropriate Scale
score: 4
max: 4
passed: true
comment: IC values 0-1.4 bits realistic for DNA; frequencies sum to 1 per
position
code_quality:
score: 10
max: 10
items:
- id: CQ-01
name: KISS Structure
score: 3
max: 3
passed: true
comment: 'Linear flow: imports, data, IC calculation, logo rendering, heatmap,
save'
- id: CQ-02
name: Reproducibility
score: 2
max: 2
passed: true
comment: Fully deterministic hardcoded frequency data
- id: CQ-03
name: Clean Imports
score: 2
max: 2
passed: true
comment: All imports used
- id: CQ-04
name: Code Elegance
score: 2
max: 2
passed: true
comment: Clean, well-structured; TextPath/PathPatch is correct technique
- id: CQ-05
name: Output & API
score: 1
max: 1
passed: true
comment: Saves as plot.png, dpi=300, bbox_inches=tight, no deprecated API
library_mastery:
score: 7
max: 10
items:
- id: LM-01
name: Idiomatic Usage
score: 4
max: 5
passed: true
comment: Good use of sns.set_context, set_style, despine, heatmap, color_palette,
light_palette
- id: LM-02
name: Distinctive Features
score: 3
max: 5
passed: true
comment: sns.heatmap with annotations, sns.light_palette for custom cmap,
sns.despine
verdict: APPROVED
impl_tags:
dependencies: []
techniques:
- subplots
- annotations
- patches
- manual-ticks
- colorbar
patterns:
- data-generation
- iteration-over-groups
dataprep: []
styling:
- grid-styling
- custom-colormap
- edge-highlighting