diff --git a/plots/sequence-logo-basic/implementations/letsplot.py b/plots/sequence-logo-basic/implementations/letsplot.py
new file mode 100644
index 0000000000..3d40ce7823
--- /dev/null
+++ b/plots/sequence-logo-basic/implementations/letsplot.py
@@ -0,0 +1,142 @@
+""" pyplots.ai
+sequence-logo-basic: Sequence Logo for Motif Visualization
+Library: letsplot 4.8.2 | Python 3.14.3
+Quality: 79/100 | Created: 2026-03-06
+"""
+
+import numpy as np
+import pandas as pd
+from lets_plot import *
+
+
+LetsPlot.setup_html()
+
+# Data — 10-position DNA transcription factor binding site motif
+positions = list(range(1, 11))
+
+# Realistic motif frequencies (resembling a TATA-box-like binding site)
+frequencies = {
+    1: {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25},
+    2: {"A": 0.10, "C": 0.05, "G": 0.05, "T": 0.80},
+    3: {"A": 0.85, "C": 0.05, "G": 0.05, "T": 0.05},
+    4: {"A": 0.05, "C": 0.05, "G": 0.05, "T": 0.85},
+    5: {"A": 0.90, "C": 0.02, "G": 0.02, "T": 0.06},
+    6: {"A": 0.60, "C": 0.05, "G": 0.05, "T": 0.30},
+    7: {"A": 0.15, "C": 0.05, "G": 0.70, "T": 0.10},
+    8: {"A": 0.05, "C": 0.80, "G": 0.10, "T": 0.05},
+    9: {"A": 0.30, "C": 0.30, "G": 0.20, "T": 0.20},
+    10: {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25},
+}
+
+# Color scheme: A=green, C=blue, G=orange, T=red
+color_map = {"A": "#2CA02C", "C": "#1F77B4", "G": "#FF7F0E", "T": "#D62728"}
+
+# Calculate information content and build letter data.
+# Information content per position is 2 - H bits (H = Shannon entropy of the
+# four nucleotide frequencies); each letter is drawn freq * info_content tall.
+rows = []
+max_info = 0.0
+for pos in positions:
+    freqs = frequencies[pos]
+    entropy = -sum(f * np.log2(f) for f in freqs.values() if f > 0)
+    info_content = 2.0 - entropy
+    max_info = max(max_info, info_content)  # tallest stack drives the y-axis limit
+
+    # Sort by frequency (least frequent at bottom, most frequent on top)
+    sorted_letters = sorted(freqs.items(), key=lambda x: x[1])
+
+    y_bottom = 0.0
+    for letter, freq in sorted_letters:
+        height = freq * info_content
+        # Letters under 0.04 bits are too small to draw; they still advance
+        # y_bottom so the letters stacked above keep their true position
+        if height >= 0.04:
+            rows.append(
+                {
+                    "position": pos,
+                    "xmin": pos - 0.45,
+                    "xmax": pos + 0.45,
+                    "ymin": y_bottom,
+                    "ymax": y_bottom + height,
+                    "ymid": y_bottom + height / 2,
+                    "height": height,
+                    "letter": letter,
+                    "frequency": freq,
+                    "info_bits": round(info_content, 3),
+                }
+            )
+        y_bottom += height
+
+df = pd.DataFrame(rows)
+
+# Legend data — invisible points to create a proper legend with square symbols
+legend_df = pd.DataFrame({"x": [0] * 4, "y": [0] * 4, "letter": ["A", "C", "G", "T"]})
+
+# Y-axis upper limit: round up to nearest 0.2 with small padding
+y_max = np.ceil(max_info * 5) / 5 + 0.05
+
+# Y-axis breaks every 0.2 bits, derived from y_max so the ticks always cover
+# the plotted range (up to the 2-bit maximum) instead of stopping at 1.4
+y_breaks = [round(0.2 * i, 1) for i in range(int(y_max / 0.2) + 1)]
+
+# Build plot with colored letters as the primary visual element
+plot = (
+    ggplot()
+    # Subtle background rectangles for structure
+    + geom_rect(
+        aes(xmin="xmin", xmax="xmax", ymin="ymin", ymax="ymax", fill="letter"),
+        data=df,
+        alpha=0.15,
+        color="rgba(0,0,0,0)",
+        size=0,
+        show_legend=False,
+    )
+    # Colored letter glyphs — the primary visual element
+    + geom_text(
+        aes(x="position", y="ymid", label="letter", color="letter", size="height"),
+        data=df,
+        fontface="bold",
+        show_legend=False,
+        tooltips=layer_tooltips()
+        .format("@frequency", ".0%")
+        .format("@info_bits", ".3f")
+        .line("@letter")
+        .line("Frequency: @frequency")
+        .line("Info content: @info_bits bits"),
+    )
+    # Invisible points for proper legend (square shape shows color blocks)
+    + geom_point(
+        aes(x="x", y="y", fill="letter"),
+        data=legend_df,
+        size=6,
+        shape=22,
+        color="rgba(0,0,0,0)",
+        alpha=0,
+        tooltips="none",
+    )
+    # Manual color scales
+    + scale_fill_manual(values=color_map, name="Nucleotide", breaks=["A", "C", "G", "T"])
+    + scale_color_manual(values=color_map)
+    + scale_size(range=[6, 36], guide="none")
+    + scale_x_continuous(breaks=positions, limits=[0.3, 10.7])
+    + scale_y_continuous(limits=[0, y_max], breaks=y_breaks)
+    + guides(fill=guide_legend(override_aes={"size": 12, "alpha": 1.0}))
+    + labs(x="Position", y="Information content (bits)", title="sequence-logo-basic \u00b7 letsplot \u00b7 pyplots.ai")
+    + theme_minimal()
+    + theme(
+        plot_title=element_text(size=28, face="bold"),
+        axis_title=element_text(size=22),
+        axis_text=element_text(size=18),
+        legend_title=element_text(size=20, face="bold"),
+        legend_text=element_text(size=18),
+        panel_grid_major_x=element_blank(),
+        panel_grid_minor=element_blank(),
+        panel_grid_major_y=element_line(color="#E0E0E0", size=0.5),
+        plot_background=element_rect(color="white", fill="white"),
+    )
+    + ggsize(1600, 900)
+)
+
+# Save
+ggsave(plot, "plot.png", scale=3, path=".")
+ggsave(plot, "plot.html", path=".")
diff --git a/plots/sequence-logo-basic/metadata/letsplot.yaml b/plots/sequence-logo-basic/metadata/letsplot.yaml new file mode 100644 index 0000000000..de316d1161 --- /dev/null +++ b/plots/sequence-logo-basic/metadata/letsplot.yaml @@ -0,0 +1,237 @@ +library: letsplot +specification_id: sequence-logo-basic +created: '2026-03-06T20:26:04Z' +updated: '2026-03-06T20:56:37Z' +generated_by: claude-opus-4-5-20251101 +workflow_run: 22780525013 +issue: 4421 +python_version: 3.14.3 +library_version: 4.8.2 +preview_url: https://storage.googleapis.com/pyplots-images/plots/sequence-logo-basic/letsplot/plot.png +preview_thumb: https://storage.googleapis.com/pyplots-images/plots/sequence-logo-basic/letsplot/plot_thumb.png +preview_html: https://storage.googleapis.com/pyplots-images/plots/sequence-logo-basic/letsplot/plot.html +quality_score: 79 +review: + strengths: + - Excellent realistic data choice (TATA-box motif) that immediately communicates + biological meaning + - Semi-transparent background rectangles are a creative solution that adds structure + and helps visualize letter boundaries + - Clean well-organized code with proper information content calculation + - Good use of lets-plot-specific features (layer_tooltips with formatted biological + data, HTML export) + - 'Strong visual refinement: subtle grid, removed x-grid lines, explicit font sizing' + weaknesses: + - Letters are not stretched to fill their allocated height as
true sequence logos + require — fundamental lets-plot limitation with text rendering + - Small-frequency letters at the base of stacks are very small and hard to read + - Color scheme uses green+red which is challenging for colorblind users (though + spec-mandated for DNA) + image_description: 'The plot displays a sequence logo for a 10-position DNA transcription + factor binding site (TATA-box-like motif). At each position, nucleotide letters + (A, C, G, T) are stacked vertically with heights proportional to information content + in bits. Standard DNA colors are used: A=green, C=blue, G=orange, T=red. Semi-transparent + background rectangles behind each letter stack add visual depth. Positions 2-5 + show the conserved TATA core with large dominant letters (T, A, T, A). Positions + 1, 9, and 10 have no visible letters (uniform or near-uniform distribution, ~0 bits, below the drawing threshold). Positions + 7 (G) and 8 (C) show moderate conservation. The y-axis ranges from 0 to 1.4 bits + with subtle horizontal gridlines. A "Nucleotide" legend with colored squares appears + on the right. The title reads "sequence-logo-basic · letsplot · pyplots.ai". Overall + layout is clean with good canvas utilization.' + criteria_checklist: + visual_quality: + score: 23 + max: 30 + items: + - id: VQ-01 + name: Text Legibility + score: 6 + max: 8 + passed: true + comment: Font sizes explicitly set. Main letters readable; small-frequency + letters at stack bases are very small due to size scaling. + - id: VQ-02 + name: No Overlap + score: 5 + max: 6 + passed: true + comment: Small letters at bottom of stacks slightly crowded at positions 2-4, + but no significant overlap. + - id: VQ-03 + name: Element Visibility + score: 4 + max: 6 + passed: true + comment: Dominant letters clearly visible. Background rectangles aid visibility. + Small-frequency letters at base are quite tiny. + - id: VQ-04 + name: Color Accessibility + score: 3 + max: 4 + passed: true + comment: Standard DNA colors (green/blue/orange/red) as spec-mandated.
Green+red + can challenge colorblind users. + - id: VQ-05 + name: Layout & Canvas + score: 3 + max: 4 + passed: true + comment: Good canvas utilization (~60%). Empty positions 1, 9, 10 create some + whitespace reflecting the data. + - id: VQ-06 + name: Axis Labels & Title + score: 2 + max: 2 + passed: true + comment: Position and Information content (bits) — descriptive with units. + design_excellence: + score: 13 + max: 20 + items: + - id: DE-01 + name: Aesthetic Sophistication + score: 5 + max: 8 + passed: true + comment: Custom DNA palette, semi-transparent background rectangles, bold + letters, minimal theme. Above defaults but not publication-level. + - id: DE-02 + name: Visual Refinement + score: 4 + max: 6 + passed: true + comment: theme_minimal(), x-grid removed, subtle y-grid only, white background, + generous spacing. + - id: DE-03 + name: Data Storytelling + score: 4 + max: 6 + passed: true + comment: TATA-box motif creates natural visual hierarchy. Conserved core dominates + visually. + spec_compliance: + score: 12 + max: 15 + items: + - id: SC-01 + name: Plot Type + score: 3 + max: 5 + passed: false + comment: Sequence logo with stacked letters scaled by info content, but letters + not stretched to fill rectangles as spec requires — lets-plot limitation. + - id: SC-02 + name: Required Features + score: 3 + max: 4 + passed: true + comment: Vertical stacking, info content scaling, standard colors, frequency + ordering, axis labels all present. Missing stretched glyph rendering. + - id: SC-03 + name: Data Mapping + score: 3 + max: 3 + passed: true + comment: X=position, Y=information content. Correct mapping with proper calculation. + - id: SC-04 + name: Title & Legend + score: 3 + max: 3 + passed: true + comment: Title format correct. Legend shows all four nucleotides with correct + color-coded squares. 
+ data_quality: + score: 14 + max: 15 + items: + - id: DQ-01 + name: Feature Coverage + score: 5 + max: 6 + passed: true + comment: Shows high conservation (pos 2-5, 7-8), low conservation (pos 1, + 9-10), and mixed (pos 6). + - id: DQ-02 + name: Realistic Context + score: 5 + max: 5 + passed: true + comment: TATA-box-like transcription factor binding site — a real, well-known + biological motif. + - id: DQ-03 + name: Appropriate Scale + score: 4 + max: 4 + passed: true + comment: Information content 0 to ~1.4 bits, realistic for 4-letter DNA alphabet + (max 2 bits). + code_quality: + score: 10 + max: 10 + items: + - id: CQ-01 + name: KISS Structure + score: 3 + max: 3 + passed: true + comment: 'Linear flow: imports, data, calculation, plot, save. No functions + or classes.' + - id: CQ-02 + name: Reproducibility + score: 2 + max: 2 + passed: true + comment: Deterministic hardcoded frequency data. + - id: CQ-03 + name: Clean Imports + score: 2 + max: 2 + passed: true + comment: All imports (numpy, pandas, lets_plot) are used. + - id: CQ-04 + name: Code Elegance + score: 2 + max: 2 + passed: true + comment: Clean, well-organized code. Invisible legend points technique is + a reasonable workaround. + - id: CQ-05 + name: Output & API + score: 1 + max: 1 + passed: true + comment: Saves as plot.png with scale=3 for 4800x2700. Current API. + library_mastery: + score: 7 + max: 10 + items: + - id: LM-01 + name: Idiomatic Usage + score: 4 + max: 5 + passed: true + comment: 'Good ggplot grammar: aes(), multiple geom layers, scale_*_manual(), + theme customization, guides().' + - id: LM-02 + name: Distinctive Features + score: 3 + max: 5 + passed: true + comment: Uses layer_tooltips() with custom formatting — a distinctive lets-plot + interactive feature. Also exports HTML. 
+ verdict: REJECTED +impl_tags: + dependencies: [] + techniques: + - layer-composition + - custom-legend + - hover-tooltips + - html-export + patterns: + - data-generation + - iteration-over-groups + dataprep: + - normalization + styling: + - grid-styling + - alpha-blending