""" pyplots.ai
line-retention-cohort: User Retention Curve by Cohort
Library: letsplot 4.9.0 | Python 3.14.3
Quality: 91/100 | Created: 2026-03-16
"""

import numpy as np
import pandas as pd
from lets_plot import *


LetsPlot.setup_html()

# Data: monthly signup cohorts tracked weekly for 12 weeks
np.random.seed(42)
weeks = np.arange(0, 13)
final_week = int(weeks[-1])  # derive instead of repeating the literal 12 below

cohorts = {
    "Jan 2025": {"size": 1245, "decay": 0.18},
    "Feb 2025": {"size": 1102, "decay": 0.16},
    "Mar 2025": {"size": 1380, "decay": 0.14},
    "Apr 2025": {"size": 1510, "decay": 0.12},
    "May 2025": {"size": 1425, "decay": 0.10},
}

# One frame per cohort: exponential decay plus Gaussian noise, clipped to 0-100 %.
# The random draws happen once per cohort in dict order, so the seed fully
# determines the data.
frames = []
for cohort_name, params in cohorts.items():
    retention = 100 * np.exp(-params["decay"] * weeks)
    noise = np.random.normal(0, 1.5, len(weeks))
    retention = np.clip(retention + noise, 0, 100)
    retention[0] = 100.0  # week 0 is 100 % by definition, regardless of noise
    frames.append(
        pd.DataFrame(
            {
                "Week": weeks,
                "Retention": retention,
                "Cohort": f"{cohort_name} (n={params['size']:,})",
            }
        )
    )

df = pd.concat(frames, ignore_index=True)

# Endpoint labels: last data point per cohort, sorted ascending by retention.
# The label text always shows the true value; only the drawing position
# (label_y) is nudged, so the Retention column is never overwritten.
endpoints = df[df["Week"] == final_week].sort_values("Retention").reset_index(drop=True)
endpoints["label"] = endpoints["Retention"].map(lambda value: f"{value:.0f}%")

min_gap = 3.5  # minimum vertical distance between neighbouring labels (y units)
label_y = []
for value in endpoints["Retention"]:
    # Compare against the previous label's (possibly already nudged) position
    if label_y and value - label_y[-1] < min_gap:
        value = label_y[-1] + min_gap
    label_y.append(value)
endpoints["label_y"] = label_y

# Colorblind-friendly palette with distinct hues (oldest=lightest, newest=boldest)
colors = ["#A6CEE3", "#B2DF8A", "#FDBF6F", "#E31A1C", "#306998"]

# Line widths: older cohorts thinner, newer cohorts bolder
line_widths = [1.5, 1.8, 2.0, 2.5, 3.0]

# Reference level drawn as a dashed line and annotated on the plot
threshold = 20

# Number formats go through .format(); the Bokeh-style inline "@var{fmt}" form
# is not part of lets-plot's line-template syntax.
cohort_tooltips = (
    layer_tooltips()
    .format("@Retention", ".1f")
    .line("@Cohort")
    .line("Week @Week")
    .line("Retention @Retention%")
)

# Build plot with per-cohort layers, because line width is a fixed (non-mapped)
# setting per layer.
cohort_labels = df["Cohort"].unique().tolist()

plot = ggplot()

# strict=True: fail loudly if cohorts and widths ever get out of sync
for cohort_label, width in zip(cohort_labels, line_widths, strict=True):
    plot = plot + geom_line(
        aes(x="Week", y="Retention", color="Cohort"),
        data=df[df["Cohort"] == cohort_label],
        size=width,
        alpha=0.9,
        tooltips=cohort_tooltips,
    )

plot = (
    plot
    + geom_point(aes(x="Week", y="Retention", color="Cohort"), data=df, size=4, alpha=0.85)
    + geom_hline(yintercept=threshold, linetype="dashed", color="#999999", size=0.8)
    + geom_text(
        aes(x="Week", y="label_y", label="label", color="Cohort"),
        data=endpoints,
        size=14,
        nudge_x=0.6,
        hjust=0,
        show_legend=False,  # keep the text glyph out of the cohort legend keys
    )
    + geom_text(
        aes(x="x", y="y", label="label"),
        data=pd.DataFrame({"x": [0.2], "y": [threshold], "label": [f"{threshold}% threshold"]}),
        size=12,
        color="#999999",
        hjust=0,
        vjust=-1.2,
    )
    + scale_color_manual(values=colors)
    # Extra room to the right of the last week for the endpoint labels
    + scale_x_continuous(breaks=list(range(0, final_week + 1, 2)), limits=[0, final_week + 2.5])
    + scale_y_continuous(breaks=list(range(0, 101, 20)), limits=[0, 105])
    + labs(title="line-retention-cohort · letsplot · pyplots.ai", x="Weeks Since Signup", y="Retained Users (%)")
    + theme_minimal()
    + theme(
        plot_title=element_text(size=28, hjust=0.5, face="bold"),
        axis_title=element_text(size=22),
        axis_text=element_text(size=18),
        legend_title=element_blank(),
        legend_text=element_text(size=16),
        legend_position="right",
        panel_grid_major=element_line(color="#EBEBEB", size=0.4),
        panel_grid_minor=element_blank(),
        plot_background=element_rect(color="white", fill="white"),
    )
    + ggsize(1600, 900)
)

# Save
ggsave(plot, "plot.png", path=".", scale=3)
ggsave(plot, "plot.html", path=".")
a/plots/line-retention-cohort/metadata/letsplot.yaml b/plots/line-retention-cohort/metadata/letsplot.yaml new file mode 100644 index 0000000000..06d98fa02b --- /dev/null +++ b/plots/line-retention-cohort/metadata/letsplot.yaml @@ -0,0 +1,225 @@ +library: letsplot +specification_id: line-retention-cohort +created: '2026-03-16T20:44:20Z' +updated: '2026-03-16T20:57:20Z' +generated_by: claude-opus-4-5-20251101 +workflow_run: 23164943466 +issue: 4572 +python_version: 3.14.3 +library_version: 4.9.0 +preview_url: https://storage.googleapis.com/pyplots-images/plots/line-retention-cohort/letsplot/plot.png +preview_thumb: https://storage.googleapis.com/pyplots-images/plots/line-retention-cohort/letsplot/plot_thumb.png +preview_html: https://storage.googleapis.com/pyplots-images/plots/line-retention-cohort/letsplot/plot.html +quality_score: 91 +review: + strengths: + - Excellent data storytelling through progressive line weights and color intensity + emphasizing newer cohorts + - Endpoint labels with overlap prevention provide clear final-retention context + - 20% threshold reference line adds analytical value + - Full spec compliance with all required features implemented + - Interactive tooltips leverage lets-plot distinctive capabilities + weaknesses: + - Endpoint labels for lower cohorts (12%, 15%, 17%) are slightly tight despite overlap + prevention + image_description: 'The plot displays 5 retention curves for monthly signup cohorts + (Jan–May 2025) on a clean white background. All curves start at 100% at week 0 + and decay over 12 weeks with exponential profiles. Colors progress from light + blue (Jan 2025, oldest) through green (Feb), orange (Mar), red (Apr), to dark + navy blue (May 2025, newest). Newer cohorts have thicker lines, creating clear + visual hierarchy. Data points are marked along each curve. Endpoint percentage + labels (12%, 15%, 17%, 23%, 31%) are displayed at week 12, color-matched to their + respective cohorts. 
A dashed gray horizontal line at y=20 marks the "20% threshold" + benchmark. The legend on the right lists each cohort with sample size (e.g., "Jan + 2025 (n=1,245)"). Title reads "line-retention-cohort · letsplot · pyplots.ai". + X-axis: "Weeks Since Signup", Y-axis: "Retained Users (%)". Subtle light gray + major gridlines on a minimal theme.' + criteria_checklist: + visual_quality: + score: 29 + max: 30 + items: + - id: VQ-01 + name: Text Legibility + score: 8 + max: 8 + passed: true + comment: 'All font sizes explicitly set: title=28, axis_title=22, axis_text=18, + legend_text=16' + - id: VQ-02 + name: No Overlap + score: 5 + max: 6 + passed: true + comment: Endpoint labels use overlap prevention but 12%/15%/17% are still + fairly tight + - id: VQ-03 + name: Element Visibility + score: 6 + max: 6 + passed: true + comment: Lines well-sized with progressive widths 1.5-3.0, points at size=4 + clearly visible + - id: VQ-04 + name: Color Accessibility + score: 4 + max: 4 + passed: true + comment: 'Distinct colorblind-friendly palette: light blue, green, orange, + red, dark blue' + - id: VQ-05 + name: Layout & Canvas + score: 4 + max: 4 + passed: true + comment: Plot fills canvas well, x-axis extended to accommodate endpoint labels + - id: VQ-06 + name: Axis Labels & Title + score: 2 + max: 2 + passed: true + comment: 'Descriptive labels with units: Weeks Since Signup, Retained Users + (%)' + design_excellence: + score: 15 + max: 20 + items: + - id: DE-01 + name: Aesthetic Sophistication + score: 6 + max: 8 + passed: true + comment: Custom palette with light-to-bold progression, endpoint labels and + threshold annotation add polish + - id: DE-02 + name: Visual Refinement + score: 4 + max: 6 + passed: true + comment: theme_minimal(), subtle grid, no minor grid, white background + - id: DE-03 + name: Data Storytelling + score: 5 + max: 6 + passed: true + comment: Strong visual hierarchy through line width/color, endpoint labels, + threshold reference line + 
spec_compliance: + score: 15 + max: 15 + items: + - id: SC-01 + name: Plot Type + score: 5 + max: 5 + passed: true + comment: Correct line chart with multiple cohort retention curves + - id: SC-02 + name: Required Features + score: 4 + max: 4 + passed: true + comment: All spec features present including threshold line, varying line + thickness, legend with sizes + - id: SC-03 + name: Data Mapping + score: 3 + max: 3 + passed: true + comment: X=weeks since signup, Y=retention percentage, correctly mapped + - id: SC-04 + name: Title & Legend + score: 3 + max: 3 + passed: true + comment: Title format correct, legend labels match spec format with cohort + size + data_quality: + score: 15 + max: 15 + items: + - id: DQ-01 + name: Feature Coverage + score: 6 + max: 6 + passed: true + comment: 5 cohorts with different decay rates showing clear variation + - id: DQ-02 + name: Realistic Context + score: 5 + max: 5 + passed: true + comment: Monthly signup cohorts with realistic sizes and plausible retention + decay rates + - id: DQ-03 + name: Appropriate Scale + score: 4 + max: 4 + passed: true + comment: Retention values 12-31% at week 12, cohort sizes 1102-1510 realistic + code_quality: + score: 10 + max: 10 + items: + - id: CQ-01 + name: KISS Structure + score: 3 + max: 3 + passed: true + comment: Clean imports-data-plot-save flow + - id: CQ-02 + name: Reproducibility + score: 2 + max: 2 + passed: true + comment: np.random.seed(42) set + - id: CQ-03 + name: Clean Imports + score: 2 + max: 2 + passed: true + comment: 'All imports used: numpy, pandas, lets_plot' + - id: CQ-04 + name: Code Elegance + score: 2 + max: 2 + passed: true + comment: Clean code with thoughtful endpoint label overlap prevention + - id: CQ-05 + name: Output & API + score: 1 + max: 1 + passed: true + comment: Saves as plot.png with scale=3 and plot.html + library_mastery: + score: 7 + max: 10 + items: + - id: LM-01 + name: Idiomatic Usage + score: 4 + max: 5 + passed: true + comment: Good ggplot 
grammar usage, per-group loop justified for varying line + widths + - id: LM-02 + name: Distinctive Features + score: 3 + max: 5 + passed: true + comment: Uses layer_tooltips() for interactive hover, HTML export, ggsize() + verdict: APPROVED +impl_tags: + dependencies: [] + techniques: + - annotations + - layer-composition + - hover-tooltips + - html-export + patterns: + - data-generation + - iteration-over-groups + dataprep: [] + styling: + - alpha-blending + - grid-styling