diff --git a/AGENTS.md b/AGENTS.md index aa18171..0d755b6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -317,7 +317,7 @@ Features: - API keys via `.streamlit/secrets.toml` or environment variables ### Streamlit Pages -- `pages/verify.py` — Email verification endpoint (`/verify?token=...`) +- `pages/verify.py` — Email verification endpoint (`/verify?token=...`); sends welcome email on confirmation - `pages/unsubscribe.py` — One-click unsubscribe endpoint (`/unsubscribe?token=...`) - `pages/impressum.py` — Legal notice (§ 5 DDG) - `pages/privacy.py` — Privacy policy @@ -342,7 +342,8 @@ Supported formats: 4. Jobs already displayed in the UI session are pre-seeded into `job_sent_logs` via `db.upsert_jobs()` + `db.log_sent_jobs()` so the first digest doesn't repeat them 5. `emailer.send_verification_email()` sends a confirmation link via Resend 6. User clicks the link → `pages/verify.py` calls `db.confirm_subscriber()` → sets `is_active=True`, then `db.set_subscriber_expiry()` sets `expires_at = now() + 30 days` -7. If email already active, the form shows "already subscribed" (no re-send) +7. `pages/verify.py` sends a best-effort welcome email via `emailer.send_welcome_email()` (fire-and-forget — failure doesn't affect confirmation) +8. If email already active, the form shows "already subscribed" (no re-send) ### Auto-Expiry - The 30-day clock starts at **DOI confirmation**, not signup (prevents wasted days while email is unconfirmed) @@ -381,9 +382,10 @@ Per-subscriber pipeline, designed to run in GitHub Actions (or any cron schedule Required env vars: `GOOGLE_API_KEY`, `SERPAPI_KEY`, `SUPABASE_URL`, `SUPABASE_SERVICE_KEY`, `RESEND_API_KEY`, `RESEND_FROM`, `APP_URL`. 
### Email Templates (`emailer.py`) -- `send_daily_digest()` — HTML table of job matches with score badges and apply links -- `send_verification_email()` — CTA button linking to the verify page -- Both include an impressum footer line built from `IMPRESSUM_NAME`, `IMPRESSUM_ADDRESS`, `IMPRESSUM_EMAIL` env vars +- `send_daily_digest(user_email, jobs, unsubscribe_url, target_location)` — card-style job listings with score pill badges, location pins, "View Job" CTA buttons, match summary stats (excellent/good counts), and target location in header +- `send_welcome_email(email, target_location, subscription_days, privacy_url, unsubscribe_url)` — sent after DOI confirmation; explains what to expect, subscription duration, and links to privacy policy +- `send_verification_email(email, verify_url)` — CTA button linking to the verify page +- All three include an impressum footer line built from `IMPRESSUM_NAME`, `IMPRESSUM_ADDRESS`, `IMPRESSUM_EMAIL` env vars --- @@ -482,7 +484,7 @@ Schema setup: run `python setup_db.py` to check tables and print migration SQL. | `test_cv_parser.py` (6 tests) | `cv_parser.py` | `_clean_text()` + `extract_text()` for .txt/.md, error cases | | `test_models.py` (23 tests) | `models.py` | All Pydantic models: validation, defaults, round-trip serialization | | `test_db.py` (35 tests) | `db.py` | Full GDPR lifecycle: add/confirm/expire/purge subscribers, deactivate by token, data deletion, subscription context, job upsert/dedup, sent-log tracking. 
All DB functions mocked at Supabase client level | -| `test_emailer.py` (7 tests) | `emailer.py` | HTML generation: job row badges, job count, unsubscribe link, impressum line | +| `test_emailer.py` (22 tests) | `emailer.py` | HTML generation: job row badges/cards/location, job count, match stats, unsubscribe link, target location in header, impressum line, welcome email (location, days, privacy, impressum) | | `test_app_consent.py` (5 tests) | `app.py` | GDPR consent checkbox: session state persistence, widget key separation, on_change sync | ### Testing conventions diff --git a/ROADMAP.md b/ROADMAP.md index 16b3c5b..0990790 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -37,22 +37,21 @@ Based on the current state (private repo, hosted on Streamlit Community Cloud) a ### 1.2 — Deploy Daily Digest - [x] **Set up GitHub Actions cron job** for daily_task.py (e.g., `cron: '0 7 * * *'` UTC) -- [ ] **Add secrets to GitHub Actions** — all required env vars from §10 -- [ ] **Test the full digest cycle** — subscribe, verify, receive digest, unsubscribe +- [x] **Add secrets to GitHub Actions** — all required env vars from §10 +- [x] **Test the full digest cycle** — subscribe, verify, receive digest, unsubscribe +- [x] **Create a welcome email after successful subscription** — explain what to expect, how to contact support, link to privacy policy, show some example matches +- [x] **Add unsubscribe link to digest emails** — include unique tokenized URL to securely identify subscriber without exposing email +- [x] **Make digest email prettier** — use HTML formatting, add StellenScout logo, style job listings for better readability ### 1.3 — UX Quick Wins - [ ] **Personalize the UI** — greet user by first name extracted from CV profile - [ ] **Add "Edit Profile" step** — let user tweak skills/roles/preferences before searching (this is already in Open Issues) -- [ ] **Add a "Preferences" text input** — free-form like *"I want remote fintech jobs, no big corporations"* → append to 
Headhunter prompt +- [ ] **Add a "Preferences" text input** — free-form like *"I want remote fintech jobs, no big corporations"* → append to profile prompt - [ ] **Show job age warning** — if `posted_at` is >30 days, badge it as "possibly expired" - [ ] **Improve job cards** — show apply links more prominently, add company logos via Clearbit/Logo.dev - [ ] **Add digest preferences UI** — allow users to change `min_score` and cadence (daily/weekly) after subscription - -### 1.4 — Monitoring & Observability -- [ ] **Add structured logging** — replace `print()` with `logging` module, include run IDs -- [ ] **Track pipeline metrics** — jobs found per query, avg scores, API latency, cache hit rates -- [ ] **Set up error alerting** — GitHub Actions failure notifications (email or Slack webhook) -- [ ] **Add cost dashboard** — track daily SerpAPI + Gemini usage and estimated monthly spend +- [ ] **Remove random jobs from homepage before CV is entered** — show a friendly welcome message instead of empty job cards +- [ ] **Add filter/sort options for publishing date and score** — both in the digest email and on the homepage after search --- diff --git a/daily_task.py b/daily_task.py index d8f14fa..ca54651 100644 --- a/daily_task.py +++ b/daily_task.py @@ -214,6 +214,7 @@ def main() -> int: "company": ej.job.company_name, "url": _job_url(ej), "score": ej.evaluation.score, + "location": ej.job.location, } for ej in good_matches ] @@ -233,7 +234,12 @@ def main() -> int: log.info(" sub=%s — sending %d matches (score >= %d)", sub_id, len(email_jobs), sub_min_score) try: - send_daily_digest(sub_email, email_jobs, unsubscribe_url=unsubscribe_url) + send_daily_digest( + sub_email, + email_jobs, + unsubscribe_url=unsubscribe_url, + target_location=sub.get("target_location", ""), + ) except Exception: log.exception(" sub=%s — failed to send daily digest, continuing", sub_id) diff --git a/stellenscout/emailer.py b/stellenscout/emailer.py index 8e94c8e..70a7747 100644 --- 
a/stellenscout/emailer.py +++ b/stellenscout/emailer.py @@ -2,59 +2,108 @@ import os from datetime import datetime, timezone +from html import escape as _esc import resend +def _safe_url(url: str) -> str: + """Sanitise a URL for use in an HTML href attribute. + + Only ``http`` and ``https`` schemes are allowed. Anything else + (e.g. ``javascript:``, ``data:``) is replaced with ``#``. + """ + stripped = url.strip() + if stripped and not stripped.lower().startswith(("http://", "https://")): + return "#" + return _esc(stripped, quote=True) + + def _build_job_row(job: dict) -> str: - """Return an HTML table row for a single job.""" + """Return an HTML card block for a single job.""" score = job.get("score") badge_color = "#22c55e" if (score or 0) >= 80 else "#eab308" if (score or 0) >= 70 else "#f97316" score_html = ( - f'{score}/100' + f'{score}/100' if score else "" ) - apply_url = job.get("url", "#") + apply_url = _safe_url(job.get("url", "#")) + location = _esc(job.get("location", "")) + location_html = ( + f'
📍 {location}
' if location else "" + ) + title = _esc(job.get("title", "")) + company = _esc(job.get("company", "")) return f""" - - - {job["title"]}
- {job["company"]} - - - {score_html} - - - - View → - - - """ + +
+ + + +
+
{title}
+
{company}
+ {location_html} +
+ {score_html} +
+
+ + View Job → + +
+
+ """ def _impressum_line() -> str: - """Return a one-line impressum string for email footers (§ 5 DDG).""" - name = os.environ.get("IMPRESSUM_NAME", "") - address = os.environ.get("IMPRESSUM_ADDRESS", "").replace("\n", ", ") - email = os.environ.get("IMPRESSUM_EMAIL", "") + """Return a one-line HTML-safe impressum string for email footers (§ 5 DDG).""" + name = _esc(os.environ.get("IMPRESSUM_NAME", "")) + address = _esc(os.environ.get("IMPRESSUM_ADDRESS", "").replace("\n", ", ")) + email = _esc(os.environ.get("IMPRESSUM_EMAIL", "")) parts = [p for p in (name, address, email) if p] return " · ".join(parts) if parts else "StellenScout" -def _build_html(jobs: list[dict], unsubscribe_url: str = "") -> str: +def _build_html(jobs: list[dict], unsubscribe_url: str = "", target_location: str = "") -> str: """Build a full HTML email body for the daily digest.""" today = datetime.now(timezone.utc).strftime("%B %d, %Y") rows = "\n".join(_build_job_row(j) for j in jobs) impressum = _impressum_line() + safe_location = _esc(target_location) + location_subtitle = ( + f'

Jobs in {safe_location}

' if safe_location else "" + ) + + excellent = sum(1 for j in jobs if (j.get("score") or 0) >= 80) + good = sum(1 for j in jobs if 70 <= (j.get("score") or 0) < 80) + stats_parts: list[str] = [] + if excellent: + stats_parts.append( + f'' + f"{excellent} excellent" + ) + if good: + stats_parts.append( + f'' + f"{good} good" + ) + stats_html = f'

{" ".join(stats_parts)}

' if stats_parts else "" + return f"""\ - + @@ -65,28 +114,20 @@ def _build_html(jobs: list[dict], unsubscribe_url: str = "") -> str:
-

StellenScout Daily Digest

+

🔎 StellenScout Daily Digest

{today}

+ {location_subtitle}
-

+

We found {len(jobs)} new job match{"es" if len(jobs) != 1 else ""} for you today:

+ {stats_html} - - - - - - - - +
PositionScoreLink
{rows} @@ -97,8 +138,9 @@ def _build_html(jobs: list[dict], unsubscribe_url: str = "") -> str:
+

You're receiving this because you subscribed to StellenScout.

{impressum} - {f'
Unsubscribe' if unsubscribe_url else ""} + {f'
Unsubscribe' if unsubscribe_url else ""}
@@ -106,14 +148,20 @@ def _build_html(jobs: list[dict], unsubscribe_url: str = "") -> str: """ -def send_daily_digest(user_email: str, jobs: list[dict], unsubscribe_url: str = "") -> dict: +def send_daily_digest( + user_email: str, + jobs: list[dict], + unsubscribe_url: str = "", + target_location: str = "", +) -> dict: """Send a daily digest email with new job matches. Args: user_email: Recipient email address. jobs: List of job dicts, each with at least ``title``, ``company``, - ``url``, and optionally ``score``. + ``url``, and optionally ``score`` and ``location``. unsubscribe_url: One-click unsubscribe link for this subscriber. + target_location: Subscriber's target job location (shown in header). Returns: Resend API response dict. @@ -133,7 +181,110 @@ def send_daily_digest(user_email: str, jobs: list[dict], unsubscribe_url: str = "from": from_addr, "to": [user_email], "subject": f"StellenScout: {len(jobs)} new job match{'es' if len(jobs) != 1 else ''} for you", - "html": _build_html(jobs, unsubscribe_url=unsubscribe_url), + "html": _build_html(jobs, unsubscribe_url=unsubscribe_url, target_location=target_location), + } + if unsubscribe_url: + params["headers"] = { + "List-Unsubscribe": f"<{unsubscribe_url}>", + "List-Unsubscribe-Post": "List-Unsubscribe=One-Click", + } + + return resend.Emails.send(params) + + +def send_welcome_email( + email: str, + target_location: str = "", + subscription_days: int = 30, + privacy_url: str = "", + unsubscribe_url: str = "", +) -> dict: + """Send a welcome email after successful DOI confirmation. + + Args: + email: Recipient email address. + target_location: Subscriber's target job location. + subscription_days: Duration of the subscription in days. + privacy_url: URL to the privacy policy page. + unsubscribe_url: One-click unsubscribe link for this subscriber. + + Returns: + Resend API response dict. + + Raises: + ValueError: If RESEND_API_KEY is not set. 
+ """ + api_key = os.environ.get("RESEND_API_KEY") + if not api_key: + raise ValueError("RESEND_API_KEY environment variable not set") + + resend.api_key = api_key + from_addr = os.environ.get("RESEND_FROM", "StellenScout ") + impressum = _impressum_line() + + safe_location = _esc(target_location) + location_line = ( + f'" + if safe_location + else "" + ) + safe_privacy_url = _safe_url(privacy_url) if privacy_url else "" + privacy_line = f'Privacy Policy' if safe_privacy_url else "" + unsub_html = ( + f'Unsubscribe' if unsubscribe_url else "" + ) + footer_links = " · ".join(link for link in (privacy_line, unsub_html) if link) + footer_links_html = f"
{footer_links}" if footer_links else "" + + html = f"""\ + + + + +
+
+
🎉
+

Welcome to StellenScout

+

Your subscription is confirmed

+
+
+

+ Your daily job digest is now active. Here's what to expect:

+
' + f'📍 ' + f"Daily AI-matched jobs in {safe_location}
+ {location_line} + + + +
+ 📅 + Subscription runs for {subscription_days} days, then expires automatically
+ ✉️ + First digest arrives tomorrow morning
+ 🔓 + Unsubscribe any time via the link in each email
+
+
+ {impressum} + {footer_links_html} +
+ + +""" + + params: dict = { + "from": from_addr, + "to": [email], + "subject": "Welcome to StellenScout \u2014 your daily digest starts tomorrow", + "html": html, } if unsubscribe_url: params["headers"] = { @@ -165,30 +316,44 @@ def send_verification_email(email: str, verify_url: str) -> dict: # type: ignor html = f"""\ - +
-

StellenScout

-

Confirm your email address

+ padding:32px 24px;color:#fff;text-align:center"> +
🔎
+

StellenScout

+

One click to activate your daily job digest

-

Hello,

-

Thank you for subscribing to the StellenScout Daily Digest. - Please confirm your email address by clicking the button below:

+

+ Thank you for subscribing! Please confirm your email address to start + receiving AI-matched job listings:

- - Confirm subscription + font-size:16px;display:inline-block"> + Confirm subscription ✓

-

+ + + + +
+ 🚀 + AI scores every job against your CV
+ ✉️ + Daily digest with your best matches
+ 🔓 + Unsubscribe any time, data deleted automatically
+

This link is valid for 24 hours. If you did not sign up, you can safely ignore this email.

diff --git a/stellenscout/pages/verify.py b/stellenscout/pages/verify.py index 4a70cef..ded477f 100644 --- a/stellenscout/pages/verify.py +++ b/stellenscout/pages/verify.py @@ -8,7 +8,17 @@ logger = logging.getLogger(__name__) # Inject secrets into env vars (same pattern as app.py) -for key in ("SUPABASE_URL", "SUPABASE_KEY", "SUPABASE_SERVICE_KEY"): +for key in ( + "SUPABASE_URL", + "SUPABASE_KEY", + "SUPABASE_SERVICE_KEY", + "RESEND_API_KEY", + "RESEND_FROM", + "APP_URL", + "IMPRESSUM_NAME", + "IMPRESSUM_ADDRESS", + "IMPRESSUM_EMAIL", +): if key not in os.environ: try: os.environ[key] = st.secrets[key] @@ -78,5 +88,32 @@ def _request_metadata() -> tuple[str | None, str | None]: f"for {SUBSCRIPTION_DAYS} days. You can unsubscribe at any time via the link in each email." ) st.balloons() + + # Best-effort welcome email — failure doesn't affect confirmation + try: + import secrets as _secrets + from datetime import timedelta as _td + + from stellenscout.db import issue_unsubscribe_token + from stellenscout.emailer import send_welcome_email + + _app_url = os.environ.get("APP_URL", "").rstrip("/") + + _unsub_url = "" + if _app_url: + _unsub_token = _secrets.token_urlsafe(32) + _unsub_expires = (_dt.now(_tz.utc) + _td(days=SUBSCRIPTION_DAYS)).isoformat() + if issue_unsubscribe_token(db, subscriber["id"], token=_unsub_token, expires_at=_unsub_expires): + _unsub_url = f"{_app_url}/unsubscribe?token={_unsub_token}" + + send_welcome_email( + email=subscriber["email"], + target_location=subscriber.get("target_location", ""), + subscription_days=SUBSCRIPTION_DAYS, + privacy_url=f"{_app_url}/privacy" if _app_url else "", + unsubscribe_url=_unsub_url, + ) + except Exception: + logger.exception("Failed to send welcome email") else: st.error("This confirmation link is invalid or has expired. 
Please subscribe again.") diff --git a/tests/test_emailer.py b/tests/test_emailer.py index e7ec2fc..9011bba 100644 --- a/tests/test_emailer.py +++ b/tests/test_emailer.py @@ -1,8 +1,53 @@ """Tests for stellenscout.emailer — HTML builder pure functions.""" +from unittest.mock import patch + import pytest -from stellenscout.emailer import _build_html, _build_job_row, _impressum_line +from stellenscout.emailer import ( + _build_html, + _build_job_row, + _impressum_line, + _safe_url, + send_verification_email, + send_welcome_email, +) + + +class TestSafeUrl: + def test_allows_https(self): + assert _safe_url("https://example.com") == "https://example.com" + + def test_allows_http(self): + assert _safe_url("http://example.com") == "http://example.com" + + def test_blocks_javascript(self): + assert _safe_url("javascript:alert(1)") == "#" + + def test_blocks_data_uri(self): + assert _safe_url("data:text/html,

hi

") == "#" + + def test_escapes_quotes_in_url(self): + assert "&" in _safe_url("https://example.com?a=1&b=2") + + def test_empty_string(self): + assert _safe_url("") == "" + + +class TestHtmlEscapingInJobRow: + def test_escapes_title_with_html(self): + html = _build_job_row({"title": "", "company": "Co", "url": "https://x.com"}) + assert "