In [1]:
from __future__ import annotations

import os
from pathlib import Path
from datetime import datetime

# Locate the repository root. If the kernel was started inside the
# "notebooks_v2" folder, step up one level so relative paths such as "src/..."
# resolve from the parent directory.
ROOT = Path.cwd().resolve()
if ROOT.name == "notebooks_v2":
    os.chdir(ROOT.parent)
# Re-read the working directory because the chdir above may have changed it.
ROOT = Path.cwd().resolve()
# Fail fast when the working directory has no "src" folder: everything below
# writes to paths relative to ROOT.
assert (ROOT / "src").exists(), f"Not at repo root. CWD={ROOT}"
print("✅ CWD =", ROOT)

def backup_write(rel_path: str, content: str) -> None:
    """Write ``content`` to ``ROOT / rel_path``, backing up any existing file.

    Missing parent directories are created. When the target already exists,
    its current text is first copied to a sibling file whose name is the
    original name plus ``.bak_<YYYYmmdd_HHMMSS>`` (local time). Both files are
    read and written as UTF-8, and each written path is printed.

    Args:
        rel_path: Destination path relative to the module-level ``ROOT``.
        content: Full text to store in the destination file.
    """
    target = ROOT / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)

    if target.exists():
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup = target.with_suffix(f"{target.suffix}.bak_{stamp}")
        previous_text = target.read_text(encoding="utf-8")
        backup.write_text(previous_text, encoding="utf-8")
        print("BACKUP:", backup)

    target.write_text(content, encoding="utf-8")
    print("WROTE:", target)

backup_write("src/careeragent/langgraph/runtime_nodes.py", r'''
from __future__ import annotations

import asyncio
import json
import os
import urllib.parse
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

import httpx


# ---------- helpers ----------
def _utc_now() -> str:
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _apply_delta(state: Dict[str, Any], delta: Any) -> Dict[str, Any]:
    if not isinstance(delta, dict):
        return state

    # list reducers
    for k in ("live_feed", "attempts", "gates", "evaluations", "steps"):
        if k in delta and isinstance(delta[k], list):
            state.setdefault(k, [])
            state[k].extend(delta[k])

    # dict reducers
    if "artifacts" in delta and isinstance(delta["artifacts"], dict):
        state.setdefault("artifacts", {})
        state["artifacts"].update(delta["artifacts"])

    # overwrite remaining keys
    for k, v in delta.items():
        if k in ("live_feed", "attempts", "gates", "evaluations", "steps", "artifacts"):
            continue
        state[k] = v

    return state


def _feed(state: Dict[str, Any], layer: str, agent: str, message: str) -> None:
    state.setdefault("live_feed", [])
    state["live_feed"].append({"layer": layer, "agent": agent, "message": message})


def _log_attempt(
    state: Dict[str, Any],
    *,
    layer: str,
    agent: str,
    tool: str,
    model: Optional[str],
    status: str,
    confidence: float,
    error: Optional[str] = None,
) -> None:
    """Append one tool-attempt record to ``state["attempts"]``.

    The record stores the layer (under ``layer_id``), agent, tool, model,
    status, the confidence coerced to ``float``, the error text and a UTC
    timestamp from ``_utc_now``. The ``attempts`` list is created when absent.
    """
    attempts = state.setdefault("attempts", [])
    record = {
        "layer_id": layer,
        "agent": agent,
        "tool": tool,
        "model": model,
        "status": status,
        "confidence": float(confidence),
        "error": error,
        "at_utc": _utc_now(),
    }
    attempts.append(record)


def _pick_fn(mod: Any, *names: str) -> Optional[Callable[..., Any]]:
    for n in names:
        fn = getattr(mod, n, None)
        if callable(fn):
            return fn
    return None


def _artifacts_root() -> Path:
    # local-first standard
    return Path("src/careeragent/artifacts").resolve()


def _runs_dir(run_id: str) -> Path:
    """Return ``<artifacts>/runs/<run_id>``, creating the directory if needed."""
    run_dir = _artifacts_root().joinpath("runs", run_id)
    run_dir.mkdir(parents=True, exist_ok=True)
    return run_dir


def _daily_dir() -> Path:
    """Return ``<artifacts>/daily_jobs/<YYYY-MM-DD>``, creating it if needed.

    The folder name uses the local date from ``datetime.now()``, not UTC.
    """
    today = f"{datetime.now():%Y-%m-%d}"
    day_dir = _artifacts_root().joinpath("daily_jobs", today)
    day_dir.mkdir(parents=True, exist_ok=True)
    return day_dir


def _save_json(path: Path, obj: Any) -> str:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(obj, indent=2), encoding="utf-8")
    return str(path)


def _fast_l0_guard(state: Dict[str, Any]) -> Dict[str, Any]:
    """Run a local substring check for prompt-injection phrases in the resume.

    Reads ``state["resume_text"]`` (treated as empty when missing or falsy),
    lower-cases it and looks for a fixed list of phrases. ``state`` itself is
    not modified.

    Returns:
        A delta with one ``live_feed`` message and one ``attempts`` record.
        When a phrase is found the delta also sets ``status`` to ``"blocked"``
        and ``pending_action`` to ``"security_blocked"``.
    """
    resume_lc = (state.get("resume_text") or "").lower()
    signals = ("ignore previous instructions", "system prompt", "developer message", "jailbreak", "exfiltrate")

    blocked = False
    for phrase in signals:
        if phrase in resume_lc:
            blocked = True
            break

    if blocked:
        status, confidence, error = "failed", 0.10, "prompt_injection"
        message = "Blocked: prompt injection detected."
    else:
        status, confidence, error = "ok", 0.95, None
        message = "Security passed."

    attempt = {
        "layer_id": "L0",
        "agent": "SanitizeAgent",
        "tool": "local.prompt_injection_heuristic",
        "model": None,
        "status": status,
        "confidence": confidence,
        "error": error,
        "at_utc": _utc_now(),
    }

    delta: Dict[str, Any] = {
        "live_feed": [{"layer": "L0", "agent": "SanitizeAgent", "message": message}],
        "attempts": [attempt],
    }
    if blocked:
        delta["status"] = "blocked"
        delta["pending_action"] = "security_blocked"
    return delta


# ---------- L3 SAFE DISCOVERY ----------
async def _serper_search(api_key: str, query: str, *, num: int = 10, tbs: Optional[str] = None, timeout_s: float = 12.0) -> Dict[str, Any]:
    """POST one query to the Serper search endpoint and normalise the hits.

    Args:
        api_key: Value sent in the ``X-API-KEY`` header.
        query: Search string, sent as ``q``.
        num: Value sent as ``num`` in the request body.
        tbs: Optional value sent as ``tbs``; omitted when falsy.
        timeout_s: Timeout passed to the HTTP client.

    Returns:
        ``{"ok", "error", "items"}``. On HTTP status >= 400, ``ok`` is False
        and ``error`` is ``serper_403_quota`` for 403 or ``serper_<code>``
        otherwise. On success, ``items`` holds ``title``/``url``/``snippet``
        dicts built from the ``organic`` list of the JSON response.
    """
    body: Dict[str, Any] = {"q": query, "num": num}
    if tbs:
        body["tbs"] = tbs
    request_headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}

    async with httpx.AsyncClient(timeout=timeout_s) as client:
        response = await client.post("https://google.serper.dev/search", headers=request_headers, json=body)

    code = response.status_code
    if code >= 400:
        label = "serper_403_quota" if code == 403 else f"serper_{code}"
        return {"ok": False, "error": label, "items": []}

    items = []
    for hit in response.json().get("organic") or []:
        items.append({
            "title": hit.get("title") or "",
            "url": hit.get("link") or "",
            "snippet": hit.get("snippet") or "",
        })
    return {"ok": True, "error": None, "items": items}


async def _mcp_search(server_url: str, token: str, queries: List[str], recency_hours: float, *, timeout_s: float = 15.0) -> Dict[str, Any]:
    """Invoke the ``jobs.search`` tool on an MCP server and normalise the hits.

    The request is a POST to ``<server_url>/invoke`` with a bearer token.

    Returns:
        ``{"ok", "error", "items"}``. On HTTP status >= 400, ``ok`` is False
        and ``error`` is ``mcp_<code>``. On success, hits are read from the
        ``results`` key of the response, falling back to ``jobs``, and each
        item is tagged with ``source`` set to ``"mcp"``.
    """
    endpoint = server_url.rstrip("/") + "/invoke"
    request_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    request_body = {"tool": "jobs.search", "payload": {"queries": queries, "recency_hours": recency_hours}}

    async with httpx.AsyncClient(timeout=timeout_s) as client:
        response = await client.post(endpoint, headers=request_headers, json=request_body)

    if response.status_code >= 400:
        return {"ok": False, "error": f"mcp_{response.status_code}", "items": []}

    body = response.json()
    hits = body.get("results") or body.get("jobs") or []
    items = [
        {
            "title": hit.get("title") or "",
            "url": hit.get("url") or hit.get("link") or "",
            "snippet": hit.get("snippet") or "",
            "source": "mcp",
        }
        for hit in hits
    ]
    return {"ok": True, "error": None, "items": items}


def _fallback_board_urls(role: str, location: str) -> List[Dict[str, Any]]:
    q = urllib.parse.quote_plus(f"{role} {location}")
    loc = urllib.parse.quote_plus(location)

    return [
        {"title": f"LinkedIn search: {role}", "url": f"https://www.linkedin.com/jobs/search/?keywords={q}&location={loc}", "snippet": "Fallback job-board search"},
        {"title": f"Indeed search: {role}", "url": f"https://www.indeed.com/jobs?q={q}&l={loc}", "snippet": "Fallback job-board search"},
        {"title": f"Dice search: {role}", "url": f"https://www.dice.com/jobs?q={q}&location={loc}", "snippet": "Fallback job-board search"},
        {"title": f"ZipRecruiter search: {role}", "url": f"https://www.ziprecruiter.com/jobs-search?search={q}&location={loc}", "snippet": "Fallback job-board search"},
        {"title": f"Glassdoor search: {role}", "url": f"https://www.google.com/search?q=site:glassdoor.com+{q}", "snippet": "Fallback via Google"},
        {"title": f"Monster search: {role}", "url": f"https://www.google.com/search?q=site:monster.com+{q}", "snippet": "Fallback via Google"},
        {"title": f"Lever search: {role}", "url": f"https://www.google.com/search?q=site:jobs.lever.co+{q}", "snippet": "Fallback via Google"},
        {"title": f"Greenhouse search: {role}", "url": f"https://www.google.com/search?q=site:boards.greenhouse.io+{q}", "snippet": "Fallback via Google"},
    ]


async def _l3_discovery_safe(state: Dict[str, Any]) -> Dict[str, Any]:
    """Discover job URLs through a chain of fallbacks and persist them as JSON.

    Sources are tried in this order:
      1. Serper, only when ``SERPER_API_KEY`` is set. All queries run
         concurrently and each one is capped by its own timeout.
      2. The MCP server, only when nothing was collected so far and both
         ``MCP_SERVER_URL`` and a token (``MCP_API_KEY`` or
         ``MCP_AUTH_TOKEN``) are set.
      3. Locally generated job-board search URLs, when still empty.

    Side effects: ``state["discovery_queries"]`` is overwritten, one record
    per tool call is appended to ``state["attempts"]``, and two JSON files are
    written (one in the per-run folder, one in the per-day folder).

    Args:
        state: Run state. Reads ``preferences`` (``target_roles`` or
            ``target_role``, ``location``, ``recency_hours``, ``max_jobs``)
            and ``run_id``.

    Returns:
        A delta with ``jobs_raw`` (de-duplicated by URL and capped at
        ``max_jobs``), ``artifacts`` (paths of the two files) and one
        ``live_feed`` message.
    """
    prefs = state.get("preferences") or {}

    def _number(key: str, default: Any, cast: Callable[[Any], Any]) -> Any:
        # A malformed preference falls back to its default rather than
        # aborting the whole discovery layer.
        try:
            return cast(prefs.get(key) or default)
        except (TypeError, ValueError):
            return default

    def _error_text(exc: BaseException) -> str:
        # str() of a timeout error is empty; fall back to the class name so
        # the attempt log always states what went wrong.
        return str(exc) or type(exc).__name__

    roles = prefs.get("target_roles") or [prefs.get("target_role") or "Data Scientist"]
    if isinstance(roles, str):
        # A single role given as a plain string must not be split into characters.
        roles = [roles]
    roles = [str(r).strip() for r in roles if str(r).strip()][:4]
    location = str(prefs.get("location") or "United States")
    recency_h = _number("recency_hours", 36.0, float)
    tbs = "qdr:d" if recency_h <= 36 else None
    max_jobs = _number("max_jobs", 40, int)

    api_key = os.getenv("SERPER_API_KEY", "")
    mcp_url = os.getenv("MCP_SERVER_URL", "")
    mcp_tok = os.getenv("MCP_API_KEY") or os.getenv("MCP_AUTH_TOKEN") or ""

    # Build 8 board-focused Serper queries (keeps quota sane)
    board_domains = [
        "linkedin.com/jobs",
        "indeed.com",
        "glassdoor.com",
        "ziprecruiter.com",
        "monster.com",
        "dice.com",
        "jobs.lever.co",
        "boards.greenhouse.io",
    ]
    # Only the first role is used for the board queries; up to two roles also
    # get one generic query each.
    primary_role = roles[0] if roles else "Data Scientist"
    queries = [f'site:{d} "{primary_role}" "{location}"' for d in board_domains]
    queries.extend([f'{r} {location} apply' for r in roles[:2]])
    state["discovery_queries"] = queries

    items: List[Dict[str, Any]] = []

    # Tool A: Serper (concurrent, each capped)
    if api_key:
        async def one(q: str):
            try:
                return await asyncio.wait_for(_serper_search(api_key, q, num=10, tbs=tbs, timeout_s=12.0), timeout=14.0)
            except Exception as e:
                return {"ok": False, "error": _error_text(e), "items": []}

        # one() never raises, so gather always yields a result dict per query.
        results = await asyncio.gather(*[one(q) for q in queries[:10]], return_exceptions=False)
        for r in results:
            ok = bool(r.get("ok"))
            err = r.get("error")
            conf = 0.75 if ok and r.get("items") else 0.30
            _log_attempt(state, layer="L3", agent="DiscoveryAgent", tool="serper.search", model=None,
                         status="ok" if ok else "failed", confidence=conf, error=err)
            items.extend([{"title": x["title"], "url": x["url"], "snippet": x["snippet"], "source": "serper"} for x in (r.get("items") or [])])

    # Tool B: MCP fallback if Serper empty or quota
    if (not items) and mcp_url and mcp_tok:
        try:
            r = await asyncio.wait_for(_mcp_search(mcp_url, mcp_tok, queries, recency_h, timeout_s=15.0), timeout=18.0)
            ok = bool(r.get("ok"))
            err = r.get("error")
            conf = 0.85 if ok and r.get("items") else 0.30
            _log_attempt(state, layer="L3", agent="DiscoveryAgent", tool="mcp.jobs.search", model=None,
                         status="ok" if ok else "failed", confidence=conf, error=err)
            items.extend(r.get("items") or [])
        except Exception as e:
            _log_attempt(state, layer="L3", agent="DiscoveryAgent", tool="mcp.jobs.search", model=None,
                         status="failed", confidence=0.0, error=_error_text(e))

    # Tool C: local fallback URLs (always returns quickly)
    if not items:
        _log_attempt(state, layer="L3", agent="DiscoveryAgent", tool="local.fallback_board_urls", model=None,
                     status="ok", confidence=0.55, error=None)
        items = _fallback_board_urls(primary_role, location)

    # Normalize + dedupe
    seen = set()
    jobs_raw: List[Dict[str, Any]] = []
    for it in items:
        url = (it.get("url") or "").strip()
        if not url:
            continue
        if url in seen:
            continue
        seen.add(url)
        jobs_raw.append({
            "title": it.get("title") or "",
            "url": url,
            "snippet": it.get("snippet") or "",
            "source": it.get("source") or "unknown",
        })
        if len(jobs_raw) >= max_jobs:
            break

    # Persist artifacts
    run_id = str(state.get("run_id") or "run")
    run_dir = _runs_dir(run_id)
    jobs_path = run_dir / "jobs_raw.json"
    daily_path = _daily_dir() / f"{run_id}_jobs_raw.json"

    artifacts = {
        "jobs_raw": {"path": _save_json(jobs_path, jobs_raw), "content_type": "application/json"},
        "jobs_raw_daily": {"path": _save_json(daily_path, {"run_id": run_id, "at_utc": _utc_now(), "jobs": jobs_raw}), "content_type": "application/json"},
    }

    return {
        "jobs_raw": jobs_raw,
        "artifacts": artifacts,
        "live_feed": [{"layer": "L3", "agent": "DiscoveryAgent", "message": f"Discovery completed: {len(jobs_raw)} jobs (saved to artifacts)."}],
    }


# ---------- public API (used by RunManagerService) ----------
async def run_single_layer(state: Dict[str, Any], layer: str) -> Dict[str, Any]:
    """Execute one pipeline layer against ``state`` and return the updated state.

    ``layer`` is upper-cased and stripped before dispatch:
      * ``L0`` runs the local prompt-injection guard.
      * ``L3`` runs the fallback-chained discovery.
      * ``L2``/``L4``/``L5`` look up a node function by several candidate
        names in ``careeragent.langgraph.nodes``. When none exists the run is
        parked with ``needs_human_approval``. After ``L5``, a truthy
        ``ranking`` with no ``pending_action`` also parks the run for review.
      * ``L6``-``L9`` run the fixed node sequence imported from
        ``careeragent.langgraph.nodes_l6_l9``.
      * Anything else only adds a "not implemented" feed message.

    NOTE(review): node functions are awaited unconditionally, so they are
    presumably all coroutines - confirm against the nodes modules.
    """
    layer = (layer or "").upper().strip()

    # L0 always instant
    if layer == "L0":
        return _apply_delta(state, _fast_l0_guard(state))

    # L3 goes through the fallback-chained discovery
    if layer == "L3":
        return _apply_delta(state, await _l3_discovery_safe(state))

    # L2/L4/L5 use the existing nodes module (name-flexible lookup)
    if layer in ("L2", "L4", "L5"):
        from careeragent.langgraph import nodes as nodes_mod

        candidate_names = {
            "L2": ("l2_parser_node", "l2_parse_node", "l2_intake_node", "parser_node"),
            "L4": ("l4_match_node", "matcher_node", "match_node", "score_node"),
            "L5": ("l5_rank_node", "rank_node", "evaluator_rank_node"),
        }
        node_fn = _pick_fn(nodes_mod, *candidate_names[layer])
        if not node_fn:
            _feed(state, "L1", "Runtime", f"{layer} node not found in careeragent.langgraph.nodes.py")
            state["status"] = "needs_human_approval"
            state["pending_action"] = f"missing_{layer.lower()}_node"
            return state

        state = _apply_delta(state, await node_fn(state))  # type: ignore[misc]

        needs_review = layer == "L5" and state.get("ranking") and not state.get("pending_action")
        if needs_review:
            state["status"] = "needs_human_approval"
            state["pending_action"] = "review_ranking"
        return state

    # L6–L9 (if present)
    if layer in ("L6", "L7", "L8", "L9"):
        from careeragent.langgraph.nodes_l6_l9 import (
            l6_draft_node, l6_evaluator_node,
            l7_apply_node, l7_evaluator_node,
            l8_tracker_node, l8_evaluator_node,
            l9_analytics_node,
        )

        # Each layer is a fixed sequence; every node sees the state produced
        # by the node before it.
        pipelines = {
            "L6": (l6_draft_node, l6_evaluator_node),
            "L7": (l7_apply_node, l7_evaluator_node),
            "L8": (l8_tracker_node, l8_evaluator_node),
            "L9": (l9_analytics_node,),
        }
        for node in pipelines[layer]:
            state = _apply_delta(state, await node(state))  # type: ignore[arg-type]
        return state

    _feed(state, "L1", "Runtime", f"Layer {layer} not implemented.")
    return state


async def approve_ranking_flow(state: Dict[str, Any]) -> Dict[str, Any]:
    """Resume after the ranking is approved: run the L6 draft and evaluator nodes.

    The state is marked ``running`` while the two nodes execute. Afterwards it
    is always parked again with ``needs_human_approval`` and a
    ``review_drafts`` pending action, whatever the nodes returned.
    """
    from careeragent.langgraph.nodes_l6_l9 import l6_draft_node, l6_evaluator_node

    state.update(status="running", pending_action=None)
    _feed(state, "L6", "HITL", "Ranking approved. Generating drafts…")

    for node in (l6_draft_node, l6_evaluator_node):
        state = _apply_delta(state, await node(state))  # type: ignore[arg-type]

    state.update(status="needs_human_approval", pending_action="review_drafts")
    return state


async def approve_drafts_flow(state: Dict[str, Any]) -> Dict[str, Any]:
    """Resume after the drafts are approved: run L7, then L8, then L9.

    L7 (apply + evaluator) and L8 (tracker + evaluator) each run as a pair.
    If the state reports ``needs_human_approval`` after either pair, the flow
    stops there and returns. Otherwise L9 analytics runs and the state is
    marked ``completed`` with no pending action.
    """
    from careeragent.langgraph.nodes_l6_l9 import (
        l7_apply_node, l7_evaluator_node,
        l8_tracker_node, l8_evaluator_node,
        l9_analytics_node,
    )

    state.update(status="running", pending_action=None)
    _feed(state, "L7", "HITL", "Drafts approved. Applying + tracking + analytics…")

    gated_stages = (
        (l7_apply_node, l7_evaluator_node),
        (l8_tracker_node, l8_evaluator_node),
    )
    for stage in gated_stages:
        for node in stage:
            state = _apply_delta(state, await node(state))  # type: ignore[arg-type]
        # An evaluator may ask for a human decision; stop before the next stage.
        if state.get("status") == "needs_human_approval":
            return state

    state = _apply_delta(state, await l9_analytics_node(state))  # type: ignore[arg-type]
    state.update(status="completed", pending_action=None)
    _feed(state, "L9", "HITL", "Run completed.")
    return state
''')

# Operator notice: summarise what the rewritten module changes and remind that
# the backend has to be restarted and a new run started.
print("✅ Patched runtime_nodes: L3 is now timeout-safe + saves job URLs to artifacts + daily_jobs.")
print("Restart backend and start a NEW run.")

✅ CWD = /Users/ganeshprasadbhandari/Documents/D_drive/clark/careeragent-ai
BACKUP: /Users/ganeshprasadbhandari/Documents/D_drive/clark/careeragent-ai/src/careeragent/langgraph/runtime_nodes.py.bak_20260221_174648
WROTE: /Users/ganeshprasadbhandari/Documents/D_drive/clark/careeragent-ai/src/careeragent/langgraph/runtime_nodes.py
✅ Patched runtime_nodes: L3 is now timeout-safe + saves job URLs to artifacts + daily_jobs.
Restart backend and start a NEW run.


In [None]:
# Shell commands, not Python: run them in a terminal or in a %%bash cell.
# RUN_ID is hard-coded to one specific run; replace it with the run to inspect.
RUN_ID=b5d608fef245469881507ae41e266156
# Query the local backend's status endpoint and print the step summary, the
# number of jobs in jobs_raw, and the jobs_raw artifact entry.
curl -sS "http://127.0.0.1:8000/status/$RUN_ID" | python -c "import sys,json; s=json.load(sys.stdin); print('steps',[(x.get('layer_id'),x.get('status'),x.get('finished_at_utc')) for x in (s.get('steps') or [])]); print('jobs_raw',len(s.get('jobs_raw') or [])); print('jobs_file',(s.get('artifacts') or {}).get('jobs_raw'))"
# Confirm the per-run discovery file exists (path is relative to the current directory).
ls -la "src/careeragent/artifacts/runs/$RUN_ID/jobs_raw.json"