
# Career Advisor — Agentic LangGraph Prototype (Jupyter)

**Goal:** A dopamine-high, privacy-safe, *session-only* prototype that:
- Loads a profile (or runs a 10s Quick Profile) without persisting PII
- Extracts & confirms skills in-session
- Detects intents and persona
- **Always** consults the curated vector store (`internal_curated_informa_vectorstore`) for context
- Computes **contextual** capability benchmarks (no hard-coded "cloud")
- Synthesizes persona-aware, streaming-like responses with a randomly chosen learning quote + value promise
- Shows quick charts (skill gaps) to keep engagement high

> This notebook uses **mock data** and **keyword-based search** (no live pgvector). You can later replace the mock search with queries against your own Postgres + pgvector store.


In [None]:

from __future__ import annotations
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Literal, Tuple
import re, random, math, statistics, time
import pandas as pd
import numpy as np
from IPython.display import display, Markdown
import matplotlib.pyplot as plt

# Audience a response is tailored to; node_persona_summarize() picks its
# section outline from this value.
Persona = Literal["IC","Manager","SeniorLeader"]
# Intent labels node_intent_persona() can attach to an utterance
# ("profile" is its fallback when no keyword matches).
Intent = Literal["job","courses","development_plan","manager_toolkit","leadership_strategy","profile"]


In [None]:

@dataclass
class Profile:
    """Profile attributes for one person, e.g. built from a Quick Profile dict.

    Every field is optional or defaults to an empty list, so a profile can be
    constructed from partial input.
    """
    employee_id: Optional[str] = None
    name: Optional[str] = None
    # Job title; choose_capability() scans it for CAPABILITY_MAP keywords first.
    title: Optional[str] = None
    # band and division are passed to fetch_peer_scores() by benchmark_line().
    band: Optional[str] = None
    division: Optional[str] = None
    # Free-text skill and interest labels; a fresh list per instance.
    skills: List[str] = field(default_factory=list)
    interests: List[str] = field(default_factory=list)

@dataclass
class SessionState:
    """Mutable per-run state that the node functions read and update in place."""
    # Not assigned by any node visible in this notebook.
    employee_id: Optional[str] = None
    # Selects the section outline in node_persona_summarize().
    persona: Persona = "IC"
    # Sorted intent labels written by node_intent_persona().
    intents: List[Intent] = field(default_factory=list)
    # Filled in by node_profile_load() when left as None.
    profile: Optional[Profile] = None
    # Stripped, de-duplicated, sorted skills produced by node_skill_extract().
    extracted_skills: List[str] = field(default_factory=list)
    # extracted_skills after the add/remove adjustments of node_skill_confirm().
    confirmed_skills: List[str] = field(default_factory=list)
    # Target-role skills the user does not have, written by node_skill_gap().
    gaps: List[str] = field(default_factory=list)
    # Rows of {"title", "match"} written by node_jobs_kb().
    job_hits: List[Dict[str, Any]] = field(default_factory=list)
    # Rows of {"title", "covers"} written by node_courses_kb().
    course_hits: List[Dict[str, Any]] = field(default_factory=list)
    # Documents returned by curated_search(), stored by node_curated_kb().
    curated_hits: List[Dict[str, Any]] = field(default_factory=list)
    # Not assigned by any node visible in this notebook.
    correlation_id: Optional[str] = None

@dataclass
class NodeIO:
    """Envelope passed from node to node: the session state plus a trace dict."""
    state: SessionState
    # Each node records its outcome under its own key (e.g. "profile_load");
    # the raw utterance and the "synth_payload" sections are also kept here.
    trace: Dict[str, Any] = field(default_factory=dict)


In [None]:

# (quote, author) pairs; node_persona_summarize() opens each response with one
# picked via random.choice().
QUOTES = [
    ("We are what we repeatedly do. Excellence, then, is not an act but a habit.", "Will Durant"),
    ("Learning never exhausts the mind.", "Leonardo da Vinci"),
    ("What we know is a drop; what we don’t know is an ocean.", "Isaac Newton"),
    ("Once you stop learning, you start dying.", "Albert Einstein"),
    ("The only limit to our realization of tomorrow is our doubts of today.", "F. D. Roosevelt"),
]

# Closing call-to-action; node_persona_summarize() appends it as the final
# section of every response.
VALUE_PROMISE = (
    "In 2 minutes, I’ll:\n"
    "✅ Recommend 2 career paths in Informa\n"
    "✅ Show the 3 most valuable skills to build next\n"
    "✅ Give you 2 courses to start this month\n\n"
    "Shall we begin?"
)


In [None]:

# Lowercase keyword -> capability id.
# choose_capability() tests each key as a substring of the lowercased title
# (then of interests/skills) and returns the first hit in insertion order, so
# the order of entries below doubles as a priority order.
CAPABILITY_MAP = {
    "data": "analytics_modeling",
    "analyst": "analytics_modeling",
    "scientist": "analytics_modeling",
    "ml": "ml_engineering",
    "backend": "systems_backend",
    "platform": "systems_backend",
    "sre": "reliability_engineering",
    "frontend": "frontend_engineering",
    "product": "product_discovery",
    "design": "ux_research",
    "manager": "hiring_coaching",
    "lead": "people_leadership",
    "director": "portfolio_strategy",
    "vp": "portfolio_strategy",
    "ops": "process_excellence",
    "support": "customer_success",
    "automation": "automation",
    "cloud": "cloud_platforms",
}

# Capability id -> human-readable label used in user-facing sentences
# (looked up by pretty_cap()).
PRETTY = {
    "analytics_modeling": "analytics & modeling",
    "ml_engineering": "ML engineering",
    "systems_backend": "backend systems",
    "reliability_engineering": "site reliability",
    "frontend_engineering": "frontend engineering",
    "product_discovery": "product discovery",
    "ux_research": "UX research",
    "hiring_coaching": "hiring & coaching",
    "people_leadership": "people leadership",
    "portfolio_strategy": "portfolio strategy",
    "process_excellence": "process excellence",
    "customer_success": "customer success",
    "automation": "automation",
    "cloud_platforms": "cloud platforms",
}

def pretty_cap(cap: str) -> str:
    """Return the display label for a capability id.

    Ids that are missing from ``PRETTY`` fall back to the id itself with
    underscores replaced by spaces.
    """
    if cap in PRETTY:
        return PRETTY[cap]
    return cap.replace("_", " ")

def choose_capability(state: SessionState) -> Optional[str]:
    """Pick the capability id that best matches the session's profile.

    The lowercased job title is checked first; only when no ``CAPABILITY_MAP``
    keyword occurs in it are the profile's interests and skills consulted.
    Within each pass the first matching keyword (in ``CAPABILITY_MAP``
    insertion order) wins.

    Args:
        state: Session state whose ``profile`` may be ``None``.

    Returns:
        The matched capability id, or ``None`` when there is no profile or no
        keyword matches.
    """
    profile = state.profile
    title = (getattr(profile, "title", None) or "").lower()
    # ``or []`` keeps this working when a profile was built with
    # skills/interests explicitly set to None.
    interests = getattr(profile, "interests", None) or []
    skills = getattr(profile, "skills", None) or []
    tags = {t.lower() for t in interests} | {t.lower() for t in skills}

    # NOTE(review): matching is by substring, so short keys such as "ml" or
    # "ops" also match inside longer words (e.g. "html", "laptops").
    for key, cap in CAPABILITY_MAP.items():
        if key in title:
            return cap
    for key, cap in CAPABILITY_MAP.items():
        if any(key in tag for tag in tags):
            return cap
    return None

def capability_score(profile: Profile, capability: str) -> float:
    """Score how strongly a profile's skills indicate a capability.

    Each skill containing (as a substring) any ``CAPABILITY_MAP`` keyword that
    maps to ``capability`` adds 0.15. A flat 0.05 bonus is added when the
    interests include "career path", "learning" or "mentoring". The total is
    capped at 1.0.

    Args:
        profile: Object exposing ``skills`` and ``interests``; either may be
            missing or ``None``.
        capability: Capability id, i.e. a value of ``CAPABILITY_MAP``.

    Returns:
        A score in the range [0.0, 1.0].
    """
    skills = [s.lower() for s in (getattr(profile, "skills", None) or [])]
    # Interests are lowercased like skills so "Learning" earns the bonus too.
    interests = {i.lower() for i in (getattr(profile, "interests", None) or [])}
    growth_interests = {"career path", "learning", "mentoring"}
    interest_bonus = 0.05 if interests & growth_interests else 0.0

    keys = [k for k, cap in CAPABILITY_MAP.items() if cap == capability]
    hits = sum(1 for s in skills if any(k in s for k in keys))
    # Both terms are non-negative, so only the upper bound needs clamping.
    return min(1.0, (hits * 0.15) + interest_bonus)

def rank_percentile(score: float, peers: List[float]) -> float:
    """Return the fraction of peer scores that are less than or equal to ``score``.

    An empty (falsy) ``peers`` yields the neutral value 0.5.
    """
    if not peers:
        return 0.5
    ordered = list(peers)
    ordered.sort()
    at_or_below = 0
    for peer in ordered:
        if peer <= score:
            at_or_below += 1
    return at_or_below / max(1, len(ordered))

def bucketize(score: float, peers: List[float]) -> str:
    """Describe ``score`` as a coarse bucket relative to ``peers``.

    With fewer than 10 peers the peers are ignored and the score is compared
    to a fixed 0.5 threshold. Otherwise the score is placed against the
    25th/50th/75th percentiles of ``peers`` (a value equal to a cut-off
    belongs to the lower bucket).
    """
    if not peers or len(peers) < 10:
        if score >= 0.5:
            return "above average"
        return "developing"

    q1, median, q3 = np.quantile(peers, [0.25, 0.5, 0.75])
    ladder = (
        (q1, "bottom quartile"),
        (median, "below median"),
        (q3, "above median"),
    )
    for cutoff, label in ladder:
        if score <= cutoff:
            return label
    return "top quartile"

def fetch_peer_scores(persona: Persona, band: Optional[str], division: Optional[str]) -> List[float]:
    """Return peer capability scores for a peer group.

    Prototype stub: the arguments are ignored and the same synthetic, evenly
    spaced scores (0.10, 0.13, ..., 0.94) are returned on every call.
    """
    # synthetic, stable distribution for prototype
    scores: List[float] = []
    for pct in range(10, 95, 3):
        scores.append(pct / 100)
    return scores

def benchmark_line(state: SessionState) -> Optional[str]:
    """Build a one-sentence benchmark of the user's main capability against peers.

    Returns ``None`` when no capability can be chosen or there is no profile.
    With fewer than 50 peer scores a coarse bucket phrase is used; otherwise
    the wording depends on the user's percentile rank.
    """
    capability = choose_capability(state)
    if not (capability and state.profile):
        return None

    profile = state.profile
    own_score = capability_score(profile, capability)
    peer_scores = fetch_peer_scores(state.persona, profile.band, profile.division)

    # Small peer groups: avoid quoting a percentile, use a bucket instead.
    if len(peer_scores) < 50:
        bucket = bucketize(own_score, peer_scores)
        return f"Your {pretty_cap(capability)} capability looks {bucket} for your band."

    pct = round(100 * rank_percentile(own_score, peer_scores))
    if pct >= 67:
        return f"You’re stronger in {pretty_cap(capability)} than about {pct}% of your peer group."
    if pct >= 50:
        return f"Your {pretty_cap(capability)} capability is above the median for your peer group."
    return f"Your {pretty_cap(capability)} capability is developing relative to peers; I’ll recommend quick wins."


In [None]:

# Name of the curated collection; recorded in the trace by node_curated_kb().
CURATED_COLLECTION = "internal_curated_informa_vectorstore"

# Simple mock docs (in production, use pgvector similarity search).
# curated_search() reads the "title" and "text" fields of each entry.
CURATED_DOCS = [
    {"id": "c1", "title": "Internal Career Paths — Data & Analytics", "text": "Paths from analyst to senior staff; key skills: modeling, SQL, Python, stakeholder comms.", "url": None},
    {"id": "c2", "title": "Manager Toolkit: Hiring & Coaching", "text": "Structured interviews, growth plans, capability matrices, feedback rituals.", "url": None},
    {"id": "c3", "title": "Frontend Excellence Playbook", "text": "Design systems, accessibility, performance budgets, testing pyramid.", "url": None},
    {"id": "c4", "title": "Reliability Runbook", "text": "SLOs, error budgets, incident response, postmortems, capacity planning.", "url": None},
    {"id": "c5", "title": "Leadership Strategy: Portfolio & Bets", "text": "North stars, investment themes, talent density, comms cadence.", "url": None},
]

def curated_search(
    query: str,
    topk: int = 3,
    docs: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
    """Rank documents by keyword overlap with ``query`` and return the best ones.

    Relevance is the number of distinct ``[a-z]+`` tokens shared by the query
    and the document's text + title, divided by ``1 + number of query tokens``.
    Ties keep the documents' original order.

    Args:
        query: Free-text query; matching is case-insensitive.
        topk: Maximum number of hits to return.
        docs: Documents to search, each with ``"title"`` and ``"text"`` keys.
            Defaults to the module-level ``CURATED_DOCS``.

    Returns:
        Shallow copies of the best-matching documents, each extended with
        ``"score"`` (the relevance above) and ``"snippet"`` (the text, cut to
        160 characters plus an ellipsis when longer). The input documents are
        left untouched.
    """
    corpus = CURATED_DOCS if docs is None else docs
    # The query tokens do not depend on the document, so tokenize only once.
    query_tokens = set(re.findall(r"[a-z]+", query.lower()))

    def relevance(doc: Dict[str, Any]) -> float:
        # simple keyword overlap score
        doc_tokens = set(re.findall(r"[a-z]+", doc["text"].lower() + " " + doc["title"].lower()))
        return len(query_tokens & doc_tokens) / (1 + len(query_tokens))

    scored = [(relevance(doc), doc) for doc in corpus]
    # list.sort is stable (also with reverse=True), so ties keep corpus order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    hits: List[Dict[str, Any]] = []
    for value, doc in scored[:topk]:
        text = doc["text"]
        # Copy before annotating so the shared corpus entries are not mutated.
        hit = dict(doc)
        hit["score"] = value
        hit["snippet"] = (text[:160] + "…") if len(text) > 160 else text
        hits.append(hit)
    return hits


In [None]:

def node_profile_load(io: NodeIO, quick_profile: Optional[Dict[str, Any]] = None) -> NodeIO:
    """Ensure the session has a profile and record where it came from.

    An existing ``io.state.profile`` is kept as is. Otherwise a ``Profile`` is
    built from ``quick_profile`` when that dict is non-empty, or an empty
    profile is created. The source is written to ``io.trace["profile_load"]``.
    """
    state = io.state
    if state.profile is not None:
        source = "provided_profile"
    elif quick_profile:
        state.profile = Profile(**quick_profile)
        source = "quick_profile"
    else:
        state.profile = Profile(skills=[], interests=[])
        source = "empty_profile"

    io.trace["profile_load"] = {"status": "ok", "source": source}
    return io

def node_skill_extract(io: NodeIO) -> NodeIO:
    """Merge profile skills with already-extracted skills into a clean list.

    Entries are stripped of surrounding whitespace, blank entries are dropped,
    duplicates are removed and the result is sorted. The final count is
    written to ``io.trace["skill_extract"]``.
    """
    state = io.state
    # trivial extractor: normalize and unique
    from_profile = state.profile.skills if state.profile else []
    merged = from_profile + state.extracted_skills
    cleaned = {skill.strip() for skill in merged if skill.strip()}
    state.extracted_skills = sorted(cleaned)
    io.trace["skill_extract"] = {"status": "ok", "count": len(state.extracted_skills)}
    return io

def node_skill_confirm(io: NodeIO, add: Optional[List[str]] = None, remove: Optional[List[str]] = None) -> NodeIO:
    """Apply the user's corrections to the extracted skills.

    Args:
        io: Node envelope; ``io.state.extracted_skills`` is the starting set.
        add: Skills to add. Entries are stripped and blank ones ignored, the
            same normalization ``node_skill_extract`` applies.
        remove: Skills to drop (stripped before comparison). Unknown skills
            are ignored. Removal is applied after addition, so a skill listed
            in both ends up removed.

    Returns:
        The same ``io``, with ``state.confirmed_skills`` set to the sorted
        result and the count recorded in ``io.trace["skill_confirm"]``.
    """
    to_add = {skill.strip() for skill in (add or []) if skill.strip()}
    to_remove = {skill.strip() for skill in (remove or [])}
    confirmed = (set(io.state.extracted_skills) | to_add) - to_remove
    io.state.confirmed_skills = sorted(confirmed)
    io.trace["skill_confirm"] = {"status": "ok", "count": len(io.state.confirmed_skills)}
    return io

def node_intent_persona(io: NodeIO, utterance: str) -> NodeIO:
    """Tag the utterance with intents via keyword (substring) matching.

    The raw utterance is stored in ``io.trace["utterance"]``. Every intent
    whose keyword list has at least one substring hit in the lowercased
    utterance is selected; when none hit, the single intent ``"profile"`` is
    used. The persona is not inferred here, only echoed into the trace.
    """
    io.trace["utterance"] = utterance
    lowered = utterance.lower()

    keyword_table = (
        ("job", ("job", "role", "openings", "posting")),
        ("courses", ("course", "learn", "upskill")),
        ("development_plan", ("plan", "30-day", "development")),
        ("manager_toolkit", ("manager", "team", "coach")),
        ("leadership_strategy", ("leadership", "strategy", "org")),
    )
    detected = {
        intent
        for intent, keywords in keyword_table
        if any(word in lowered for word in keywords)
    }
    if not detected:
        detected.add("profile")

    io.state.intents = sorted(detected)
    io.trace["intent_persona"] = {"status": "ok", "intents": io.state.intents, "persona": io.state.persona}
    return io

def node_curated_kb(io: NodeIO) -> NodeIO:
    """Look up curated documents for the current utterance.

    Uses the utterance stored in the trace as the query, falling back to a
    generic query when it is missing or empty. The top 3 hits are stored on
    the state and summarized in ``io.trace["curated_kb"]``.
    """
    query = io.trace.get("utterance", "")
    if not query:
        query = "career path skills"
    results = curated_search(query, topk=3)
    io.state.curated_hits = results
    io.trace["curated_kb"] = {"collection": CURATED_COLLECTION, "hits": len(results)}
    return io

# Simple mock jobs/courses/gaps
# Capability id -> candidate role titles ("roles") and the skills those roles
# call for ("skills"). Only four capabilities have a template; node_jobs_kb()
# and node_skill_gap() fall back to "analytics_modeling" for any other id.
ROLE_TEMPLATES = {
    "analytics_modeling": {
        "roles": ["Senior Data Analyst", "Analytics Engineer"],
        "skills": ["SQL", "Python", "Modeling", "Visualization"]
    },
    "frontend_engineering": {
        "roles": ["Senior Frontend Engineer", "UI Engineer"],
        "skills": ["React", "Accessibility", "Testing", "Performance"]
    },
    "systems_backend": {
        "roles": ["Senior Backend Engineer", "Platform Engineer"],
        "skills": ["APIs", "Databases", "Scalability", "Observability"]
    },
    "product_discovery": {
        "roles": ["Senior Product Manager", "Product Lead"],
        "skills": ["User research", "Roadmapping", "Prioritization", "Stakeholder"]
    },
}

# Skill name -> course titles covering it. Keys are spelled exactly like the
# "skills" entries in ROLE_TEMPLATES; node_courses_kb() uses only the first
# title listed for each skill.
COURSE_CATALOG = {
    "SQL": ["Advanced SQL Patterns", "SQL for Analytics"],
    "Python": ["Effective Python", "Data Wrangling with Python"],
    "Modeling": ["Feature Engineering", "Applied Forecasting"],
    "Visualization": ["Storytelling with Data", "Dashboards That Work"],
    "React": ["Modern React", "Testing React Apps"],
    "Accessibility": ["A11y Fundamentals"],
    "Testing": ["Testing Pyramid in Practice"],
    "Performance": ["Web Performance Foundations"],
    "APIs": ["Designing RESTful APIs"],
    "Databases": ["Relational Databases Deep Dive"],
    "Scalability": ["Scalable Systems 101"],
    "Observability": ["Observability Essentials"],
    "User research": ["Interview Techniques"],
    "Roadmapping": ["Outcome-Driven Roadmaps"],
    "Prioritization": ["Prioritize Like a Pro"],
    "Stakeholder": ["Stakeholder Communication"],
}

def node_jobs_kb(io: NodeIO) -> NodeIO:
    """Populate ``state.job_hits`` with mock role suggestions.

    Roles come from the ``ROLE_TEMPLATES`` entry for the chosen capability;
    when no capability is chosen or it has no template, the
    ``"analytics_modeling"`` roles are used (the trace still records the
    chosen capability id). Each role gets a random match score in
    [0.72, 0.92], rounded to two decimals.
    """
    capability = choose_capability(io.state) or "analytics_modeling"
    template = ROLE_TEMPLATES.get(capability, ROLE_TEMPLATES["analytics_modeling"])

    suggestions = []
    for role in template["roles"]:
        suggestions.append({"title": role, "match": round(random.uniform(0.72, 0.92), 2)})

    io.state.job_hits = suggestions
    io.trace["jobs_kb"] = {"status": "ok", "capability": capability, "count": len(suggestions)}
    return io

def node_skill_gap(io: NodeIO) -> NodeIO:
    """Compute which target-role skills the user is still missing.

    Target skills come from the ``ROLE_TEMPLATES`` entry for the chosen
    capability (falling back to ``"analytics_modeling"``). The user's skills
    are the confirmed skills, or the extracted skills when nothing has been
    confirmed. Skills are compared ignoring case and surrounding whitespace,
    so "sql" counts as having "SQL".

    The sorted gaps (spelled as in the template) are stored in ``state.gaps``
    and in ``io.trace["skill_gap"]``.
    """
    cap = choose_capability(io.state) or "analytics_modeling"
    target = ROLE_TEMPLATES.get(cap, ROLE_TEMPLATES["analytics_modeling"])["skills"]
    owned = io.state.confirmed_skills or io.state.extracted_skills
    # casefold() gives a case-insensitive key for the membership test.
    owned_keys = {skill.strip().casefold() for skill in owned}
    gaps = sorted({skill for skill in target if skill.strip().casefold() not in owned_keys})
    io.state.gaps = gaps
    io.trace["skill_gap"] = {"status": "ok", "capability": cap, "gaps": gaps}
    return io

def node_courses_kb(io: NodeIO) -> NodeIO:
    """Recommend up to two courses for the leading skill gaps.

    Takes the first catalog course for each of the first three gaps, keeps the
    first two of those, and falls back to a single generic course when
    nothing was found.
    """
    candidates = []
    for gap in io.state.gaps[:3]:
        for title in COURSE_CATALOG.get(gap, [])[:1]:
            candidates.append({"title": title, "covers": gap})

    chosen = candidates[:2]
    if not chosen:
        chosen = [{"title": "Learning How to Learn", "covers": "meta"}]

    io.state.course_hits = chosen
    io.trace["courses_kb"] = {"status": "ok", "count": len(io.state.course_hits)}
    return io

def node_persona_summarize(io: NodeIO) -> NodeIO:
    """Assemble the ordered text sections of the response.

    Sections, in order: a random quote, the benchmark sentence (when one is
    available), up to two curated document titles (when there are hits), the
    persona-specific outline, and the value promise. The list is stored in
    ``io.trace["synth_payload"]`` for ``node_synth_stream`` to render.
    """
    state = io.state
    quote, author = random.choice(QUOTES)
    brag = benchmark_line(state)

    sections = [f"“{quote}” — {author}"]
    if brag:
        sections.append(brag)

    # Curated blurb
    if state.curated_hits:
        bullets = [f"• {hit.get('title','Internal doc')}" for hit in state.curated_hits[:2]]
        intro = "Here are a couple of internal resources I’ll use for context:"
        sections.append("\n".join([intro] + bullets))

    # Persona-aware outline; any persona other than IC/Manager gets the
    # senior-leader outline.
    outlines = {
        "IC": "What we found → Closest roles → Skill gaps → Courses → Next steps",
        "Manager": "Team insights → Roles to consider → Gap themes → Enablement plan → Actions/owners",
    }
    leader_outline = "Signal → Implications → Talent moves → Capability gaps → Investments & comms"
    sections.append(outlines.get(state.persona, leader_outline))

    sections.append(VALUE_PROMISE)
    io.trace["persona_summarize"] = {"status": "ok", "brag": bool(brag)}
    io.trace["synth_payload"] = sections
    return io

def node_synth_stream(io: NodeIO, delay: float = 0.0) -> NodeIO:
    """Render the prepared response in the notebook.

    Displays each section from ``io.trace["synth_payload"]`` as Markdown
    (sleeping ``delay`` seconds after each when ``delay`` is non-zero), then
    the job and course hits as tables, then a bar chart with one unit-height
    bar per skill gap. The number of sections is recorded in
    ``io.trace["synth_stream"]``.
    """
    state = io.state

    # Notebook-friendly "stream": render the sections one after another
    for section in io.trace.get("synth_payload", []):
        display(Markdown(section))
        if delay:
            time.sleep(delay)

    # Jobs first, then courses, each as a table (skipped when empty)
    for rows in (state.job_hits, state.course_hits):
        if rows:
            display(pd.DataFrame(rows))

    # Chart: skill gaps (if any); every bar has height 1, only the labels matter
    focus_areas = state.gaps
    if focus_areas:
        plt.figure()
        plt.bar(focus_areas, [1] * len(focus_areas))
        plt.title("Top Skill Gaps (focus areas)")
        plt.xticks(rotation=20)
        plt.show()

    section_count = len(io.trace.get("synth_payload", []))
    io.trace["synth_stream"] = {"status": "ok", "sections": section_count}
    return io


In [None]:

def run_career_advisor(
    utterance: str,
    persona: Persona = "IC",
    quick_profile: Optional[Dict[str, Any]] = None,
    skill_add: List[str] = None,
    skill_remove: List[str] = None,
    stream_delay: float = 0.0,
) -> NodeIO:
    """Run the whole advisor pipeline once on a fresh session.

    Args:
        utterance: The user's request; drives intent detection and the
            curated search query.
        persona: Audience used for the response outline.
        quick_profile: Optional keyword arguments for ``Profile``.
        skill_add: Skills to add during confirmation (``None`` means none).
        skill_remove: Skills to remove during confirmation (``None`` means none).
        stream_delay: Seconds to pause between rendered sections.

    Returns:
        The final ``NodeIO`` with the populated state and per-node trace.
    """
    # Nodes in execution order; each takes the envelope and returns it.
    pipeline = (
        lambda env: node_profile_load(env, quick_profile=quick_profile),
        node_skill_extract,
        lambda env: node_skill_confirm(env, add=skill_add or [], remove=skill_remove or []),
        lambda env: node_intent_persona(env, utterance=utterance),
        node_curated_kb,
        node_jobs_kb,
        node_skill_gap,
        node_courses_kb,
        node_persona_summarize,
        lambda env: node_synth_stream(env, delay=stream_delay),
    )

    io = NodeIO(state=SessionState(persona=persona))
    for node in pipeline:
        io = node(io)
    return io


## Demo 1 — IC, Data Scientist profile

In [None]:

# Demo run for an individual contributor. The title contains the "data"
# keyword, so choose_capability() resolves to "analytics_modeling"; "SQL" is
# added on top of the profile skills during skill confirmation.
io = run_career_advisor(
    utterance="What jobs and courses should I consider next quarter?",
    persona="IC",
    quick_profile={
        "name": "Alex",
        "title": "Senior Data Scientist",
        "band": "Band 5",
        "division": "Analytics",
        "skills": ["Python", "Machine Learning", "Data Visualization"],
        "interests": ["career path", "forecasting"]
    },
    skill_add=["SQL"],
    stream_delay=0.0,
)
# Last expression of the cell: the per-node trace dict.
io.trace


## Demo 2 — Manager persona

In [None]:

# Demo run for the Manager persona. The title matches the "manager" keyword,
# i.e. capability "hiring_coaching", which has no ROLE_TEMPLATES entry, so the
# job and skill-gap nodes fall back to the "analytics_modeling" template.
io = run_career_advisor(
    utterance="I need a manager toolkit and a development plan for my team",
    persona="Manager",
    quick_profile={
        "name": "Jordan",
        "title": "Engineering Manager",
        "band": "Band 6",
        "division": "Platform",
        "skills": ["Hiring", "Coaching", "Python", "APIs"],
        "interests": ["people leadership"]
    },
)
# Last expression of the cell: the per-node trace dict.
io.trace


## Demo 3 — No identity, Quick Profile wizard

In [None]:

# Demo run without name or employee id. "product" precedes "director" in
# CAPABILITY_MAP, so the title resolves to "product_discovery".
io = run_career_advisor(
    utterance="Show me leadership strategies and courses",
    persona="SeniorLeader",
    quick_profile={
        "title": "Director of Product",
        "band": "Band 7",
        "division": "Digital",
        "skills": ["Roadmapping", "Stakeholder"],
        "interests": ["strategy", "talent density"]
    },
)
# Last expression of the cell: the per-node trace dict.
io.trace



### Where to plug your real DB + embeddings

- Replace `curated_search()` with a Postgres + pgvector call against **`internal_curated_informa_vectorstore`**.
  - **Similarity SQL (preferred):**
    ```sql
    SELECT e.id, e.document, e.cmetadata, 1 - (e.embedding <=> %(query_vec)s) AS score
    FROM ai.langchain_pg_embedding e
    JOIN ai.langchain_pg_collection c ON c.uuid = e.collection_id
    WHERE c.name = %(collection)s
    ORDER BY e.embedding <=> %(query_vec)s
    LIMIT 8;
    ```
  - **Text fallback:**
    ```sql
    SELECT e.id, e.document, e.cmetadata
    FROM ai.langchain_pg_embedding e
    JOIN ai.langchain_pg_collection c ON c.uuid = e.collection_id
    WHERE c.name = %(collection)s
      AND (e.document ILIKE '%%' || %(query)s || '%%'
           OR CAST(e.cmetadata AS TEXT) ILIKE '%%' || %(query)s || '%%')
    LIMIT 5;
    ```

- For **profile retrieval**, implement your email/ID and *Find-me* queries in a similar helper cell.

- When moving to FastAPI, map each node into your existing `src/app/nodes/career/` modules and keep the same data contracts (`SessionState`, `NodeIO`).
