
# HR Career Advisor — LangGraph Workflow (Notebook Prototype)

Implements the workflow you approved (with **Prohibitor** and **Informer KB v2**):
- `prohibitor` → `setup_state` → `intent_persona` → `dispatcher`
- Retrieval tools: `pg_search`, `kb_search`, `informer_kb`
- Tools: `job_tool`, `courses_tool` (audience-aware)
- Reflexion: `job_reflexion`, `courses_reflexion`
- `consolidation_summary` composer

> `courses_tool` hides manager-only items from non-managers (i.e. when `state.is_manager` is false), unless the prompt explicitly asks for manager/leadership courses.


## 0) Setup & Environment

In [None]:

import os, json, re, time, requests
from typing import List, Dict, Any, Optional, Tuple, Literal
from dataclasses import dataclass
import numpy as np
import pandas as pd
from IPython.display import display, Markdown

# %pip install -q psycopg[binary] boto3 python-dotenv ipywidgets

from dotenv import load_dotenv
# Pull variables from a local .env file (if present) into the environment
# before any of the os.getenv() calls below run.
load_dotenv()

# AWS region used for the Bedrock client; overridable via the environment.
AWS_REGION = os.getenv("AWS_REGION", "us-west-2")
# NOTE(review): not referenced by any of the cells visible in this notebook.
AWS_MODEL_ID = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"  # fixed per requirements

# Postgres connection string; empty means PG-backed lookups are unavailable
# (get_pg_conn raises, profile_lookup returns []).
PG_DSN = os.getenv("PG_DSN","")  # postgresql://user:pass@host:5432/dbname  (URL-encode password)
# Collection names searched, in this order, by tool_pg_search.
PG_COLLECTIONS = [
    "internal_private_employee_profiles_vectorstore",
    "internal_curated_informa_vectorstore",
]

# Knowledge-base ids for jobs and courses; empty disables the matching lookup.
JOB_KB_ID = os.getenv("JOB_KB_ID","")
COURSES_KB_ID = os.getenv("COURSES_KB_ID","")

# Informer KB v2 endpoint settings; base URL and API key must both be set for
# InformerKB2.search to issue a request.
INFORMER_KB2_BASE_URL = os.getenv("INFORMER_KB2_BASE_URL","")
INFORMER_KB2_API_KEY  = os.getenv("INFORMER_KB2_API_KEY","")
INFORMER_KB2_TENANT_ID= os.getenv("INFORMER_KB2_TENANT_ID","informa")

# Report which backends are configured (booleans only, no secret values).
print("Env present:", dict(
    PG=bool(PG_DSN),
    JOB_KB=bool(JOB_KB_ID),
    COURSES_KB=bool(COURSES_KB_ID),
    INFORMER=bool(INFORMER_KB2_BASE_URL and INFORMER_KB2_API_KEY)
))


## 1) PGVector Retrieval (no document re-embedding)

In [None]:

import psycopg

def get_pg_conn():
    """Open a new psycopg connection using ``PG_DSN``.

    Raises:
        RuntimeError: if ``PG_DSN`` is empty.
    """
    if PG_DSN:
        return psycopg.connect(PG_DSN)
    raise RuntimeError("PG_DSN not set")

# Keyword pre-filter for a single collection: selects up to %(k)s rows whose
# document text or JSON metadata (cast to text) contains %(query)s,
# case-insensitively. Named parameters: collection, query, k.
# The doubled '%%' is the driver-level escape for a literal '%' wildcard when
# the statement is executed with parameters.
# NOTE(review): '%' and '_' inside the user query are not escaped, so they act
# as LIKE wildcards; there is no ORDER BY, so which rows survive LIMIT is
# up to the database.
KEYWORD_PREFILTER_SQL = """SELECT e.id, e.embedding, e.document, e.cmetadata, c.name as collection
FROM ai.langchain_pg_embedding e
JOIN ai.langchain_pg_collection c ON c.uuid = e.collection_id
WHERE c.name = %(collection)s
  AND (e.document ILIKE '%%' || %(query)s || '%%'
       OR CAST(e.cmetadata AS TEXT) ILIKE '%%' || %(query)s || '%%')
LIMIT %(k)s;
"""

def _to_meta(meta):
    """Normalise a ``cmetadata`` column value into a Python object.

    Args:
        meta: Either an already-decoded ``dict``/``list`` or a JSON-encoded
            value (``str``/``bytes``).

    Returns:
        ``meta`` unchanged when it is a dict or list; otherwise the result of
        ``json.loads(meta)``; if decoding fails, ``{"raw": str(meta)}``.
    """
    if isinstance(meta, (dict, list)):
        return meta
    try:
        return json.loads(meta)
    except (TypeError, ValueError):
        # TypeError: not str/bytes (e.g. None); ValueError covers
        # json.JSONDecodeError. A bare ``except`` would also have swallowed
        # KeyboardInterrupt / SystemExit.
        return {"raw": str(meta)}

def _cosine(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Yields ``0.0`` when the product of the two norms is zero, instead of
    dividing by zero.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return float(np.dot(a, b) / norm_product) if norm_product != 0 else 0.0

def _embedding_to_vector(emb) -> np.ndarray:
    """Convert one stored embedding into a float32 vector.

    Accepts a numeric sequence/array, or the textual ``[x, y, ...]`` form a
    driver returns for a vector column when no type adapter is registered.
    """
    if isinstance(emb, str):
        emb = json.loads(emb)
    return np.asarray(emb, dtype=np.float32)


def pg_search_hybrid(collection: str, query: str, pre_k: int = 24, top_k: int = 8) -> List[Dict[str,Any]]:
    """Keyword-prefilter one collection, then rank the matches by centroid similarity.

    The query text is never embedded. Rows are selected with
    ``KEYWORD_PREFILTER_SQL`` and each is scored by the cosine similarity
    between its stored embedding and the mean embedding of all selected rows.

    Args:
        collection: Collection name to search.
        query: Substring matched case-insensitively against document/metadata.
        pre_k: Maximum number of rows fetched by the keyword pre-filter.
        top_k: Maximum number of ranked hits returned.

    Returns:
        Up to ``top_k`` dicts with keys ``id``, ``embedding``, ``document``,
        ``metadata``, ``collection`` and ``score``, best score first. Rows
        with a NULL embedding are skipped because they cannot be scored.

    Raises:
        RuntimeError: propagated from ``get_pg_conn`` when ``PG_DSN`` is unset.
    """
    with get_pg_conn() as conn, conn.cursor() as cur:
        cur.execute(KEYWORD_PREFILTER_SQL, {"collection": collection, "query": query, "k": pre_k})
        rows = cur.fetchall()
    if not rows:
        return []

    items, vectors = [], []
    for _id, emb, doc, meta, coll in rows:
        if emb is None:
            continue
        # Convert once and reuse for both the centroid and the scoring pass.
        vectors.append(_embedding_to_vector(emb))
        items.append({"id": _id, "embedding": emb, "document": doc, "metadata": _to_meta(meta), "collection": coll})
    if not items:
        return []

    centroid = np.mean(vectors, axis=0)
    for item, vec in zip(items, vectors):
        item["score"] = _cosine(centroid, vec)
    items.sort(key=lambda x: x.get("score", 0.0), reverse=True)
    return items[:top_k]

def pg_multi_search(query: str, collections: List[str]) -> List[Dict[str,Any]]:
    """Search every collection and merge the hits into one ranked list.

    Each collection is queried via ``pg_search_hybrid(coll, query, 24, 8)``.
    A failure in one collection is printed and skipped so the remaining
    collections still contribute.

    Returns:
        The merged hits sorted by ``score`` (missing scores count as 0.0),
        truncated to ``max(6, len(collections))`` entries.
    """
    merged: List[Dict[str,Any]] = []
    for name in collections:
        try:
            merged += pg_search_hybrid(name, query, 24, 8)
        except Exception as e:
            print(f"⚠️ PG search failed for {name}: {e}")
    ranked = sorted(merged, key=lambda hit: hit.get("score", 0.0), reverse=True)
    limit = max(6, len(collections))
    return ranked[:limit]


## 2) AWS Knowledge Bases Retrieval

In [None]:

import boto3
# Build the Bedrock Agent Runtime client only when at least one knowledge-base
# id is configured; otherwise (or if client creation fails) kb_rt stays None
# and kb_retrieve returns [] without calling AWS.
try:
    kb_rt = boto3.client("bedrock-agent-runtime", region_name=AWS_REGION) if (JOB_KB_ID or COURSES_KB_ID) else None
except Exception as e:
    kb_rt = None
    print("⚠️ AWS KB unavailable:", e)

def kb_retrieve(kb_id: str, query: str, top_k: int = 5) -> List[Dict[str,Any]]:
    """Run a vector search against one knowledge base and flatten the results.

    Args:
        kb_id: Knowledge-base id; an empty value short-circuits to ``[]``.
        query: Free-text retrieval query.
        top_k: Number of results requested from the service.

    Returns:
        A list of dicts with keys ``title``, ``snippet``, ``score``, ``kb_id``,
        ``metadata``, ``source`` (S3 URI or ``None``) and ``type``. Returns
        ``[]`` when no client/id is configured or when the call fails (the
        failure is printed, not raised).
    """
    if not kb_rt or not kb_id:
        return []
    try:
        resp = kb_rt.retrieve(
            knowledgeBaseId=kb_id,
            retrievalConfiguration={"vectorSearchConfiguration": {"numberOfResults": top_k}},
            retrievalQuery={"text": query},
        )
        out = []
        for r in resp.get("retrievalResults") or []:
            # Treat explicit nulls like missing keys so one sparse result does
            # not raise and discard the whole batch via the except below.
            content = r.get("content") or {}
            text = content.get("text") or ""
            metadata = r.get("metadata") or {}
            s3_location = (r.get("location") or {}).get("s3Location") or {}
            out.append({
                # Fall back to the first line of the text (max 80 chars).
                "title": content.get("title") or text.split("\n")[0][:80],
                "snippet": content.get("snippetText") or text[:240],
                "score": r.get("score"),
                "kb_id": kb_id,
                "metadata": metadata,
                "source": s3_location.get("uri"),
                "type": metadata.get("type"),
            })
        return out
    except Exception as e:
        # Best-effort retrieval: report the problem and degrade to no results.
        print("⚠️ KB retrieve failed:", e)
        return []

def kb_search_all(query: str) -> Dict[str, List[Dict[str,Any]]]:
    """Query the jobs and courses knowledge bases (6 results each).

    Returns:
        ``{"jobs": [...], "courses": [...]}``; a section is ``[]`` when its
        knowledge-base id is not configured.
    """
    sources = (("jobs", JOB_KB_ID), ("courses", COURSES_KB_ID))
    return {
        label: (kb_retrieve(kb_id, query, 6) if kb_id else [])
        for label, kb_id in sources
    }


## 3) Informer KB v2 Retrieval

In [None]:

class InformerKB2:
    """Small HTTP client for the Informer KB v2 ``/search`` endpoint."""

    def __init__(self, base_url: str, api_key: str, tenant: str):
        # Falsy arguments fall back to "" (or "informa" for the tenant);
        # a trailing slash on the base URL is removed.
        self.base = (base_url if base_url else "").rstrip("/")
        self.key = api_key if api_key else ""
        self.tenant = tenant if tenant else "informa"

    def _headers(self):
        """Build the bearer-auth, tenant and JSON content-type headers."""
        headers = {"Authorization": f"Bearer {self.key}"}
        headers["X-Tenant"] = self.tenant
        headers["Content-Type"] = "application/json"
        return headers

    def search(self, query: str, top_k: int = 8, filters: Optional[Dict[str,Any]] = None) -> List[Dict[str,Any]]:
        """POST a search request and normalise the returned items.

        Returns ``[]`` without any network call when the base URL or API key
        is missing. HTTP errors propagate via ``raise_for_status``.
        """
        if not self.base or not self.key:
            return []
        body = json.dumps({"query": query, "k": top_k, "filters": filters or {}})
        response = requests.post(f"{self.base}/search", headers=self._headers(), data=body, timeout=20)
        response.raise_for_status()
        return [
            {
                "title": hit.get("title") or "KB item",
                # Fall back to the first 240 characters of the item text.
                "snippet": hit.get("snippet") or hit.get("text", "")[:240],
                "score": hit.get("score"),
                "metadata": hit.get("metadata") or {},
                "source": hit.get("source") or "informer_kb2",
            }
            for hit in response.json().get("results", [])
        ]

# Shared client instance used by informer_kb_search; built from the
# INFORMER_KB2_* environment settings.
informer = InformerKB2(INFORMER_KB2_BASE_URL, INFORMER_KB2_API_KEY, INFORMER_KB2_TENANT_ID)

def informer_kb_search(query: str, top_k: int = 8, filters: Optional[Dict[str,Any]] = None) -> List[Dict[str,Any]]:
    """Best-effort wrapper around ``informer.search``.

    Any exception raised by the search is printed and replaced by an empty
    result list, so callers never have to handle Informer failures.
    """
    results: List[Dict[str,Any]] = []
    try:
        results = informer.search(query, top_k, filters)
    except Exception as e:
        print("⚠️ Informer KB2 failed:", e)
    return results


## 4) Prohibitor, State, Intent

In [None]:

# Intent labels the workflow recognises; intent_persona drops anything else.
AllowedIntents = {"courses","job","development_plan","manager_toolkit","leadership_strategy","career"}

def prohibitor(user_text: str) -> Dict[str,Any]:
    """Gate a prompt to career topics and tag it with coarse intents.

    Matching is plain case-insensitive substring search, so keywords also
    match inside longer words.

    A prompt is allowed when it matches any specific intent keyword or any
    general career keyword. Previously the allow-list was maintained
    separately from the intent keywords, so prompts such as "Any training
    available?" or "I need a mentor" were assigned an intent yet blocked.

    Args:
        user_text: Raw user prompt; ``None`` is treated as an empty prompt.

    Returns:
        ``{"allowed": bool, "intents": [...], "rationale": "heuristic v0.1"}``.
        ``intents`` is ``["career"]`` when only a general keyword matched and
        ``[]`` when the prompt is blocked.
    """
    t = (user_text or "").lower()

    def mentions(*keywords: str) -> bool:
        return any(k in t for k in keywords)

    intents = []
    if mentions("job", "jobs", "opening", "openings", "role", "roles"):
        intents.append("job")
    if mentions("course", "courses", "learn", "training", "upskill"):
        intents.append("courses")
    if mentions("mentoring", "mentor"):
        intents.append("manager_toolkit")
    if mentions("objective", "okr", "okrs"):
        intents.append("leadership_strategy")
    if mentions("development plan", "30-day", "60-day", "90-day", "dev plan"):
        intents.append("development_plan")

    # General career vocabulary that allows the prompt without implying a
    # specific intent.
    general = mentions(
        "career", "course", "job", "role", "roles", "learn", "upskill",
        "development", "manager", "leadership", "okr", "coaching",
        "promotion", "ladder", "mentoring", "objective", "okrs",
    )
    allowed = bool(intents) or general
    if allowed and not intents:
        intents.append("career")
    return {"allowed": allowed, "intents": intents, "rationale": "heuristic v0.1"}

@dataclass
class AgentState:
    """Per-request context built by ``setup_state`` and passed to the tools."""
    # Email used for the profile lookup; None when the caller gave none.
    email: Optional[str] = None
    # Taken from the profile metadata's "employee_id" key, if any.
    employee_id: Optional[str] = None
    # Caller override if provided, otherwise derived from the profile.
    is_manager: bool = False
    # The raw user prompt for this request.
    prompt: Optional[str] = None

def profile_lookup(email: Optional[str] = None) -> List[Dict[str,Any]]:
    """Fetch up to 10 employee-profile rows whose metadata email matches.

    Args:
        email: Exact email compared against ``cmetadata->>'email'``.

    Returns:
        A list of ``{"document": ..., "metadata": ...}`` dicts. Metadata is
        JSON-decoded when it is not already a dict/list; undecodable values
        are wrapped as ``{"raw": value}``. Returns ``[]`` when ``PG_DSN`` or
        ``email`` is empty.
    """
    if not (PG_DSN and email):
        return []
    q = """    SELECT e.document, e.cmetadata
    FROM ai.langchain_pg_embedding e
    JOIN ai.langchain_pg_collection c ON c.uuid = e.collection_id
    WHERE c.name = 'internal_private_employee_profiles_vectorstore'
      AND (e.cmetadata->>'email') = %(email)s
    LIMIT 10;
    """
    with get_pg_conn() as conn, conn.cursor() as cur:
        cur.execute(q, {"email": email})
        rows = cur.fetchall()
    out = []
    for doc, meta in rows:
        if not isinstance(meta, (dict, list)):
            try:
                meta = json.loads(meta)
            except (TypeError, ValueError):
                # Only decode failures are expected here; a bare ``except``
                # would also have hidden KeyboardInterrupt / SystemExit.
                meta = {"raw": meta}
        out.append({"document": doc, "metadata": meta})
    return out

def derive_is_manager_from_profile(meta: dict) -> bool:
    """Heuristically decide from profile metadata whether a person is a manager.

    Checks, in order:
      1. ``is_manager`` equal to true/1/yes (case-insensitive);
      2. ``direct_reports`` greater than zero;
      3. a management keyword in ``title``.

    Args:
        meta: Profile metadata dict; every key is optional.

    Returns:
        ``True`` if any check matches, else ``False``.
    """
    if str(meta.get("is_manager", "")).lower() in {"true", "1", "yes"}:
        return True

    # Metadata values are free-form: a non-numeric count such as "n/a" must
    # count as zero reports rather than raise.
    try:
        direct_reports = int(float(meta.get("direct_reports") or 0))
    except (TypeError, ValueError, OverflowError):
        direct_reports = 0
    if direct_reports > 0:
        return True

    # Prefix one space so the " manager" keyword also matches a title that
    # *starts* with "Manager" (e.g. "Manager, Data Platform").
    title = " " + str(meta.get("title") or "").lower()
    return any(k in title for k in (" manager", "lead", "head of", "director", "vp"))

def setup_state(email: Optional[str], override_is_manager: Optional[bool], user_text: str) -> Tuple[AgentState, dict]:
    """Build the request state from the caller's inputs and stored profile.

    The first profile row found for ``email`` (if any) supplies the metadata.
    An explicit ``override_is_manager`` (anything but ``None``) wins over the
    value derived from that metadata.

    Returns:
        ``(state, profile_metadata)``; the metadata is ``{}`` when no profile
        was found or no email was given.
    """
    meta = {}
    if email:
        matches = profile_lookup(email=email)
        if matches:
            meta = matches[0]["metadata"]

    if override_is_manager is None:
        manager_flag = derive_is_manager_from_profile(meta)
    else:
        manager_flag = override_is_manager

    state = AgentState(
        email=email,
        employee_id=meta.get("employee_id"),
        is_manager=manager_flag,
        prompt=user_text,
    )
    return state, meta

def intent_persona(intents: List[str]) -> List[str]:
    """Return the recognised intents, de-duplicated and alphabetically sorted.

    Labels that are not in ``AllowedIntents`` are dropped.
    """
    recognised = {label for label in intents if label in AllowedIntents}
    return sorted(recognised)


## 5) Tools & Reflexion

In [None]:

def tool_pg_search(query: str, k: int = 8) -> List[Dict[str,Any]]:
    """Search all configured PG collections and keep at most ``k`` hits.

    ``pg_multi_search`` already truncates its result to
    ``max(6, len(collections))`` hits, so a larger ``k`` yields no
    additional results.
    """
    hits = pg_multi_search(query, PG_COLLECTIONS)
    return hits[:k]

def tool_kb_search(query: str, top_k: int = 6) -> Dict[str, List[Dict[str,Any]]]:
    """Query the jobs and courses knowledge bases.

    Previously ``top_k`` was accepted but ignored, because the call was
    delegated to ``kb_search_all``, which always asks for 6 results. The
    knowledge bases are now queried directly so the requested size is
    honoured; with the default ``top_k=6`` the result is unchanged.

    Args:
        query: Free-text retrieval query.
        top_k: Number of results requested from each knowledge base.

    Returns:
        ``{"jobs": [...], "courses": [...]}``; a section is ``[]`` when its
        knowledge-base id is not configured (``kb_retrieve`` returns ``[]``
        for an empty id).
    """
    return {
        "jobs": kb_retrieve(JOB_KB_ID, query, top_k),
        "courses": kb_retrieve(COURSES_KB_ID, query, top_k),
    }

def tool_informer_kb(query: str, top_k: int = 8, filters: Optional[Dict[str,Any]] = None) -> List[Dict[str,Any]]:
    """Tool-facing alias for ``informer_kb_search`` with the same arguments."""
    return informer_kb_search(query, top_k=top_k, filters=filters)

# Lower-case substrings that mark an item (title/tags) or a prompt as
# manager/leadership-oriented; used by looks_manager_only and
# explicit_manager_request.
MANAGER_KEYWORDS = {"manager","leadership","org design","hiring","coaching","performance review","okr","okrs","succession"}

def looks_manager_only(item: Dict[str,Any]) -> bool:
    """Return True when an item appears to target managers/leadership.

    An item qualifies if its metadata ``audience`` is "manager" or
    "leadership" (case-insensitive), or if any ``MANAGER_KEYWORDS`` entry
    occurs in its title (falling back to ``document``) or its tags.

    ``tags`` may be missing, ``None``, a single string, or a collection. A
    bare string is treated as one tag; joining its characters with spaces
    would prevent any keyword from matching.
    """
    meta = item.get("metadata") or {}
    audience = str(meta.get("audience", "")).lower()
    if audience in {"manager", "leadership"}:
        return True

    raw_tags = meta.get("tags") or []
    if not isinstance(raw_tags, (list, tuple, set, frozenset)):
        raw_tags = [raw_tags]
    tags = " ".join(str(tag) for tag in raw_tags).lower()

    title = str(item.get("title") or item.get("document") or "").lower()
    haystack = f"{title} {tags}"
    return any(kw in haystack for kw in MANAGER_KEYWORDS)

def explicit_manager_request(prompt: str) -> bool:
    """Return True when the prompt contains any ``MANAGER_KEYWORDS`` entry.

    Matching is a case-insensitive substring test; a falsy prompt never
    matches.
    """
    lowered = prompt.lower() if prompt else ""
    for keyword in MANAGER_KEYWORDS:
        if keyword in lowered:
            return True
    return False

def job_tool(query: str) -> List[Dict[str,Any]]:
    """Collect job/role hits from all three sources and de-duplicate by title.

    Takes up to 4 knowledge-base hits, 4 PG hits and 2 Informer hits, in that
    order. PG and Informer hits must have metadata ``type`` "job" or "role".
    Hits without a title are dropped; for repeated titles (compared trimmed
    and lower-cased) the first occurrence wins.

    Returns:
        At most 4 hit dicts, unchanged from what the source returned.
    """
    role_types = {"job", "role"}

    def is_role(hit: Dict[str,Any]) -> bool:
        return (hit.get("metadata") or {}).get("type") in role_types

    kb_hits = tool_kb_search(query).get("jobs", [])
    pg_hits = [h for h in tool_pg_search(query, 12) if is_role(h)]
    informer_hits = [h for h in tool_informer_kb(query, 8) if is_role(h)]
    candidates = [*kb_hits[:4], *pg_hits[:4], *informer_hits[:2]]

    unique, seen_titles = [], set()
    for job in candidates:
        raw_title = job.get("title") or (job.get("metadata") or {}).get("title") or ""
        key = raw_title.strip().lower()
        if key and key not in seen_titles:
            seen_titles.add(key)
            unique.append(job)
    return unique[:4]

def courses_tool(query: str, state: AgentState) -> List[Dict[str,Any]]:
    """Collect course hits from all sources, filtered by audience.

    Takes up to 6 knowledge-base hits, 6 PG hits and 4 Informer hits, in that
    order. PG and Informer hits must have metadata ``type == "course"``.
    Manager-only items are removed unless ``state.is_manager`` is set or the
    prompt itself asks for manager/leadership content. Duplicate titles
    (case-insensitive) keep their first occurrence.

    Args:
        query: Free-text search query.
        state: Request state; ``is_manager`` and ``prompt`` are read.

    Returns:
        At most 4 dicts with keys ``title``, ``metadata``, ``source`` and
        ``score``. ``score`` is carried over from the source hit (``None``
        if absent). It used to be dropped here, which left the score-based
        ordering in ``courses_reflexion`` with nothing to rank by.
    """
    kb = tool_kb_search(query).get("courses", [])
    pg = [h for h in tool_pg_search(query, 12) if (h.get("metadata") or {}).get("type") == "course"]
    info = [h for h in tool_informer_kb(query, 8) if (h.get("metadata") or {}).get("type") == "course"]
    courses = [*kb[:6], *pg[:6], *info[:4]]

    show_all = state.is_manager or explicit_manager_request(state.prompt or "")
    candidates = courses if show_all else [c for c in courses if not looks_manager_only(c)]

    out, seen = [], set()
    for c in candidates:
        meta = c.get("metadata") or {}
        title = c.get("title") or meta.get("title") or "Course"
        # str() guards against a non-string title coming from metadata.
        key = str(title).lower()
        if key in seen:
            continue
        seen.add(key)
        out.append({
            "title": title,
            "metadata": meta,
            "source": c.get("source") or "KB/PG",
            "score": c.get("score"),
        })
    return out[:4]

def job_reflexion(items: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
    """Return a new list of jobs ordered best-first.

    Ordering is by descending ``score`` (missing/None counts as 0.0); ties
    are broken by shorter ``title`` first. The input list is not modified.
    """
    def sort_key(job: Dict[str,Any]):
        relevance = float(job.get("score") or 0.0)
        return (-relevance, len(job.get("title", "")))

    ranked = list(items)
    ranked.sort(key=sort_key)
    return ranked

def courses_reflexion(items: List[Dict[str,Any]], is_manager: bool) -> List[Dict[str,Any]]:
    """Return a new list of courses ordered for the given audience.

    For non-managers, courses whose metadata ``audience`` is "manager" or
    "leadership" are pushed behind all others. Within each group, higher
    ``score`` comes first (missing/None counts as 0.0).
    """
    restricted = {"manager", "leadership"}

    def sort_key(course: Dict[str,Any]):
        audience = ((course.get("metadata") or {}).get("audience") or "").lower()
        demote = int(not is_manager and audience in restricted)
        return (demote, -float(course.get("score") or 0.0))

    return sorted(items, key=sort_key)


## 6) Consolidation & Compose

In [None]:

def consolidation_summary(sections: Dict[str,Any]) -> str:
    """Compose the final markdown answer from the collected sections.

    The fixed intro and the closing question are always emitted. The first
    two entries of ``sections["jobs"]`` and ``sections["courses"]`` are
    listed under their own headings when those sections are non-empty.
    Other keys in ``sections`` are not rendered.
    """
    def role_title(job: Dict[str,Any]) -> str:
        return job.get("title") or (job.get("metadata") or {}).get("title") or "Role"

    intro = "**Let’s make this worth your time.**\n_I don’t store your info — anything you share is used only for this session._\n"
    promise = (
        "In 2 minutes, I’ll:\n"
        "✅ Recommend 2 career paths in Informa\n"
        "✅ Show the 3 most valuable skills to build next\n"
        "✅ Give you 2 courses to start this month\n"
    )
    parts = [intro, promise]

    top_jobs = sections.get("jobs") or []
    top_courses = sections.get("courses") or []

    if top_jobs:
        parts.append("\n### Closest roles to explore")
        parts.extend(f"- {role_title(job)}" for job in top_jobs[:2])
    if top_courses:
        parts.append("\n### Courses to start this month")
        parts.extend(f"- {course.get('title','Course')}" for course in top_courses[:2])

    parts.append("\n**Would you like to explore other courses or open jobs?**")
    return "\n".join(parts)


## 7) Orchestrator (matches your diagram)

In [None]:

def run_workflow(user_text: str, email: Optional[str] = None, override_is_manager: Optional[bool] = None) -> Dict[str,Any]:
    """Run the full advisor pipeline for one prompt.

    Steps: gate the prompt (``prohibitor``), build the request state
    (``setup_state``), normalise the intents (``intent_persona``), gather
    one section per recognised intent, then compose the answer
    (``consolidation_summary``).

    Returns:
        When blocked: ``{"blocked": True, "gate": ..., "answer": ...}``.
        Otherwise: ``{"blocked": False, "gate": ..., "state": ...,
        "sections": ..., "answer": ...}``, where ``sections`` holds the
        results gathered for each handled intent.
    """
    gate = prohibitor(user_text)
    if not gate.get("allowed"):
        return {
            "blocked": True,
            "gate": gate,
            "answer": "I’m scoped to career planning (roles/jobs, courses, development plans, manager/leadership toolkits, leadership strategy). Ask me one of those and I’ll help fast.",
        }

    state, _profile_meta = setup_state(email=email, override_is_manager=override_is_manager, user_text=user_text)
    intents = intent_persona(gate.get("intents", []))
    text = user_text or ""
    sections = {}

    if "job" in intents:
        sections["jobs"] = job_reflexion(job_tool(user_text))

    if "courses" in intents:
        sections["courses"] = courses_reflexion(courses_tool(user_text, state), state.is_manager)

    # The context intents combine PG and Informer hits and keep the first 5;
    # the order of the two calls decides which source fills those slots first.
    if "development_plan" in intents:
        pg_hits = tool_pg_search("development plan " + text, 6)
        kb_hits = informer_kb_search("development plan " + text, 6)
        sections["development_plan"] = (pg_hits + kb_hits)[:5]

    if "manager_toolkit" in intents:
        kb_hits = informer_kb_search("manager coaching feedback OKRs " + text, 6)
        pg_hits = tool_pg_search("manager coaching " + text, 6)
        sections["manager_toolkit"] = (kb_hits + pg_hits)[:5]

    if "leadership_strategy" in intents:
        kb_hits = informer_kb_search("org capability gaps portfolio talent moves " + text, 6)
        pg_hits = tool_pg_search("capability gaps portfolio " + text, 6)
        sections["leadership_strategy"] = (kb_hits + pg_hits)[:5]

    answer = consolidation_summary(sections)
    return {"blocked": False, "gate": gate, "state": state, "sections": sections, "answer": answer}


## 8) Quick Smoke Tests

In [None]:

# Each case is (prompt, email, override_is_manager). No email is supplied, so
# no profile lookup happens and is_manager comes from the override (or from
# empty metadata when the override is None).
tests = [
    ("Reset my laptop password", None, None),  # off-topic prompt for the gate
    ("What jobs and courses should I look at for data engineering?", None, False),
    ("What jobs and courses should I look at for data engineering?", None, True),
    ("OKRs coaching guide for managers", None, False),
    ("What capability gaps should I invest in across teams?", None, True),
]

for text, email, is_mgr in tests:
    print("\n---\nQ:", text, "| override_is_manager:", is_mgr)
    out = run_workflow(text, email=email, override_is_manager=is_mgr)
    if out.get("blocked"):
        # Blocked results carry no "state"/"sections" keys, only the refusal.
        print("BLOCKED:", out["answer"])
    else:
        display(Markdown(out["answer"]))
        print("intents:", out["gate"]["intents"], "| is_manager:", out["state"].is_manager)
