# Job Hunt Agent Multi System

### Dependencies

In [1]:
!pip -q install openai beautifulsoup4 requests reportlab pdfminer.six

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from openai import OpenAI
import os, re, json, requests
from typing import List, Dict, Optional, Any, Tuple
from dataclasses import dataclass, field
from bs4 import BeautifulSoup
from getpass import getpass
from urllib.parse import urlparse
import pathlib, tempfile, uuid, json

In [3]:
# PDF scraping of CVs
from pdfminer.high_level import extract_text as pdf_extract_text

# PDF generation
from reportlab.lib.pagesizes import LETTER
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT

In [4]:
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")
print("API key set ✔️")


Enter your OpenAI API key: ··········
API key set ✔️


In [5]:
from openai import OpenAI
client = OpenAI()

# Model name (change if you want)
MODEL = "gpt-3.5-turbo"

In [6]:
# Colab download helper
try:
    from google.colab import files as colab_files
    IN_COLAB = True
except Exception:
    IN_COLAB = False

### Helper functions

In [7]:
# Extract content from CV
def extract_text_from_pdf(pdf_path: str) -> str:
    text = pdf_extract_text(pdf_path) or ""
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text[:200000]



In [8]:
# Extract contact details from CV
def sniff_contact(cv_text: str) -> Dict[str, str]:
    email = re.search(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", cv_text)
    phone = re.search(r"(\+?\d[\d \-()]{7,})", cv_text)
    # crude "name": first non-empty line that isn't email/phone
    first_lines = [ln.strip() for ln in cv_text.splitlines()[:10] if ln.strip()]
    name = ""
    for ln in first_lines:
        if (email and email.group(0) in ln) or (phone and phone.group(0) in ln):
            continue
        if len(ln.split()) <= 6:  # likely short name line
            name = ln
            break
    return {
        "name": name or "Candidate",
        "email": email.group(0) if email else "",
        "phone": phone.group(0) if phone else "",
        "location": ""  # could be improved with geo regex if needed
    }

### Downloadable cover letter pdf

In [9]:
def save_cover_letter_pdf(letter_text: str, file_path: str) -> str:

    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    doc = SimpleDocTemplate(file_path, pagesize=LETTER,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=72)

    styles = getSampleStyleSheet()
    base = ParagraphStyle('Body', parent=styles['Normal'],
                          fontName='Times-Roman', fontSize=11, leading=15, alignment=TA_JUSTIFY)
    header = ParagraphStyle('Header', parent=styles['Normal'],
                            fontName='Times-Bold', fontSize=14, leading=18, alignment=TA_LEFT, spaceAfter=12)

    chunks = letter_text.strip().splitlines()
    header_lines, body_lines, hit_blank = [], [], False
    for line in chunks:
        if not hit_blank and line.strip() == "":
            hit_blank = True
            continue
        (header_lines if not hit_blank else body_lines).append(line)

    flow = []
    if header_lines:
        flow.append(Paragraph("<br/>".join([e for e in header_lines if e.strip()]), header))
        flow.append(Spacer(1, 0.2 * inch))

    body = "\n".join(body_lines) if body_lines else letter_text
    for p in [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()]:
        flow.append(Paragraph(p.replace("\n", "<br/>"), base))
        flow.append(Spacer(1, 0.18 * inch))

    doc.build(flow)
    return file_path

### Scrape job descriptions from the job role link

In [10]:
# A little advanced Job Description scraper for robust scraping or a fallback method for user to paste Job description.

class RoleScraper:
    UA = "Mozilla/5.0 (JobAgents/1.0)"

    @staticmethod
    def scrape(url: str, timeout: int = 12) -> Dict[str, str]:
        dom = (urlparse(url).netloc or "").lower()
        try:
            html = requests.get(url, timeout=timeout, headers={"User-Agent": RoleScraper.UA}).text
        except Exception:
            # Network/HTTP issue, if the JD is not scraping → orchestrator will ask for JD text
            return {"url": url, "title_raw": "", "description": "", "company_name": "", "gated": True}

        # soup (use lxml if available, else fallback quietly)
        try:
            soup = BeautifulSoup(html, "lxml")
        except Exception:
            soup = BeautifulSoup(html, "html.parser")

        # --- title ---
        ogt = soup.select_one('meta[property="og:title"], meta[name="og:title"]')
        title = (ogt.get("content") or "").strip() if ogt and ogt.get("content") else (soup.title.get_text(strip=True) if soup.title else "")
        title = title[:300]

        # --- description: JSON-LD JobPosting first ---
        desc, company = "", ""
        for tag in soup.select('script[type="application/ld+json"]'):
            try:
                data = json.loads(tag.string or "")
            except Exception:
                continue
            objs = data if isinstance(data, list) else [data]
            for obj in objs:
                if isinstance(obj, dict) and "JobPosting" in str(obj.get("@type", "")):
                    if isinstance(obj.get("description"), str) and len(obj["description"]) > len(desc):
                        desc = obj["description"]
                    org = obj.get("hiringOrganization") or {}
                    nm = (org.get("name") if isinstance(org, dict) else org) or ""
                    if isinstance(nm, str) and len(nm) > len(company):
                        company = nm.strip()

        if desc:
            desc = re.sub(r"<br\s*/?>", "\n", desc, flags=re.I)
            desc = re.sub(r"<[^>]+>", "", desc)

        # --- if still thin (scraped JD is limited), collect biggest text block (ATS selectors first) ---
        if len(desc) < 200:
            selectors = [
                ".opening .content", ".opening .description",
                ".posting .section", ".posting .content",
                "article#job-application", ".job-body", ".job__description",
                "[data-ashby-job-posting-description]",
                "section", "article", "div"
            ]
            blocks, seen = [], set()
            for sel in selectors:
                for node in soup.select(sel):
                    txt = node.get_text("\n", strip=True)
                    if txt and len(txt) > 200:
                        h = hash(txt)
                        if h in seen:
                            continue
                        seen.add(h); blocks.append(txt)
            if blocks:
                desc = max(blocks, key=len)
            else:
                desc = soup.get_text("\n", strip=True)

        # normalize desc
        desc = re.sub(r"[ \t]+\n", "\n", desc or "")
        desc = re.sub(r"\n{3,}", "\n\n", desc).strip()[:20000]

        # --- company fallback: meta → host → regex ---
        if not company:
            for sel in ['meta[name="company"]','meta[property="og:site_name"]','meta[name="twitter:site"]']:
                el = soup.select_one(sel)
                if el and el.get("content"):
                    company = el["content"].strip().lstrip("@"); break
        if not company:
            host = dom.split(":")[0].lower().removeprefix("www.")
            core = (host.split(".")[-2] if "." in host else host).replace("-", " ").title()
            company = core

        # last-ditch from title/desc if host is generic
        if company in {"Jobs", "Careers", ""}:
            m = re.search(r"(?:company|client|organization)\s*:\s*([\w&\-\.\s,]+)", desc, re.I) \
                or re.search(r"[-–—]\s*([A-Za-z0-9&\-\.\s]{2,})\s*(?:\(|$)", title)
            if m: company = m.group(1).strip(" ,|·-()")

        return {
            "url": url,
            "title_raw": title,
            "description": desc,
            "company_name": company[:200],
            "gated": len(desc) < 200,   # your orchestrator already uses description length / needs_jd_text
        }


## Base Agent syntax

In [11]:
@dataclass
class AgentMessage:
    role: str
    content: str

@dataclass
class BaseAgent:
    name: str
    system_prompt: str

    def call_openai(self, messages: List[AgentMessage],
                    model: str = MODEL, temperature: int = 0.45, max_tokens: int = 2000) -> str:
        payload = [{"role": m.role, "content": m.content} for m in messages]
        payload.insert(0, {"role": "system", "content": self.system_prompt})
        completion = client.chat.completions.create(
            model=model,
            max_completion_tokens=max_tokens,
            temperature=0.45,
            messages=payload,
)
        return completion.choices[0].message.content.strip()

## Cover letter agent

In [12]:
class CoverLetterAgent(BaseAgent):
    def run(self, cv_text: str, job: Dict[str,str], candidate: Dict[str,str],
            output_mode: str = "pdf") -> Dict[str,Any]:

        prompt = f"""
          You are an expert career storyteller and professional cover letter writer. Your style is persuasive, authentic, and laser-focused on connecting a candidate's achievements to a company's needs. You avoid corporate jargon and clichés.

          Your mission is to write a compelling, concise cover letter that makes the hiring manager excited to interview this candidate.

          First, perform this internal analysis (do not write this part in the output):
          1.  **Deconstruct the Role:** What are the top 3 most critical responsibilities and qualifications listed in the job description? What is the core problem this role solves?
          2.  **Map the Candidate:** For each critical point, find the strongest piece of evidence (a specific project, skill, or quantified achievement) from the candidate's CV.
          3.  **Find the Narrative:** What is the core story here? Is it about someone with deep domain knowledge, someone with similar experience, or someone pivoting their skills in a unique way? The letter must tell this story.

          Now, using your analysis, write the cover letter. It must have atleast:
          - A clear contact header.
          - An opening paragraph that hooks the reader and states the specific role.
          - A body paragraph that provides concrete, quantified evidence of how the candidate's skills solve the company's needs. Focus on the 2-3 most impactful points you identified.
          - A closing paragraph that conveys genuine enthusiasm for the company's mission and includes a clear call to action.

          Candidate CV:
          {cv_text}

          Job Posting:
          Title: {job.get('title_raw','')}
          URL: {job.get('url','')}
          Description: {job.get('description','')}

          Candidate Details for Header:
          Name: {candidate.get('name','Candidate')}
          Email: {candidate.get('email','')}
          Phone: {candidate.get('phone','')}
          Location: {candidate.get('location','')}

          Output ONLY the full, final letter text, starting with the header.
      """

        letter = self.call_openai([AgentMessage("user", prompt)],  max_tokens=3000)


        safe = re.sub(r"[^A-Za-z0-9]+","_", candidate.get("name","Candidate")).strip("_") or "Candidate"
        pdf_path = str(pathlib.Path(tempfile.gettempdir()) / f"{safe}_{uuid.uuid4().hex[:8]}_Cover_Letter.pdf")
        save_cover_letter_pdf(letter, pdf_path)

        return {"letter": letter, "pdf_path": pdf_path}


# Feedback loop function to improve cover letter based on user feedback
    def revise(self, original_letter: str, feedback: str,
               cv_text: str, job: Dict[str,str], candidate: Dict[str,str]) -> str:
        """Improve the ORIGINAL Cover letter using user feedback (this agent does the editing)."""
        prompt = f"""
                  Revise the following cover letter to address the user's feedback while preserving facts.
                 Improve clarity, specificity, and impact; avoid clichés.

                  User feedback: {feedback.strip() or "(none provided)"}

                  CV:
                  {cv_text}

                  Original letter:
                  {original_letter}

                  Output:
                  Return ONLY the revised full letter starting with the contact header. No commentary.
                 """
        return self.call_openai([AgentMessage("user", prompt)], max_tokens=3000)


## Networking agent

In [13]:
class NetworkingAgent(BaseAgent):
    """
    Generates two relationship-first outreach messages:
      - referral_request (LinkedIn DM)
      - cold_email (starts with "Subject: ...")
    Returns: {"referral_request": str, "cold_email": str}
    """

    # internal helpers
    @staticmethod
    def _clip(s: str, n: int) -> str:
        return (s or "")[:n]

    @staticmethod
    def _safe_json(s: str) -> Dict[str, Any]:
        """
        Parse JSON from the model; if it adds extra prose, extract the last {...} block.
        """
        s = (s or "").strip()
        try:
            return json.loads(s)
        except Exception:
            pass
        m = re.search(r"\{[\s\S]*\}\s*$", s)
        if m:
            try:
                return json.loads(m.group(0))
            except Exception:
                return {}
        return {}

    @staticmethod
    def _ensure_subject(email_text: str) -> str:
        """
        Ensure email begins with a single 'Subject: ...' line; remove duplicates if any.
        """
        t = (email_text or "").strip()
        if not re.match(r"(?i)^subject\s*:", t):
            t = "Subject: Quick question about {{role}} at {{company}}\n\n" + t
        lines = t.splitlines()
        cleaned = [lines[0]] + [ln for ln in lines[1:] if not re.match(r"(?i)^subject\s*:", ln)]
        return "\n".join(cleaned).strip()

    @staticmethod
    def _wc(txt: str) -> int:
        return len(re.findall(r"\w+", txt or ""))

    def _fallback_messages(self, job_url: str = "") -> Dict[str, str]:
        """
        High-quality, neutral fallbacks used only if the model output is unusable.
        Keeps placeholders for personalization and stays within length targets.
        """
        dm = (
            "Hi {{recipient_name}}, I’m exploring the {{role}} role at {{company}} ({{job_link}}). "
            "I’ve shipped results in similar problem spaces and would value your perspective. "
            "If you have 10–15 minutes this week, could I ask two focused questions about the team’s priorities "
            "and what success looks like in the first 90 days? Happy to keep it brief. — {{your_name}}"
        )
        email = (
            "Subject: Quick question about {{role}} at {{company}}\n\n"
            "Hi {{recipient_name}},\n\n"
            "I’m preparing to apply for the {{role}} role at {{company}} ({{job_link}}). From my background, I’ve led work that "
            "maps closely to the challenges your team tackles and I’m keen to understand how you approach them.\n\n"
            "Would you be open to a 10–15 minute chat, or a quick reply to two specific questions about the role’s priorities and metrics for success? "
            "I’ll keep it concise and come prepared.\n\n"
            "Thanks,\n{{your_name}}"
        )
        return {"referral_request": dm, "cold_email": email}

    def _guided_retry(self, reason: str) -> Dict[str, Any]:
        """
        Ask the model once to return a clean JSON object with the required keys.
        This keeps output model-authored without injecting generic content unless still missing.
        """
        fix_msg = f"""
              Your last reply violated the requirement: {reason}.
              Return ONE valid JSON object ONLY (no markdown, no commentary) with EXACTLY these keys:
              "referral_request": "<LinkedIn DM text>"
              "cold_email": "Subject: ...\\n\\n<Email body>"
                   """.strip()
        raw2 = self.call_openai([AgentMessage("user", fix_msg)], max_tokens=400)
        return self._safe_json(raw2)

    def run(
        self,
        cv_text: str,
        job: Dict[str, str],
        company_hint: str = "",
        tone: str = "Neutral professional",
    ) -> Dict[str, Any]:
        """
        Generate initial outreach messages.
        """
        cv = self._clip(cv_text, 6000)
        jd = self._clip(job.get("description", "") or "", 4000)
        title = (job.get("title_raw") or "").strip()
        company = company_hint or (title.split(" at ")[-1].split("|")[0].strip() if " at " in title else "")

        prompt = f"""
              You are a principal networking strategist. Generate TWO messages that start a relationship with an employee at {{company}} about the {{role}} role—aim for a brief chat or advice, NOT a referral ask.

              Tone: {tone}. Concise, specific, respectful of time. Avoid region-specific idioms unless tone requests it.

              Constraints
              - Use placeholders where helpful: {{{{recipient_name}}}}, {{{{your_name}}}}, {{{{role}}}}, {{{{company}}}}, {{{{job_link}}}}.
              - LinkedIn DM (“referral_request”): 70–140 words, 2–4 short sentences, no Subject line.
              - Cold email (“cold_email”): 90–180 words. FIRST LINE MUST be: "Subject: ...".
              - Use ONE concrete hook from the JD/company (recent work, product, problem space) to signal research.
              - Make ONE easy CTA (10–15 min chat or 1–2 specific questions). No begging, no “please refer me”.

              Banned
              - “I hope this finds you well”, emojis, excessive exclamation, generic praise, apologies.
              - Inventing specifics not implied by the JD/CV.

              Structure for each message
              1) Hook (specific to {{company}}/role via JD),
              2) Bridge (candidate’s most relevant result/skill—quantify once if possible),
              3) CTA (single, low-friction ask).

              Context
              - CV (excerpt): {cv}
              - Job title: {title}
              - Job link: {job.get('url','')}
              - JD (excerpt): {jd}
              - Target company: "{company or 'the company'}"

              Output
              Return ONE valid JSON object ONLY (no markdown, no commentary) with EXACTLY these keys:
              "referral_request": "<LinkedIn DM text>"
              "cold_email": "Subject: ...\\n\\n<Email body>"
              """.strip()


        raw = self.call_openai([AgentMessage("user", prompt)], temperature=0.4, max_tokens=3000)
        data = self._safe_json(raw)

        rr = (data.get("referral_request") or "").strip()
        ce = (data.get("cold_email") or "").strip()

        # guided retry if missing/empty
        reasons = []
        if not rr: reasons.append("missing 'referral_request'")
        if not ce: reasons.append("missing 'cold_email'")
        if reasons:
            data2 = self._guided_retry(", ".join(reasons))
            rr = (data2.get("referral_request") or rr).strip()
            ce = (data2.get("cold_email") or ce).strip()

        # final fallback to quality templates if still empty
        if not rr or not ce:
            fb = self._fallback_messages(job.get("url", ""))
            rr = rr or fb["referral_request"]
            ce = ce or fb["cold_email"]

        # enforce subject + soft caps
        ce = self._ensure_subject(ce)
        if self._wc(rr) > 160: rr = " ".join(rr.split()[:160])
        if self._wc(ce) > 220: ce = " ".join(ce.split()[:220])

        return {"referral_request": rr, "cold_email": ce}

    def revise(
        self,
        original_msgs: Dict[str, str],
        feedback: str,
        cv_text: str,
        job: Dict[str, str],
        candidate_name: str,
        tone: str = "Neutral professional",
    ) -> Dict[str, str]:
        """
        Improve the latest messages per user feedback (relationship-first; no direct referral ask).
        """
        cv = self._clip(cv_text, 6000)
        jd = self._clip(job.get("description", "") or "", 4000)
        fb = (feedback or "Make it sharper, more specific, and keep one clear, low-friction CTA.").strip()

        prompt = f"""
                    You are a principal outreach editor. Improve the TWO messages based on the user’s feedback while keeping the relationship-first approach (advice/insight or brief chat; do NOT ask for a referral).

                    Tone: {tone}. Concise, specific, respectful of time.

                    Keep
                    - “referral_request” (LinkedIn DM): 70–140 words; 2–4 short sentences; no Subject line.
                    - “cold_email”: 90–180 words; the FIRST LINE MUST be "Subject: ...".
                    - Include one concrete hook from JD/company; one clear, low-friction CTA.
                    - Preserve placeholders: {{{{recipient_name}}}}, {{{{your_name}}}}, {{{{role}}}}, {{{{company}}}}, {{{{job_link}}}}.

                    User feedback
                    {fb}

                    Original messages (JSON)
                    {json.dumps(original_msgs, ensure_ascii=False, indent=2)}

                    Support context
                    - Candidate: {candidate_name}
                    - CV (excerpt): {cv}
                    - JD (excerpt): {jd}

                    Output
                    Return ONE valid JSON object ONLY (no markdown, no commentary) with EXACTLY these keys:
                    "referral_request": "<LinkedIn DM text>"
                    "cold_email": "Subject: ...\\n\\n<Email body>"
                  """.strip()

        raw = self.call_openai([AgentMessage("user", prompt)], max_tokens=1000)
        data = self._safe_json(raw)

        rr = (data.get("referral_request") or "").strip()
        ce = (data.get("cold_email") or "").strip()

        # guided retry if missing/empty
        reasons = []
        if not rr: reasons.append("missing 'referral_request'")
        if not ce: reasons.append("missing 'cold_email'")
        if reasons:
            data2 = self._guided_retry(", ".join(reasons))
            rr = (data2.get("referral_request") or rr).strip()
            ce = (data2.get("cold_email") or ce).strip()

        # If still empty after retry, preserve last good content;
        # if even that is empty (first pass was broken), fall back to templates.
        if not rr:
            rr = (original_msgs.get("referral_request") or "").strip()
        if not ce:
            ce = (original_msgs.get("cold_email") or "").strip()

        if not rr or not ce:
            fb_msgs = self._fallback_messages(job.get("url", ""))
            rr = rr or fb_msgs["referral_request"]
            ce = ce or fb_msgs["cold_email"]

        # enforce subject + soft caps
        ce = self._ensure_subject(ce)
        if self._wc(rr) > 160: rr = " ".join(rr.split()[:160])
        if self._wc(ce) > 220: ce = " ".join(ce.split()[:220])

        return {"referral_request": rr, "cold_email": ce}


## CV review agent

In [14]:
class CVReviewAgent(BaseAgent):

    def run(self, cv_text: str, job: Dict[str, str]) -> Dict[str, Any]:
        prompt = f"""
                    You are an executive recruiter and career coach with deep expertise in both human psychology and Applicant Tracking Systems (ATS). Your advice is strategic, prioritizing the 20% of changes that will yield 80% of the impact.

                    Your goal is to analyze the provided CV against the job description and give the candidate a clear, actionable plan.

                    First, conduct this internal analysis:
                    1.  **ATS Screen:** Scan for critical keyword alignment between the CV and the job description. Are there glaring omissions?
                    2.  **Human Screen (6-Second Test):** Could a human recruiter, in 6 seconds, understand the candidate's value proposition for this specific role? Is the impact clear and quantified?
                    3.  **Strategic Fit:** Does the candidate's experience logically lead to this role? Is it a step up, a pivot, or a lateral move? How should the CV be framed to tell the right story?

                    Based on your analysis, produce a STRICT JSON output with the following enhanced schema. Be direct, insightful, and encouraging.

                    Job Posting:
                    Title: {job.get('title_raw','')}
                    Description: {job.get('description','')}

                    Candidate CV:
                    {cv_text}

                    JSON Output Schema:
                    {{
                      "verdict": "Strong Fit - Apply Now" | "Good Fit - Minor Revisions Recommended" | "Potential Fit - Strategic Repositioning Needed" | "Poor Fit - Reconsider",
                      "overall_confidence": number, // A score from 0.0 to 1.0
                      "summary_analysis": {{
                          "strengths": "What works well in the CV for this specific role. Be specific.",
                          "weaknesses": "What is currently holding the CV back from being truly compelling.",
                          "strategic_angle": "The core narrative the candidate should emphasize to stand out (e.g., 'Leverage your project management skills to pivot from backend to a full-stack leadership role')."
                      }},
                      "keyword_optimization": {{
                          "missing_keywords": ["List of critical, context-aware keywords missing from the CV."],
                          "overused_keywords": ["List of keywords that might be seen as 'stuffing' and should be used more naturally."]
                      }},
                      "prioritized_edits": [
                        {{
                          "priority": "High" | "Medium" | "Low",
                          "section": "Summary" | "Experience > Role at Company" | "Projects" | "Skills",
                          "suggestion": "A clear, actionable suggestion for this section.",
                          "reasoning": "Briefly explain WHY this change is important (e.g., 'To pass the ATS screen' or 'To catch the hiring manager's eye').",
                          "example_bullets": [
                            "A rewritten bullet point demonstrating the suggestion.",
                            "Another example bullet."
                          ]
                        }}
                      ]
                    }}
                """
        raw = self.call_openai(
            [AgentMessage(role="user", content=prompt)],
            temperature=0.25,
            max_tokens=1200
        )

        # Parse JSON robustly
        try:
            # try direct load
            data = json.loads(raw)
        except Exception:
            try:
                # grab last JSON object/array from the text
                m = re.search(r"\{[\s\S]*\}\s*$", raw)
                data = json.loads(m.group(0)) if m else {}
            except Exception:
                data = {}

        # Fallback if parsing failed or missing keys
        if not isinstance(data, dict) or "decision" not in data:
            data = {
                "decision": "revise_cv",
                "rationale": (raw or "Model returned a non-JSON response. Defaulting to 'revise_cv'.")[:600],
                "missing_keywords": [],
                "edits": [],
                "confidence": 0.5
            }

        # Light sanity fixes
        data.setdefault("missing_keywords", [])
        data.setdefault("edits", [])
        try:
            c = float(data.get("confidence", 0.5))
            data["confidence"] = max(0.0, min(1.0, c))
        except Exception:
            data["confidence"] = 0.5

        return data


## Orchestrator agent

In [15]:
@dataclass
class Orchestrator:
    cover: "CoverLetterAgent"
    net: "NetworkingAgent"
    review: "CVReviewAgent"

    def route(
        self,
        option: str,
        cv_pdf_path: str,
        job_url: str,
        jd_text_optional: str = "",
    ) -> Dict[str, Any]:
        """
        Fan-out to the requested agent and return a payload the UI can use.
        NOTE: We no longer support an 'output_mode' toggle. For cover letters we always
        return both the rendered text AND a generated PDF path.

        Returns (per option):

        - cover_letter:
            {
              "type": "cover_letter",
              "cv_text": <str>, "job": <dict>, "candidate": <dict>,
              "letter": <str>, "orig_letter": <str>, "pdf_path": <str>,
              "messages": None, "orig_messages": None,
              "needs_jd_text": <bool>
            }

        - networking:
            {
              "type": "networking",
              "cv_text": <str>, "job": <dict>, "candidate": <dict>,
              "letter": None, "orig_letter": None, "pdf_path": None,
              "messages": {"referral_request": <str>, "cold_email": <str>},
              "orig_messages": { ...copy of messages... },
              "needs_jd_text": <bool>
            }

        - cv_review:
            {
              "type": "cv_review",
              "cv_text": <str>, "job": <dict>, "candidate": <dict>,
              "review": <dict>,
              "letter": None, "orig_letter": None, "pdf_path": None,
              "messages": None, "orig_messages": None,
              "needs_jd_text": <bool>
            }
        """

        # Helper functions
        cv_text = extract_text_from_pdf(cv_pdf_path)
        candidate = sniff_contact(cv_text)  # best-effort name/email/phone
        job = RoleScraper.scrape(job_url)

        # Detect thin/gated pages (e.g., LinkedIn or JS-heavy boards)
        raw_desc = (job.get("description") or "").strip()
        needs_jd = len(raw_desc) < 200 and not (jd_text_optional and jd_text_optional.strip())

        # If the user supplied JD text and scrape looked thin, use it
        if jd_text_optional and len(raw_desc) < 200:
            job["description"] = jd_text_optional

        opt = (option or "").lower().strip()

        # Route
        if opt == "cover_letter":
            # New flow: CoverLetterAgent always returns both text + pdf_path
            result = self.cover.run(
                cv_text=cv_text,
                job=job,
                candidate=candidate,
            )
            return {
                "type": "cover_letter",
                "cv_text": cv_text, "job": job, "candidate": candidate,
                "letter": result["letter"],            # current draft shown in UI
                "orig_letter": result["letter"],       # keep original for optional "reset" features
                "pdf_path": result.get("pdf_path"),    # used by Download button
                "messages": None, "orig_messages": None,
                "needs_jd_text": needs_jd,
            }

        elif opt == "networking":
            msgs = self.net.run(cv_text=cv_text, job=job)  # {"referral_request","cold_email"}
            return {
                "type": "networking",
                "cv_text": cv_text, "job": job, "candidate": candidate,
                "letter": None, "orig_letter": None, "pdf_path": None,
                "messages": msgs,                        # for UI preview
                "orig_messages": dict(msgs),             # preserve first draft for optional "reset"
                "needs_jd_text": needs_jd,
            }

        elif opt == "cv_review":
            rev = self.review.run(cv_text=cv_text, job=job)
            return {
                "type": "cv_review",
                "cv_text": cv_text,
                "job": job,
                "candidate": candidate,
                "review": rev,
                "letter": None,
                "orig_letter": None,
                "pdf_path": None,
                "messages": None,
                "orig_messages": None,
                "needs_jd_text": needs_jd,
            }

        else:
            raise ValueError("Unknown option. Use one of: cover_letter, networking, cv_review.")


In [16]:
orch = Orchestrator(
    cover=CoverLetterAgent(name="cover_letter", system_prompt="You write precise, authentic cover letters tailored to the role."),
    net=NetworkingAgent(name="networking", system_prompt="You craft concise, human referral messages that get responses."),
    review=CVReviewAgent(name="cv_review", system_prompt="You are an ATS-savvy reviewer who gives actionable, minimal edits.")
)


## Critic agent

### Routes user feedback to the desired agents



In [17]:
class ReviewCoordinator:
    def __init__(self, cover_agent, net_agent):
        self.cover = cover_agent
        self.net = net_agent

    def handle(self, state: dict, satisfaction: str, feedback: str) -> dict:
        """
        Yes  -> end loop (done=True)
        No   -> revise the LATEST content (letter/messages) with feedback
        """
        if not state or not state.get("type"):
            return {**(state or {}), "message": "Run a generator first.", "done": False}

        # Treat anything other than explicit "Yes" as a request to iterate
        if satisfaction == "Yes":
            return {**state, "message": "✅ Saved.", "done": True}

        fb = (feedback or "").strip()
        fb = fb or "Please make it clearer, more specific, and better aligned to the role."

        try:
            if state["type"] == "cover_letter":
              improved = self.cover.revise(...)
              import re, tempfile, pathlib, uuid
              safe = re.sub(r"[^A-Za-z0-9]+","_", state.get("candidate",{}).get("name","Candidate")).strip("_") or "Candidate"
              pdf_path = str(pathlib.Path(tempfile.gettempdir()) / f"{safe}_{uuid.uuid4().hex[:8]}_Cover_Letter.pdf")
              save_cover_letter_pdf(improved, pdf_path)

              return {**state, "letter": improved, "pdf_path": pdf_path,
                      "message": "🔁 Updated per your feedback.", "done": False}


            elif state["type"] == "networking":
                # Revise from current messages (latest)
                improved = self.net.revise(
                    original_msgs=state.get("messages", {}),
                    feedback=fb,
                    cv_text=state.get("cv_text", ""),
                    job=state.get("job", {}),
                    candidate_name=state.get("candidate", {}).get("name", "Candidate"),
                )
                state["messages"] = improved
                return {**state, "message": "🔁 Updated per your feedback.", "done": False}

            else:
                return {**state, "message": "Unknown state.", "done": False}

        except Exception as e:
            # Don’t break the loop—return the previous content with an error note
            return {**state, "message": f"⚠️ Couldn’t apply revision: {e}", "done": False}


In [18]:
# Instantiate after you've built `orch` (which holds your agents)
REVIEW = ReviewCoordinator(cover_agent=orch.cover, net_agent=orch.net)
print("REVIEW coordinator ready ✔️")

REVIEW coordinator ready ✔️


## Gradio block to bring your code to life

In [19]:
import gradio as gr

In [23]:
# pretty formatters
def format_networking(msgs: dict) -> str:
    rr = msgs.get("referral_request", "").strip()
    ce = msgs.get("cold_email", "").strip()
    parts = []
    if rr:
        parts.append(f"### Referral request (DM)\n{rr}")
    if ce:
        parts.append(f"### Cold email\n{ce}")
    return "\n\n".join(parts) if parts else "_No messages returned._"

def format_review(r: dict) -> str:
    decision = r.get("decision","")
    badge = "✅ Apply now" if decision == "apply_now" else "🛠️ Revise CV first"
    md = [f"**Decision:** {badge}",
          f"**Confidence:** {r.get('confidence','—')}",
          "",
          f"**Why:** {r.get('rationale','').strip() or '—'}"]
    kws = r.get("missing_keywords") or []
    if kws:
        md += ["", "**Add these keywords (where genuine):**", "- " + "\n- ".join(kws)]
    edits = r.get("edits") or []
    if edits:
        md += ["", "**Targeted edits:**"]
        for e in edits:
            sec = e.get("section","(section)")
            sug = e.get("suggestion","")
            bullets = e.get("example_bullets") or []
            md += [f"- *{sec}*: {sug}"]
            for b in bullets[:3]:
                md += [f"    • {b}"]
    return "\n".join(md)

# state helpers
def clear_state():
    return {
        "type": None,
        "cv_text": "", "job": {}, "candidate": {},
        "letter": None, "orig_letter": None, "pdf_path": None,
        "messages": None, "orig_messages": None
    }

def big_notice(title: str, body: str) -> str:
    return f"""
<div style="
  margin: 18px auto;
  max-width: 880px;
  padding: 18px 22px;
  border: 2px solid #f59e0b;
  background: #fff7ed;
  color: #7c2d12;
  border-radius: 12px;
  text-align: center;
  font-size: 20px;
  line-height: 1.5;
">
  <div style="font-weight: 800; font-size: 22px; margin-bottom: 6px;">{title}</div>
  <div>{body}</div>
</div>
""".strip()


def run_flow(cv_pdf, job_url, option, jd_text, state):
    if cv_pdf is None:
        return "Please upload a PDF CV.", None, clear_state(), gr.update(value=None), gr.update(value="")
    if not job_url:
        return "Please paste a job URL.", None, clear_state(), gr.update(value=None), gr.update(value="")

    # Call orchestrator
    out = orch.route(
        option=option,
        cv_pdf_path=cv_pdf.name,
        job_url=job_url,
        jd_text_optional=jd_text or ""
    )
    '''
    # Ask for JD paste if the page was gated/empty
    if out.get("needs_jd_text"):
        msg = ("The job page looks gated/empty. Paste the job description into "
               "the 'Optional JD text' box and click Generate again.")
        return msg, None, clear_state(), gr.update(value=None), gr.update(value="")
    # Prompt user to paste JD if the page was gated/empty
    '''

    if out.get("needs_jd_text"):
        alert_html = big_notice(
            "Job page looks gated or empty",
            "Paste the job description into the <b>Optional JD text</b> box and click <b>Generate</b> again."
        )
        return (
            alert_html,
            gr.update(value=None),
            gr.update(value=None),
            clear_state(),
            gr.update(value=None),
            gr.update(value="")
        )


    if out["type"] == "cover_letter":
        display = out["letter"]
        file_path = out.get("pdf_path")  # always present for cover letters
        new_state = {
            "type": "cover_letter",
            "cv_text": out["cv_text"], "job": out["job"], "candidate": out["candidate"],
            "letter": out["letter"], "orig_letter": out["letter"],
            "pdf_path": out.get("pdf_path"),
            "messages": None, "orig_messages": None
        }
        return display, file_path, new_state, gr.update(value=None), gr.update(value="")

    elif out["type"] == "networking":
        pretty = format_networking(out["messages"])
        md_path = str(pathlib.Path(tempfile.gettempdir()) / "networking_messages.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(pretty)
        new_state = {
            "type": "networking",
            "cv_text": out["cv_text"], "job": out["job"], "candidate": out["candidate"],
            "letter": None, "orig_letter": None, "pdf_path": None,
            "messages": out["messages"], "orig_messages": dict(out["messages"])
        }
        return pretty, md_path, new_state, gr.update(value=None), gr.update(value="")

    else:  # cv_review
        pretty = format_review(out["review"])
        md_path = str(pathlib.Path(tempfile.gettempdir()) / "cv_review.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(pretty)
        return pretty, md_path, clear_state(), gr.update(value=None), gr.update(value="")

def review_step(satisfaction, feedback, state):
    if not state or not state.get("type"):
        return "Run one of the generators first.", None, state, gr.update(value=None), gr.update(value="")

    updated = REVIEW.handle(state, satisfaction, feedback)

    # ended?
    if updated.get("done"):
        if updated["type"] == "cover_letter":
            disp = f"✅ Saved.\n\n{updated['letter']}"
            fpath = updated.get("pdf_path")
            return disp, fpath, clear_state(), gr.update(value=None), gr.update(value="")
        elif updated["type"] == "networking":
            pretty = format_networking(updated["messages"])
            md_path = str(pathlib.Path(tempfile.gettempdir()) / "networking_messages.md")
            with open(md_path, "w", encoding="utf-8") as f:
                f.write(pretty)
            return f"✅ Saved.\n\n{pretty}", md_path, clear_state(), gr.update(value=None), gr.update(value="")
        else:
            return "✅ Saved.", None, clear_state(), gr.update(value=None), gr.update(value="")

    # still iterating
    if updated["type"] == "cover_letter":
        fpath = updated.get("pdf_path")
        return updated["letter"], fpath, updated, gr.update(value=None), gr.update(value="")
    if updated["type"] == "networking":
        pretty = format_networking(updated["messages"])
        md_path = str(pathlib.Path(tempfile.gettempdir()) / "networking_messages.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(pretty)
        return pretty, md_path, updated, gr.update(value=None), gr.update(value="")

    return updated.get("message","Unknown state."), None, updated, gr.update(value=None), gr.update(value="")



In [24]:
# ---------- UI ----------
with gr.Blocks(title="Job Hunt Agents") as demo:
    gr.Markdown("## Job Hunt Agents\nUpload your CV (PDF) + job link → choose what to generate.")

    with gr.Row():
        cv_pdf = gr.File(label="CV (PDF)", file_types=[".pdf"])
        job_url = gr.Textbox(label="Job link", placeholder="https://...")

    jd_text = gr.Textbox(
        label="Optional JD text (if the link is gated/login-only)",
        lines=5,
        placeholder="Paste the job description here if the page is behind login."
    )

    option = gr.Radio(
        choices=[
            ("Cover letter", "cover_letter"),
            ("Referral outreach (DM + cold email)", "networking"),
            ("CV review & tips", "cv_review"),
        ],
        value="cover_letter",
        label="What do you want?"
    )

    run_btn = gr.Button("Generate", variant="primary")

    output_md = gr.Markdown(label="Result")
    file_out = gr.File(label="Download (PDF/Markdown)", interactive=False)

    gr.Markdown("### Review the result")
    satisfaction = gr.Radio(
        choices=["Yes","No"], value=None,
        label="Are you satisfied with the output?"
    )
    feedback = gr.Textbox(
        label="If 'No', what should we change?",
        lines=4,
        placeholder="e.g., shorter intro, stronger metrics, warmer tone"
    )
    apply_btn = gr.Button("Apply review / Improve")

    state = gr.State(clear_state())

    # No pdf/text toggle; wiring matches new run_flow signature
    run_btn.click(
        fn=run_flow,
        inputs=[cv_pdf, job_url, option, jd_text, state],
        outputs=[output_md, file_out, state, satisfaction, feedback]
    )

    apply_btn.click(
        fn=review_step,
        inputs=[satisfaction, feedback, state],
        outputs=[output_md, file_out, state, satisfaction, feedback]
    )

In [25]:
demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://97659a577e0f01cb6b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


    Output components:
        [markdown, file, state, radio, textbox]
    Output values returned:
        ["<div style="
  margin: 18px auto;
  max-width: 880px;
  padding: 18px 22px;
  border: 2px solid #f59e0b;
  background: #fff7ed;
  color: #7c2d12;
  border-radius: 12px;
  text-align: center;
  font-size: 20px;
  line-height: 1.5;
">
  <div style="font-weight: 800; font-size: 22px; margin-bottom: 6px;">Job page looks gated or empty</div>
  <div>Paste the job description into the <b>Optional JD text</b> box and click <b>Generate</b> again.</div>
</div>", {'value': None, '__type__': 'update'}, {'value': None, '__type__': 'update'}, {'type': None, 'cv_text': '', 'job': {}, 'candidate': {}, 'letter': None, 'orig_letter': None, 'pdf_path': None, 'messages': None, 'orig_messages': None}, {'value': None, '__type__': 'update'}, {'value': '', '__type__': 'update'}]


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://97659a577e0f01cb6b.gradio.live


