In [5]:
import json
from typing import List, Optional

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from llama_cloud_services import LlamaParse
from pydantic import BaseModel, Field

# Load environment variables (presumably the API keys used by the cells below)
# via python-dotenv. The trailing ";" suppresses the cell's echoed return value.
load_dotenv();

  from llama_cloud_services import LlamaParse


In [None]:
# api_key = ""

In [None]:
# Parse the resume PDF page by page using LlamaParse's non-LLM mode.
parser_no_llm = LlamaParse(
    # api_key=api_key,
    parse_mode="parse_page_without_llm",
)
result = parser_no_llm.parse("Resume.pdf")

# Join pages with a newline so the last token of one page is not fused with
# the first token of the next (plain concatenation glued them together).
# str.join also avoids rebuilding the string on every iteration.
text = "\n".join(page.text for page in result.pages)

Started parsing the file under job_id e09c5deb-dc37-4a66-ba1e-25263f6891b8


In [8]:
class Experience(BaseModel):
    # One work-history entry of the resume.
    # Employer and job title are the only mandatory scalar fields.
    company: str
    role: str
    # Optional details; each falls back to None when not supplied.
    location: Optional[str] = None
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    description: Optional[str] = None
    # Technology keywords; defaults to an empty list.
    technologies: list[str] = []
    # Bullet points for this role; required (no default).
    bullets: list[str]


class Project(BaseModel):
    # One project entry of the resume.
    title: str
    # Optional free-text summary; None when not supplied.
    description: Optional[str] = None
    # Technology keywords; defaults to an empty list.
    technologies: list[str] = []
    # Bullet points for this project; required (no default).
    bullets: list[str]
    # Optional URL for the project; None when not supplied.
    link: Optional[str] = None


class Education(BaseModel):
    # One education entry of the resume.
    # Institution and degree are mandatory; everything else is optional.
    institution: str
    degree: str
    field_of_study: Optional[str] = None
    location: Optional[str] = None
    # Dates and GPA are kept as strings, not parsed into dates/numbers.
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    gpa: Optional[str] = None
    # May be None or a list; defaults to an empty list.
    honors: Optional[list[str]] = []


class ResumeSchema(BaseModel):
    # Top-level shape of the parsed resume; used as the structured-output
    # target of the resume extraction call. All five fields are required.
    summary: str
    # Flat list of skill strings (compared against the job's skills later on).
    skills: list[str]
    # Nested entries, each validated by the corresponding model above.
    experience: list[Experience]
    projects: list[Project]
    education: list[Education]

In [None]:
# Resume extraction: turn the parsed resume text into a ResumeSchema instance.
model = init_chat_model("gpt-5-nano")
model_with_structure = model.with_structured_output(ResumeSchema)

# Extraction rules given to the model as the system message.
resume_system_prompt = """
You are a precise resume information extractor.

Your task is to convert parsed resume text into structured JSON 
following the provided schema.

STRICT RULES:
- Extract ONLY information explicitly present in the text.
- Do NOT invent, infer, or embellish.
- If a section is missing, return an empty list.
- Keep bullet points concise (one achievement per bullet).
- Extract technologies as atomic skill keywords (1-3 words max).
- Do not include phrases like "experience with" in skills.
- Do not duplicate items.
- Preserve factual accuracy exactly as written.
"""

# User message: the parsed resume text (`text`) framed between dashed rules.
resume_user_prompt = f"""
Convert the following parsed resume text into structured JSON.

Resume Text:
--------------------
{text}
--------------------

Return structured data only.
"""

messages = [
    {"role": "system", "content": resume_system_prompt},
    {"role": "user", "content": resume_user_prompt},
]

# Keep a plain-dict copy of the structured result for the later cells.
response = model_with_structure.invoke(messages)
resume_json = response.model_dump()

In [10]:
class JDResponseSchema(BaseModel):
    # Structured-output target for the job-description extraction call.
    # Every field is required; the Field descriptions are part of the schema.
    location: str = Field(description="Job Location")
    responsibilities: list[str] = Field(description="List of responsibilities")
    # The two qualification lists feed the skill-matching score later on.
    must_have_qualifications: list[str] = Field(
        description="List of must have qualifications"
    )
    nice_to_have_qualifications: list[str] = Field(
        description="List of nice to have qualifications"
    )
    keywords: list[str] = Field(description="List of keywords")

In [11]:
# Fetch the job posting page and pull out company, title and description HTML.
target_url = "https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/4324865656"

# requests has no default timeout, so set one to keep the cell from hanging.
# raise_for_status() stops the notebook on an HTTP error instead of silently
# parsing an error page and sending an empty description to the model.
resp = requests.get(target_url, timeout=30)
resp.raise_for_status()

soup = BeautifulSoup(resp.text, "html.parser")

# Company and title are best-effort: find()/get() return None when an element
# or attribute is missing, so the chained call raises AttributeError. Catch
# only that, so unrelated errors and KeyboardInterrupt are not swallowed.
try:
    company = (
        soup.find("div", {"class": "top-card-layout__card"})
        .find("a")
        .find("img")
        .get("alt")
        .strip()
    )
except AttributeError:
    company = None

try:
    job_title = (
        soup.find("div", {"class": "top-card-layout__entity-info"})
        .find("a")
        .text.strip()
    )
except AttributeError:
    job_title = None

# Raw HTML of the description block; empty string when the block is absent.
description_div = soup.find("div", {"class": "show-more-less-html__markup"})
description_html = str(description_div) if description_div else ""

In [None]:
# User prompt for the job-description extraction call. It embeds the scraped
# description HTML (`description_html`) after the extraction instructions.
# The arrow in the normalization example is written in ASCII ("->") because
# the previous character was mis-encoded and reached the model as garbage.
model_input = f"""
Extract:
- location
- responsibilities (short bullet phrases)
- must-have skills (atomic, normalized skill keywords only)
- nice-to-have skills (atomic, normalized skill keywords only)
- keywords (important technologies or concepts)

For skills:
- Return only short skill names (1-3 words max)
- No sentences
- No explanations
- No duplicates
- No filler words like "experience with"
- Normalize similar items (e.g., "CI/CD pipelines" -> "CI/CD")

From this HTML:

{description_html}
"""

In [13]:
# Job-description extraction: description HTML -> JDResponseSchema instance.
# NOTE: rebinds `model`, `model_with_structure` and `messages` from the
# resume extraction cell.
model = init_chat_model("gpt-5-nano")
model_with_structure = model.with_structured_output(JDResponseSchema)

messages = [
    {
        "role": "system",
        "content": "You extract structured job data from HTML.",
    },
    {
        "role": "user",
        "content": model_input,
    },
]

structured_data = model_with_structure.invoke(messages)

# Scraped company/title first, then the model-extracted fields.
jd_json = {"company": company, "title": job_title}
jd_json.update(structured_data.model_dump())

In [14]:
# Raw skill lists: two from the job description, one from the resume.
must_have_skills, nice_to_have_skills = (
    jd_json["must_have_qualifications"],
    jd_json["nice_to_have_qualifications"],
)
resume_skills = resume_json["skills"]


def normalize(skills):
    """Return the set of distinct skill names, trimmed and lower-cased.

    Args:
        skills: Iterable of skill strings.

    Returns:
        A set of the stripped, lower-cased strings. Entries that are empty
        or whitespace-only are dropped.
    """
    cleaned = set()
    for raw in skills:
        trimmed = raw.strip()
        if trimmed:
            cleaned.add(trimmed.lower())
    return cleaned


# Canonicalise the three skill lists so they compare as sets.
must_have = normalize(must_have_skills)
nice_to_have = normalize(nice_to_have_skills)
resume = normalize(resume_skills)

matched_must = must_have & resume
missing_must = must_have - resume

matched_nice = nice_to_have & resume
missing_nice = nice_to_have - resume

# Fraction of each category covered by the resume. max(..., 1) avoids a
# ZeroDivisionError; an empty category therefore scores 0.0.
must_score = len(matched_must) / max(len(must_have), 1)
nice_score = len(matched_nice) / max(len(nice_to_have), 1)

# Weighted 70/30 blend. When one category is empty, its weight goes to the
# other category; otherwise an empty nice-to-have list would cap a perfect
# must-have match at 0.7, and an empty must-have list would cap at 0.3.
if must_have and nice_to_have:
    final_score = (0.7 * must_score) + (0.3 * nice_score)
elif must_have:
    final_score = must_score
else:
    final_score = nice_score  # 0.0 when both categories are empty

# sorted() gives a stable order; list(set) ordering changes between kernel
# restarts.
# NOTE(review): `result` rebinds the name that held the LlamaParse result.
result = {
    "final_score": round(final_score, 3),
    "must_have": {
        "matched": sorted(matched_must),
        "missing": sorted(missing_must),
        "score": round(must_score, 3),
    },
    "nice_to_have": {
        "matched": sorted(matched_nice),
        "missing": sorted(missing_nice),
        "score": round(nice_score, 3),
    },
}

In [23]:
# LaTeX skeleton handed to the model as the required output format; it is
# interpolated into the user prompt under "LATEX TEMPLATE". Upper-case tokens
# (FULL NAME, EMAIL, BULLET POINT, ...) are placeholders for the model to fill.
# Raw string so the LaTeX backslashes are kept literally.
# NOTE(review): the template starts at \begin{document} with no preamble, and
# `header`, `highlights` and \hrefWithoutArrow are not defined in it;
# presumably a preamble kept elsewhere supplies them. Confirm.
# NOTE(review): the order of "|" and \AND differs between the header entries.
latex_format = r"""
\begin{document}

\newcommand{\AND}{\unskip
    \cleaders\copy\ANDbox\hskip\wd\ANDbox
    \ignorespaces
}
\newsavebox\ANDbox
\sbox\ANDbox{}

\begin{header}
    \fontsize{15 pt}{15 pt}
    \textbf{FULL NAME}

    \normalsize
    \mbox{\hrefWithoutArrow{mailto:EMAIL}{EMAIL}}%
    \kern 0.25 cm%
    |
    \AND%
    \kern 0.25 cm%
    \mbox{\hrefWithoutArrow{tel:PHONE}{PHONE}}%
    \kern 0.25 cm%
    \AND%
    \kern 0.25 cm%
    |
    \mbox{\hrefWithoutArrow{LINKEDIN_URL}{LinkedIn}}%
    \kern 0.25 cm%
    \AND%
    \kern 0.25 cm%
    |
    \mbox{\hrefWithoutArrow{GITHUB_URL}{GitHub}}%
\end{header}

% =====================
\section{Education}

\textbf{INSTITUTION NAME} \hfill \textbf{DATE RANGE} \\
DEGREE NAME \hfill GPA: X.XX/X.XX

\vspace{0.1cm}

% =====================
\section{Skills}

\textbf{Languages:} SKILLS HERE \\
\textbf{Data Analysis \& Visualization:} SKILLS HERE \\
\textbf{Machine Learning:} SKILLS HERE \\
\textbf{Deep Learning \& NLP:} SKILLS HERE \\
\textbf{Generative AI \& LLM Systems:} SKILLS HERE \\
\textbf{Backend \& Databases:} SKILLS HERE \\
\textbf{Cloud \& MLOps:} SKILLS HERE

\vspace{0.1cm}

% =====================
\section{Experience}

\textbf{ROLE | COMPANY | LOCATION} \hfill \textbf{DATE RANGE}
\begin{highlights}
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
\end{highlights}

\vspace{0.1cm}

\textbf{ROLE | COMPANY | LOCATION} \hfill \textbf{DATE RANGE}
\begin{highlights}
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
\end{highlights}

\vspace{0.1cm}

% =====================
\section{Projects}

\textbf{\href{PROJECT_LINK}{PROJECT TITLE | TECHNOLOGIES}}
\begin{highlights}
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
\end{highlights}

\vspace{0.1cm}

\textbf{\href{PROJECT_LINK}{PROJECT TITLE | TECHNOLOGIES}}
\begin{highlights}
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
\end{highlights}

\vspace{0.1cm}

\textbf{\href{PROJECT_LINK}{PROJECT TITLE | TECHNOLOGIES}}
\begin{highlights}
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
    \item BULLET POINT
\end{highlights}

\end{document}
"""

# Serialise both payloads as real JSON. Interpolating the dicts directly put
# Python reprs (single quotes, None) under headings that say "JSON".
# ensure_ascii=False keeps non-ASCII characters readable instead of \u escapes.
resume_json_text = json.dumps(resume_json, indent=2, ensure_ascii=False)
jd_json_text = json.dumps(jd_json, indent=2, ensure_ascii=False)

# Prompt for the tailoring call: the system message holds the editing rules,
# the user message holds the resume, the job description and the LaTeX template.
# NOTE: rebinds `messages` used by the earlier extraction cells.
messages = [
    {
        "role": "system",
        "content": """
You are a professional resume tailoring assistant.

Your task is to tailor a master resume to a job description.

STRICT RULES:
- Use ONLY information provided in the master resume JSON.
- DO NOT invent new skills, projects, achievements, or technologies.
- DO NOT exaggerate impact.
- DO NOT add experience that does not exist.
- If something is not present in the master resume, you must not include it.
- Keep factual accuracy exactly as written.

Editing Rules:
1. Reorder and select skills:
- Keep ONLY skills that are explicitly mentioned in the Job Description.
- Reorder them based on importance and frequency in the JD.

2. Select and reorder EXACTLY 3 projects:
- Choose from the existing project list only.
- Rank them based on relevance to the JD (skills + technologies overlap).
- Rewrite bullet points to emphasize relevant skills.
- Keep each bullet short and concise.
- Maximum 4 bullet points per project.
- Do not add new technical claims.
- If you cannot find 3 relevant projects, choose the 3 most related ones without inventing relevance.

3. Keep all other sections (experience, education, etc.) unchanged unless minor wording adjustments improve alignment with the JD.
- Do not fabricate.
- Do not remove roles.
- Do not create new content.

Output Rules:
- Output the final resume strictly formatted using the provided LaTeX template.
- Replace only the content sections.
- Do not include explanations.
- Do not include JSON.
- Output LaTeX only.
    """,
    },
    {
        "role": "user",
        "content": f"""
MASTER RESUME JSON:
--------------------
{resume_json_text}
--------------------

JOB DESCRIPTION JSON:
--------------------
{jd_json_text}
--------------------

LATEX TEMPLATE:
--------------------
{latex_format}
--------------------

Generate the tailored resume now.
""",
    },
]

In [26]:
# Tailoring step: a plain chat call (no structured output) on the tailoring
# prompt held in `messages`.
# NOTE: rebinds `model` and `response` from the earlier extraction cells.
tailoring_model_name = "gpt-4.1-mini"
model = init_chat_model(tailoring_model_name)

response = model.invoke(messages)