In [18]:
from ollama import chat
from pydantic import BaseModel, RootModel, Field
from pprint import pprint
import json

from utils.with_structured_output import with_structured_output

In [5]:
# Load the resume sections saved as JSON by an earlier step. The result is
# indexed by section name further down (e.g. resume_sections["Experience"]).
# The encoding is pinned to UTF-8 because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode or reject non-ASCII resume
# text on systems whose default is not UTF-8.
with open("output/resume_sections.json", "r", encoding="utf-8") as file:
    resume_sections = json.load(file)

In [13]:
# One work-experience entry extracted from a resume.
#
# Each attribute is populated from its capitalised alias ("Company", "Role",
# "Contributions"), the same keys the extraction prompt's JSON example uses;
# `model_dump()` reports the lowercase attribute names instead.
#
# Documented with `#` comments rather than a docstring on purpose: pydantic
# copies a model docstring into the generated JSON schema as its description.
class Experience(BaseModel):
    # Employer name.
    company: str = Field(alias="Company")
    # The applicant's role at that employer.
    role: str = Field(alias="Role")
    # Contributions listed for the role, one string per item.
    contributions: list[str] = Field(alias="Contributions")

In [14]:
# Prompt template for work-experience extraction. It is filled in with
# `str.format`, so the doubled braces (`{{` / `}}`) around the JSON example are
# escapes that render as literal `{` / `}`, and `{resume_text}` is the only
# placeholder. The example's keys ("Company", "Role", "Contributions") are the
# same strings used as field aliases on the `Experience` model.
EXPERIENCE_EXTRACTION_PROMPT = """
You are an expert at parsing resumes. Given some resume text, your job is to extract information about the candidate's work experience and format it as a list of JSON objects, where each object has the following format:
    {{
        "Company": "<company>",
        "Role": "<applicant's role at the company>",
        "Contributions": ["list", "of", "contributions", "in", "the", "role"]
    }}
    
The extracted information must be **explicitly contained in the resume.**

Resume text:
{resume_text}

Output:
"""

In [20]:
# Fill the prompt template with the resume's "Experience" section.
experience_prompt = EXPERIENCE_EXTRACTION_PROMPT.format(
    resume_text=resume_sections["Experience"]
)

# Target type: a root model wrapping a list of Experience entries.
ExperienceList = RootModel[list[Experience]]

# NOTE(review): with_structured_output is a project helper not visible here;
# presumably it queries the model and returns an instance of the given type.
experience = with_structured_output(experience_prompt, ExperienceList)

# Pretty-print the parsed result as plain Python data.
pprint(experience.model_dump())

[{'company': 'Sandia National Laboratories',
  'contributions': ['Developed knowledge graph (KG) generation pipeline with '
                    'internal LLM microservices to allow multi-hop reasoning '
                    'in 3-stage retrieval augmented generation (RAG) pipeline',
                    'Extracted 30+ domain-specific seed topics from text '
                    'corpus with BERTopic for KG subgraph creation',
                    'Achieved100%schema-compliantLLMoutputsviaprompt '
                    'engineering andgrammar-contrained decoding',
                    'Packaged KG generation logic into reusable, '
                    'object-oriented Python modules used by 30 developers'],
  'role': 'AI/ML Intern'},
 {'company': 'Sandia National Laboratories',
  'contributions': ['Redesigned 30 year old Java data analysis suite '
                    'architecture, cutting developer onboarding time by 3 '
                    'weeks',
                    'Used MATLAB profiler to