In [1]:
from ollama import chat
from pydantic import BaseModel, RootModel, Field
from pprint import pprint
import json

from utils.with_structured_output import with_structured_output

In [2]:
# Load the parsed resume JSON. Later cells index the result by section name
# (e.g. resume_sections["Experience"]).
# The file is decoded explicitly as UTF-8 so the result does not depend on the
# platform's locale default; the resume text contains non-ASCII characters
# such as typographic apostrophes.
with open("../output/parsed_resume.json", "r", encoding="utf-8") as resume_file:
    resume_sections = json.load(resume_file)

In [3]:
# Schema for a single work-experience entry extracted from a resume.
# Attribute names are snake_case; each alias is the capitalised JSON key that
# the extraction prompt asks the model to emit ("Company", "Role",
# "Contributions").
# `...` is passed in the default position of Field, i.e. no default value is
# supplied for any of these fields.
class Experience(BaseModel):
    # Name of the employer / organisation.
    company: str = Field(..., alias="Company")
    # The applicant's role at that company.
    role: str = Field(..., alias="Role")
    # One string per contribution listed for the role.
    contributions: list[str] = Field(..., alias="Contributions")
    
# Top-level schema for the extraction result: every work-experience entry plus
# the total years of experience. Aliases are the JSON keys used in the
# extraction prompt ("Experiences", "Total Years of Experience").
class Experiences(BaseModel):
    # All extracted entries, each validated against the Experience model.
    experiences: list[Experience] = Field(..., alias="Experiences")
    # Total years of experience; a float because the prompt asks for a value
    # rounded to the nearest quarter-year.
    yoe: float = Field(..., alias="Total Years of Experience")

In [7]:
# Prompt template for extracting work experience from resume text.
# - Rendered with str.format(resume_text=...); literal JSON braces are doubled
#   ({{ and }}) so that only {resume_text} is treated as a placeholder.
# - The JSON keys shown here must stay in sync with the aliases declared on the
#   Experience / Experiences models.
# - The requested output is a single JSON object (wrapping the list of
#   experiences), matching the schema shown in the template.
EXPERIENCE_EXTRACTION_PROMPT = """
You are an expert at parsing resumes. Given some resume text, your job is to extract information about the candidate's work experience and format it as a single JSON object with the following format:
    {{
        "Experiences": [
            {{
                "Company": "<company>",
                "Role": "<applicant's role at the company>",
                "Contributions": ["list", "of", "contributions", "in", "the", "role"]
            }},
            ...
        ],
        "Total Years of Experience": <Total Years of Experience>
    }}

The extracted information must be **explicitly contained in the resume.**

Calculate "Total Years of Experience" by summing up the duration of all experiences, rounded to the nearest quarter-year. **Note that overlapping timeframes should not be double-counted.**

Resume text:
{resume_text}

Output:
"""

In [8]:
# Fill the prompt template with the "Experience" section of the parsed resume.
experience_prompt = EXPERIENCE_EXTRACTION_PROMPT.format(
    resume_text=resume_sections["Experience"]
)

# Request output shaped by the Experiences schema, then pretty-print the
# result for inspection.
experience = with_structured_output(experience_prompt, Experiences)
pprint(experience)

{'Experiences': [{'Company': 'DIVE (Data Integration Visualization and '
                             'Exploration) Lab',
                  'Contributions': ['Enhanced the QHNet model’s Hamiltonian '
                                    'matrix predictions by experimenting with '
                                    'various radial basis functions, leading '
                                    'to a 5% increase in model accuracy and '
                                    'efficiency in material property '
                                    'simulations.',
                                    'Currently tying to utilize OMat24 for '
                                    'material property prediction to support '
                                    'novel materials research, facilitating '
                                    'faster identification of viable compounds '
                                    'for engineering applications.'],
                  'Role': 'Undergraduate Researcher'},
