In [5]:
import json
from pprint import pprint
from typing import Optional

from ollama import chat
from pydantic import BaseModel, RootModel, Field

In [3]:
# Load the pre-split resume sections. The encoding is explicit because the data
# contains non-ASCII characters (e.g. ’) and open() otherwise decodes with the
# platform's locale encoding, which is not UTF-8 everywhere.
with open("output/resume_sections.json", "r", encoding="utf-8") as file:
    resume_sections = json.load(file)

In [4]:
# Display the loaded sections for a quick visual check.
# NOTE(review): the rendered output includes personal contact details
# (phone number, e-mail) — clear or redact it before sharing the notebook.
resume_sections

{'Miscellaneous': ['(832) 416-3570 | kevzhang2022@gmail.com | linkedin.com/in/kevinkz | github.com/n1v3x2'],
 'Education': 'Texas A&M University May 2026\nBS in Computer Science, Minor in Statistics and Math College Station, TX\nCumulative GPA: 4.0/4.0\nHonors: Dean’s Honor Roll, Engineering Honors (EH), Dean’s Excellence Award Semi-finalist\nCoursework: Data Structures & Algorithms, Software Engineering, Computer Systems, Discrete Math, Linear Algebra',
 'Experience': 'AI/ML Intern Aug 2024 – Dec 2024\nSandia National Laboratories Remote\n• Developed knowledge graph (KG) generation pipeline with internal LLM microservices to allow multi-hop\nreasoning in 3-stage retrieval augmented generation (RAG) pipeline\n• Extracted 30+ domain-specific seed topics from text corpus with BERTopic for KG subgraph creation\n• Achieved100%schema-compliantLLMoutputsviaprompt engineering andgrammar-contrained decoding\n• Packaged KG generation logic into reusable, object-oriented Python modules used by 3

In [6]:
# Pull out the "Education" section; in the dump shown earlier it is a single
# newline-separated string.
education = resume_sections["Education"]

In [1]:
class School(BaseModel):
    # One school entry from a resume's education section. The aliases are the
    # JSON keys requested in EDUCATION_EXTRACTION_PROMPT.
    # (Kept as comments rather than a docstring: a class docstring would be
    # emitted as "description" in the JSON schema.)
    name: str                     = Field(..., alias="Name")
    major: str                    = Field(..., alias="Major")
    # Optional[...] so that an explicit JSON null — which the prompt asks for
    # when minors/GPA are absent — validates instead of raising. A bare
    # `list[str]` / `float` with a None default only tolerates a missing key.
    minors: Optional[list[str]]   = Field(None, alias="Minors")
    gpa: Optional[float]          = Field(None, alias="GPA")
    grad_year: int                = Field(..., alias="Graduation Year")

class Education(BaseModel):
    # Top-level result of education parsing: every school found plus a flat
    # list of honors. Both fields are required and are read from the
    # capitalised alias keys.
    schools: list[School] = Field(default=..., alias="Schools")
    honors: list[str] = Field(default=..., alias="Honors")

NameError: name 'BaseModel' is not defined

In [38]:
# Prompt template for education extraction. It is rendered with str.format, so
# the doubled braces ({{ and }}) are escapes that become literal { and } in the
# JSON example; {resume_text} is the only placeholder that gets substituted.
# The JSON keys in the example match the field aliases on School / Education.
# NOTE(review): the example object has a trailing comma after the "Honors" line,
# which is not valid JSON — consider removing it.
EDUCATION_EXTRACTION_PROMPT = """
You are an expert resume parser. Given some resume text, your job is to parse the following information and format it as follows:

{{
    "Schools": [
        {{
            "Name": "name of school",
            "Major": "major field of study",
            "Minors": ["list", "of", "minors"] or null,
            "GPA": <GPA> or null,
            "Graduation Year": <graduation_year>
        }},
        ...
    ],
    "Honors": ["list", "of", "honors"],
}}

Notes:
1. If there are no minors, set "Minors" to null.
2. If there is no GPA listed, set "GPA" to null.
3. If any school does not have a graduation year listed, omit the school from the output.
4. Output the full name of all degrees, e.g., "BS in Computer Science", "M.S. in Information Science".
5. If the resume does not contain information for one of the sections, return an empty list for that section.

Extracted information must be **explicitly contained in the resume.**

Resume text:
{resume_text}

Output:
"""

In [39]:
def parse_education(education_text: str, model: str = "llama3.1") -> Education:
    """Extract structured education details from resume text using an Ollama model.

    Args:
        education_text: Resume text to parse; substituted into
            EDUCATION_EXTRACTION_PROMPT as ``resume_text``.
        model: Name of the Ollama model to query. Defaults to "llama3.1",
            the value that was previously hard-coded.

    Returns:
        An ``Education`` instance validated from the model's JSON reply.

    Raises:
        pydantic.ValidationError: If the reply does not satisfy the
            ``Education`` schema.
    """
    prompt = EDUCATION_EXTRACTION_PROMPT.format(resume_text=education_text)
    response = chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        # Pass the JSON schema so the reply is constrained to the Education structure.
        format=Education.model_json_schema(),
    )

    return Education.model_validate_json(response.message.content)

In [40]:
# Parse the education section extracted earlier and pretty-print the validated fields.
edu_info = parse_education(education)
edu_fields = edu_info.model_dump()
pprint(edu_fields)

{'honors': ['Dean’s Honor Roll',
            'Engineering Honors (EH)',
            'Dean’s Excellence Award Semi-finalist'],
 'schools': [{'gpa': 4.0,
              'grad_year': 2026,
              'major': 'BS in Computer Science',
              'minors': ['Statistics', 'Math'],
              'name': 'Texas A&M University'}]}


In [41]:
# Load the sample job description and resumes. UTF-8 is requested explicitly so
# decoding does not depend on the platform's locale encoding.
with open("../sample-data/job-desc.txt", "r", encoding="utf-8") as file:
    job_desc = file.read()

with open("../sample-data/resumes.json", "r", encoding="utf-8") as file:
    resumes = json.load(file)

In [42]:
# Run the extractor over every sample resume, echoing each input before its parsed result.
for resume in resumes:
    pprint(resume)
    print("---")
    parsed_education = parse_education(resume)
    pprint(parsed_education.model_dump())
    print()

('John Doe\n'
 'Software Engineer\n'
 'john.doe@example.com | (123) 456-7890 | linkedin.com/in/johndoe\n'
 '\n'
 'Summary:\n'
 'Experienced software engineer with expertise in developing scalable web '
 'applications, strong knowledge of Python and JavaScript, and a passion for '
 'solving complex problems.\n'
 '\n'
 'Skills:\n'
 '- Programming Languages: Python, JavaScript, Java\n'
 '- Frameworks: Django, React, Spring Boot\n'
 '- Tools: Git, Docker, Kubernetes\n'
 '- Databases: PostgreSQL, MongoDB\n'
 '\n'
 'Experience:\n'
 'Software Engineer | ABC Tech | June 2020 - Present\n'
 '- Built and maintained scalable APIs to support high-traffic e-commerce '
 'platforms.\n'
 '- Led migration of a monolithic application to a microservices architecture, '
 'reducing downtime by 30%.\n'
 '\n'
 'Education:\n'
 'B.S. in Computer Science | University of XYZ | May 2020')
---
{'honors': [],
 'schools': [{'gpa': None,
              'grad_year': 2020,
              'major': 'BS in Computer Science',