# Construct a Pydantic model from text input

In [2]:
from pydantic_ai import Agent

agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT employee working in Upplands Väsby, keep it short")

result

AgentRunResult(output='Lotta Svensson, Systems Administrator')

In [4]:
# `output` holds the model's plain-text reply when no output_type was requested.
print(result.output)

Lotta Svensson, Systems Administrator


In [5]:
from pydantic import BaseModel, Field

# Structured-output schema for a single employee; handed to the agent
# as `output_type` so the reply is parsed into this model.
class EmployeeModel(BaseModel):
    name: str
    age: int
    # Exclusive bounds: 30000 < salary < 50000.
    salary: int = Field(gt=30_000, lt=50_000)
    position: str

result = await agent.run(
    "Give me a IT employee working in Upplands Väsby", output_type=EmployeeModel
)

result

AgentRunResult(output=EmployeeModel(name='Erik Karlsson', age=35, salary=45000, position='IT Consultant'))

In [6]:
# With output_type=EmployeeModel, `result.output` is an EmployeeModel instance.
employee = result.output
employee

EmployeeModel(name='Erik Karlsson', age=35, salary=45000, position='IT Consultant')

In [8]:
# Fields are ordinary typed attributes on the model instance.
employee.name, employee.age

('Erik Karlsson', 35)

In [None]:
employee.model_dump()  # plain dict keyed by field name

{'name': 'Erik Karlsson',
 'age': 35,
 'salary': 45000,
 'position': 'IT Consultant'}

In [12]:
print(employee.model_dump_json(indent= 2))  # JSON string, pretty-printed with 2-space indent

{
  "name": "Erik Karlsson",
  "age": 35,
  "salary": 45000,
  "position": "IT Consultant"
}


list of employees

In [14]:
result = await agent.run("Give me 10 employees in AI and data engineering fields, roles can vary, but salary must be between 30000 and 50000", output_type = list[EmployeeModel])
employees = result.output
employees

[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob Johnson', age=35, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Charlie Brown', age=28, salary=40000, position='Machine Learning Specialist'),
 EmployeeModel(name='Diana Miller', age=42, salary=49000, position='Senior Data Scientist'),
 EmployeeModel(name='Eve Davis', age=33, salary=38000, position='AI Research Assistant'),
 EmployeeModel(name='Frank White', age=29, salary=42000, position='Big Data Analyst'),
 EmployeeModel(name='Grace Taylor', age=38, salary=49000, position='Lead AI Developer'),
 EmployeeModel(name='Henry Moore', age=31, salary=35000, position='Data Pipeline Engineer'),
 EmployeeModel(name='Ivy Green', age=27, salary=43000, position='Junior Machine Learning Engineer'),
 EmployeeModel(name='Jack Hall', age=36, salary=47000, position='AI Architect')]

In [16]:
# The `=` specifier echoes the expression text itself (e.g. "emp.name= 'Alice Smith'"),
# so renaming the loop variable would change the printed output.
for emp in employees:
    print(f"{emp.name= } and {emp.salary= }")

emp.name= 'Alice Smith' and emp.salary= 45000
emp.name= 'Bob Johnson' and emp.salary= 48000
emp.name= 'Charlie Brown' and emp.salary= 40000
emp.name= 'Diana Miller' and emp.salary= 49000
emp.name= 'Eve Davis' and emp.salary= 38000
emp.name= 'Frank White' and emp.salary= 42000
emp.name= 'Grace Taylor' and emp.salary= 49000
emp.name= 'Henry Moore' and emp.salary= 35000
emp.name= 'Ivy Green' and emp.salary= 43000
emp.name= 'Jack Hall' and emp.salary= 47000


## CV or resume model - more complex and nested

In [19]:
# One work-experience entry of a CV; used as the element type of
# CvModel.experiences.
class ExperienceModel(BaseModel):
    title: str  # job title, e.g. 'Data Engineer'
    company: str  # employer name
    description: str  # short free-text summary of the role
    start_year: int
    # NOTE(review): required int, so an ongoing position has no natural
    # representation — confirm this is intended.
    end_year: int

# One education entry of a CV; used as the element type of
# CvModel.educations.
class EducationModel(BaseModel):
    title: str  # degree title, e.g. 'M.Sc. Computer Science'
    education_area: str  # field of study
    school: str  # institution name
    description: str  # short free-text summary of the programme
    start_year: int
    end_year: int

# Top-level CV schema: scalar fields plus two lists of nested models.
class CvModel(BaseModel):
    name: str
    age: int
    experiences: list[ExperienceModel]  # no minimum length is enforced
    educations: list[EducationModel]  # no minimum length is enforced

result = await agent.run("""
    Create a swedish person applying for a data engineering postion
""", output_type = CvModel)

resume = result.output
resume


CvModel(name='Bjorn Borg', age=35, experiences=[ExperienceModel(title='Data Engineer', company='Ericsson', description='Developed and maintained data pipelines', start_year=2018, end_year=2023)], educations=[EducationModel(title='M.Sc. Computer Science', education_area='Computer Science', school='KTH Royal Institute of Technology', description='Specialization in distributed systems', start_year=2013, end_year=2018)])

In [24]:
# Nested models are reached with ordinary attribute and index access.
# NOTE(review): experiences[0] raises IndexError if the list came back empty.
resume.name, resume.experiences[0].end_year

('Bjorn Borg', 2023)

## Optional postprocessing -> load into DuckDB and unnest

In [25]:
import dlt

# Load the resume into a local DuckDB file under the `staging` dataset.
# The nested lists end up in their own child tables
# (`cv_entries__educations`, `cv_entries__experiences`), which the
# following cells query.
duckdb_destination = dlt.destinations.duckdb("cv.duckdb")
pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=duckdb_destination,
    dataset_name="staging",
)

# model_dump() turns the CvModel into a plain dict; it is wrapped in a
# list so the pipeline receives a one-row collection.
# NOTE(review): no write_disposition is passed, so dlt's default applies —
# confirm that re-running this cell does not duplicate rows.
info = pipeline.run(
    data=[resume.model_dump()],
    loader_file_format="jsonl",
    table_name="cv_entries",
)

print(info)

Pipeline resume_json_duckdb load step completed in 0.41 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:///c:\Users\alexa\Documents\ML_and_AI\video_alongs\07_pydanticai_fundamentals\cv.duckdb location to store data
Load package 1764622603.9100604 is LOADED and contains no failed jobs


In [35]:
import duckdb

# Read the tables dlt created back into DataFrames. `.df()` materialises each
# result inside the `with` block, so the frames stay usable after it exits.
# The frame names (cv_entries, educations, experiences) are referenced by the
# join query further down.
with duckdb.connect("cv.duckdb") as conn:
    # NOTE(review): bare `desc` presumably lists the tables in the database — confirm.
    desc = conn.sql("desc").df()
    # FROM-first queries: each one reads every column of the named table.
    cv_entries = conn.sql("from staging.cv_entries").df()
    educations = conn.sql("from staging.cv_entries__educations").df()
    experiences = conn.sql("from staging.cv_entries__experiences").df()
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Bjorn Borg,35,1764622603.9100604,gKIchgpsxlilzQ


In [36]:
# Child rows point back to their CV through `_dlt_parent_id`, which matches
# `_dlt_id` in cv_entries.
educations

Unnamed: 0,title,education_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,M.Sc. Computer Science,Computer Science,KTH Royal Institute of Technology,Specialization in distributed systems,2013,2018,gKIchgpsxlilzQ,0,WV68do/pfdrsCQ


In [38]:
# Flatten the parent CV row together with its education and experience rows.
# The table names in the query are presumably resolved by DuckDB from the
# DataFrames of the same names created earlier in the notebook — confirm.
# NOTE(review): both child tables are joined on the same parent key, so a CV
# with m educations and n experiences produces m x n rows; the single-row
# result relies on there being exactly one of each.
flat_cv_query = """
    SELECT
        cv.name,
        cv.age,
        ex.company,
        ex.description AS experience_description,
        ex.start_year as experience_start_year,
        ex.end_year as experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year as education_start_year,
        e.end_year as education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e on cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex on cv._dlt_id = ex._dlt_parent_id
"""
duckdb.sql(flat_cv_query).df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,Bjorn Borg,35,Ericsson,Developed and maintained data pipelines,2018,2023,M.Sc. Computer Science,Computer Science,KTH Royal Institute of Technology,2013,2018
