# Construct pydantic model from text input

In [1]:
from pydantic_ai import Agent

# Create an agent for the Gemini 2.5 Flash model, addressed with the
# "google-gla:" prefix in the model string.
# NOTE(review): presumably the API key is picked up from the environment — confirm setup.
agent = Agent(model="google-gla:gemini-2.5-flash")

# Top-level `await` is fine inside a notebook cell. No output_type is passed,
# so the run result carries free-form text.
result = await agent.run("Give me an IT employee working in sweden, keep it short")
result

AgentRunResult(output='**Erik Lindgren**, a **System Administrator** in **Gothenburg**, Sweden.')

In [3]:
# `output` is a plain string for this run; print it to see the raw text.
print(result.output)

**Erik Lindgren**, a **System Administrator** in **Gothenburg**, Sweden.


In [4]:
from pydantic import BaseModel, Field

# Schema for one employee; used below as the agent's output_type.
class EmployeeModel(BaseModel):
    name: str
    age: int
    # gt/lt are exclusive bounds: exactly 30_000 or 50_000 is rejected.
    salary: int = Field(gt=30_000, lt=50_000)
    position: str

result = await agent.run(
    "Give me an IT emploee working in sweden", output_type=EmployeeModel
)

result

AgentRunResult(output=EmployeeModel(name='Bjorn Borg', age=45, salary=45000, position='IT Consultant'))

In [6]:
# Pull the EmployeeModel instance out of the run result and display it.
employee = result.output
employee

EmployeeModel(name='Bjorn Borg', age=45, salary=45000, position='IT Consultant')

In [9]:
# Fields are ordinary attributes on the model instance.
employee.name, employee.age, employee.position

('Bjorn Borg', 45, 'IT Consultant')

In [11]:
# Convert the model into a plain Python dict.
employee.model_dump()

{'name': 'Bjorn Borg', 'age': 45, 'salary': 45000, 'position': 'IT Consultant'}

In [15]:
# Serialize to a JSON string; print() so the 2-space indentation is rendered
# instead of an escaped one-line repr.
print(employee.model_dump_json(indent=2))

{
  "name": "Bjorn Borg",
  "age": 45,
  "salary": 45000,
  "position": "IT Consultant"
}


Several employees, returned as a list of `EmployeeModel` objects

In [17]:
# output_type can also be a list of models; the output is then a Python list
# of EmployeeModel instances.
# NOTE(review): the prompt says "between 30000 and 50000" while EmployeeModel
# uses exclusive bounds (gt/lt) — confirm whether the endpoints should be allowed.
result = await agent.run(
    """Give me ten employees in AI and data engineering fields, 
    roles can vary, but salary must be between 30000 and 50000""",
    output_type=list[EmployeeModel]
)

employees = result.output
employees

[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob Johnson', age=35, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Charlie Brown', age=28, salary=40000, position='Machine Learning Engineer'),
 EmployeeModel(name='Diana Prince', age=32, salary=42000, position='Data Scientist'),
 EmployeeModel(name='Eve Adams', age=38, salary=49000, position='Senior Data Engineer'),
 EmployeeModel(name='Frank White', age=29, salary=38000, position='Junior AI Developer'),
 EmployeeModel(name='Grace Black', age=40, salary=49999, position='Lead Data Engineer'),
 EmployeeModel(name='Harry Green', age=33, salary=46000, position='AI Research Scientist'),
 EmployeeModel(name='Ivy Blue', age=27, salary=35000, position='ETL Developer'),
 EmployeeModel(name='Jack Red', age=31, salary=43000, position='Business Intelligence Engineer')]

In [18]:
# Check how many employees came back (the prompt asked for ten).
len(employees)

10

In [19]:
# The `=` specifier in the f-string prints the expression text together with
# its value. Note that the loop variable rebinds the notebook-level `employee`
# from the earlier cell; after the loop it refers to the last list element.
for employee in employees:
    print(f"{employee.name = } and {employee.salary = }")

employee.name = 'Alice Smith' and employee.salary = 45000
employee.name = 'Bob Johnson' and employee.salary = 48000
employee.name = 'Charlie Brown' and employee.salary = 40000
employee.name = 'Diana Prince' and employee.salary = 42000
employee.name = 'Eve Adams' and employee.salary = 49000
employee.name = 'Frank White' and employee.salary = 38000
employee.name = 'Grace Black' and employee.salary = 49999
employee.name = 'Harry Green' and employee.salary = 46000
employee.name = 'Ivy Blue' and employee.salary = 35000
employee.name = 'Jack Red' and employee.salary = 43000


## CV or resume model - a more complex and nested model

In [20]:
# One work-experience entry of a CV; nested inside CvModel.experiences.
class ExperienceModel(BaseModel):
    title: str
    company: str
    description: str
    # NOTE(review): `star_year` looks like a typo for `start_year`. The name is
    # also selected by the SQL join at the end of the notebook, so rename both
    # together if this gets fixed.
    star_year: int
    end_year: int

# One education entry of a CV; nested inside CvModel.educations.
class EducationModel(BaseModel):
    title: str
    education_area: str
    school: str
    description: str
    # NOTE(review): `star_year` looks like a typo for `start_year`. The name is
    # also selected by the SQL join at the end of the notebook, so rename both
    # together if this gets fixed.
    star_year: int
    end_year: int

# Top-level CV schema: two scalar fields plus two lists of nested models.
class CvModel(BaseModel):
    name: str
    age: int
    experiences: list[ExperienceModel]
    educations: list[EducationModel]

# Ask for a complete nested CV in one call; the output is a CvModel whose
# experiences/educations lists hold the nested model instances.
result = await agent.run(
    "Create a swedish person applying for a data engineering position",
    output_type=CvModel
)

resume = result.output
resume

CvModel(name='Björn Andersson', age=32, experiences=[ExperienceModel(title='Data Engineer', company='Spotify', description='Developed and maintained ETL pipelines, built data warehouses, and worked with big data technologies like Spark and Hadoop.', star_year=2018, end_year=2023), ExperienceModel(title='Junior Data Engineer', company='Klarna', description='Assisted in the development of data infrastructure and contributed to data quality initiatives.', star_year=2016, end_year=2018)], educations=[EducationModel(title='Master of Science in Computer Science', education_area='Data Engineering', school='KTH Royal Institute of Technology', description='Specialized in distributed systems and data management.', star_year=2014, end_year=2016), EducationModel(title='Bachelor of Science in Software Engineering', education_area='Software Engineering', school='Uppsala University', description='Focused on software development principles and algorithms.', star_year=2011, end_year=2014)])

In [22]:
# Scalar fields of the top-level model.
resume.name, resume.age

('Björn Andersson', 32)

In [24]:
# Nested entries are model instances too, so list indexing and attribute
# access chain naturally.
resume.experiences[0].title

'Data Engineer'

## Optional postprocessing -> load into duckdb and unnest

In [25]:
import dlt

# dlt pipeline that writes into the local DuckDB file "cv.duckdb", under the
# "staging" dataset (schema).
pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

# model_dump() turns the CV into a nested dict; dlt splits the nested lists into
# child tables (cv_entries__educations, cv_entries__experiences).
# write_disposition="replace" keeps this cell idempotent: dlt appends by
# default, so every re-run would otherwise add another CV to the tables.
info = pipeline.run(
    data=[resume.model_dump()],
    loader_file_format="jsonl",
    table_name="cv_entries",
    write_disposition="replace",
)
print(info)

Pipeline resume_json_duckdb load step completed in 0.28 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////home/andreas/github/AI_engineering_Andreas_Reinholdsson_DE24/07_pydanticai_fundamentals/cv.duckdb location to store data
Load package 1764170362.1646302 is LOADED and contains no failed jobs


In [28]:
import duckdb

# Materialize everything as pandas DataFrames while the connection is open:
# the table overview first, then the parent table and its two child tables.
with duckdb.connect("cv.duckdb") as conn:
    desc, cv_entries, educations, experiences = (
        conn.sql(query).df()
        for query in (
            "desc",
            "from staging.cv_entries",
            "from staging.cv_entries__educations",
            "from staging.cv_entries__experiences",
        )
    )

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__educations,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__experiences,"[title, company, description, star_year, end_y...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [29]:
# Parent table: one row per CV with the scalar fields plus dlt bookkeeping columns.
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Björn Andersson,32,1764170362.1646302,Ip4ot1dmC8x3+w


In [30]:
# Child table: one row per education entry; _dlt_parent_id holds the parent row's _dlt_id.
educations

Unnamed: 0,title,education_area,school,description,star_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,Specialized in distributed systems and data ma...,2014,2016,Ip4ot1dmC8x3+w,0,ePnqhfvh5bu2uQ
1,Bachelor of Science in Software Engineering,Software Engineering,Uppsala University,Focused on software development principles and...,2011,2014,Ip4ot1dmC8x3+w,1,BWdoX8ac3STAEA


In [31]:
# Child table: one row per experience entry; _dlt_parent_id holds the parent row's _dlt_id.
experiences

Unnamed: 0,title,company,description,star_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Data Engineer,Spotify,"Developed and maintained ETL pipelines, built ...",2018,2023,Ip4ot1dmC8x3+w,0,QULspg7FFayLtQ
1,Junior Data Engineer,Klarna,Assisted in the development of data infrastruc...,2016,2018,Ip4ot1dmC8x3+w,1,331gYJiqnbz7lA


In [34]:
# duckdb.sql() runs on the default in-memory connection, so cv_entries,
# educations and experiences below resolve to the pandas DataFrames created in
# the earlier cell, not to the staging tables inside cv.duckdb.
# NOTE(review): both child tables are joined on the parent id only, so every
# education row is paired with every experience row (2 x 2 = 4 rows for this
# CV). Confirm that this fan-out is intended.
duckdb.sql("""
    SELECT
           cv.age,
           cv.name,
           ex.company,
           ex.description AS experience_description,
           ex.star_year AS experience_start_year,
           ex.end_year AS experience_end_year,
           e.title,
           e.education_area,
           e.school,
           e.star_year AS education_start_year,
           e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e on cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex on cv._dlt_id = ex._dlt_parent_id
""").df()

Unnamed: 0,age,name,company,experienc_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,32,Björn Andersson,Klarna,Assisted in the development of data infrastruc...,2016,2018,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2014,2016
1,32,Björn Andersson,Klarna,Assisted in the development of data infrastruc...,2016,2018,Bachelor of Science in Software Engineering,Software Engineering,Uppsala University,2011,2014
2,32,Björn Andersson,Spotify,"Developed and maintained ETL pipelines, built ...",2018,2023,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2014,2016
3,32,Björn Andersson,Spotify,"Developed and maintained ETL pipelines, built ...",2018,2023,Bachelor of Science in Software Engineering,Software Engineering,Uppsala University,2011,2014
