# Construct a Pydantic model from text input

In [45]:
from pydantic_ai import Agent

agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT emplyee working in Sweden, keep it short")
result

AgentRunResult(output='**Malin Andersson**, a Cloud Engineer who loves her fika breaks.')

In [46]:
# The reply itself; a plain string here because no output_type was requested.
result.output

'**Malin Andersson**, a Cloud Engineer who loves her fika breaks.'

In [47]:
from pydantic import BaseModel, Field

class EmployeeModel(BaseModel):
    # Schema for a single employee. It is handed to the agent as `output_type`
    # so the reply comes back as an EmployeeModel instance instead of text.
    name: str
    age: int
    # gt/lt are exclusive bounds: 30_000 < salary < 50_000.
    salary: int = Field(gt=30_000, lt=50_000)
    position: str

result = await agent.run(
    "Give me an IT employee working in Sweden", output_type=EmployeeModel
)

result

AgentRunResult(output=EmployeeModel(name='Bjorn', age=36, salary=45000, position='IT Consultant'))

In [48]:
# Keep the returned EmployeeModel instance under a shorter name.
employee = result.output
employee

EmployeeModel(name='Bjorn', age=36, salary=45000, position='IT Consultant')

In [49]:
# The fields are available as ordinary attributes on the instance.
employee.name, employee.age, employee.position

('Bjorn', 36, 'IT Consultant')

In [50]:
# Convert the model into a plain dict keyed by field name.
employee.model_dump()

{'name': 'Bjorn', 'age': 36, 'salary': 45000, 'position': 'IT Consultant'}

In [51]:
# Serialize the model to a JSON string, pretty-printed with two-space indentation.
print(employee.model_dump_json(indent=2))

{
  "name": "Bjorn",
  "age": 36,
  "salary": 45000,
  "position": "IT Consultant"
}


Several employees: request a list of employees by passing `list[EmployeeModel]` as the output type

In [52]:
result = await agent.run(
    "Give me ten employees in AI and data engineering fields, roles can vary but salary must be between 30000 and 50000",
    output_type=list[EmployeeModel]
)

employees = result.output
employees

[EmployeeModel(name='Alice', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob', age=35, salary=48000, position='Data Scientist'),
 EmployeeModel(name='Charlie', age=28, salary=40000, position='Machine Learning Engineer'),
 EmployeeModel(name='David', age=40, salary=49000, position='Lead Data Engineer'),
 EmployeeModel(name='Eve', age=32, salary=42000, position='AI Researcher'),
 EmployeeModel(name='Frank', age=29, salary=38000, position='Data Analyst'),
 EmployeeModel(name='Grace', age=38, salary=49000, position='Senior AI Engineer'),
 EmployeeModel(name='Heidi', age=31, salary=43000, position='MLOps Engineer'),
 EmployeeModel(name='Ivan', age=34, salary=47000, position='Data Architect'),
 EmployeeModel(name='Judy', age=27, salary=39000, position='Junior Data Engineer')]

In [53]:
# Check that the agent returned as many employees as the prompt asked for.
len(employees)

10

In [54]:
# The `=` specifier inside the f-string echoes the expression text together
# with its value, so the loop variable's name shows up in the printed output.
# Note that this loop rebinds the notebook-level name `employee`.
for employee in employees:
    print(f"{employee.name = } and {employee.salary = }")

employee.name = 'Alice' and employee.salary = 45000
employee.name = 'Bob' and employee.salary = 48000
employee.name = 'Charlie' and employee.salary = 40000
employee.name = 'David' and employee.salary = 49000
employee.name = 'Eve' and employee.salary = 42000
employee.name = 'Frank' and employee.salary = 38000
employee.name = 'Grace' and employee.salary = 49000
employee.name = 'Heidi' and employee.salary = 43000
employee.name = 'Ivan' and employee.salary = 47000
employee.name = 'Judy' and employee.salary = 39000


## CV or resume model - a more complex and nested model

In [55]:
class ExperienceModel(BaseModel):
    # One work-experience entry of a CV; used as the element type of
    # CvModel.experiences.
    title: str
    company: str
    description: str
    start_year: int
    # NOTE(review): required int, so an ongoing position still needs an end
    # year - confirm whether this should be optional.
    end_year: int


class EducationModel(BaseModel):
    # One education entry of a CV; used as the element type of
    # CvModel.educations.
    title: str
    education_area: str
    school: str
    description: str
    start_year: int
    end_year: int

class CvModel(BaseModel):
    # Top-level CV: two scalar fields plus two lists of nested models.
    name: str
    age: int
    experiences: list[ExperienceModel]
    educations: list[EducationModel]

result = await agent.run(
    "Create a swedish person applying for a data engineering postition",
    output_type=CvModel
)

resume = result.output
resume



CvModel(name='Erik Karlsson', age=32, experiences=[ExperienceModel(title='Data Engineer', company='Swedbank', description='Developed and maintained data pipelines using Kafka and Spark.', start_year=2019, end_year=2023), ExperienceModel(title='Junior Data Engineer', company='Volvo', description='Assisted in building and optimizing ETL processes.', start_year=2017, end_year=2019)], educations=[EducationModel(title='Master of Science in Computer Science', education_area='Data Engineering', school='KTH Royal Institute of Technology', description='Specialized in distributed systems and big data technologies.', start_year=2015, end_year=2017), EducationModel(title='Bachelor of Science in Software Engineering', education_area='Software Engineering', school='Uppsala University', description='Focused on software development and database management.', start_year=2012, end_year=2015)])

In [56]:
# Scalar top-level fields of the CV, shown together as a tuple.
(resume.name, resume.age)

('Erik Karlsson', 32)

In [57]:
# The nested field is a regular Python list of ExperienceModel instances.
resume.experiences

[ExperienceModel(title='Data Engineer', company='Swedbank', description='Developed and maintained data pipelines using Kafka and Spark.', start_year=2019, end_year=2023),
 ExperienceModel(title='Junior Data Engineer', company='Volvo', description='Assisted in building and optimizing ETL processes.', start_year=2017, end_year=2019)]

In [58]:
# Index into the list, then read a field of the nested model.
resume.experiences[0].title

'Data Engineer'

## Optional postprocessing -> load into duckdb and unnest

In [59]:
import dlt 

# Load the CV into a local DuckDB file under the `staging` dataset.
pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

# The CV is passed as a one-element list of plain dicts. The nested lists end
# up in their own tables (cv_entries__educations, cv_entries__experiences),
# which the following cells read back.
# NOTE(review): rows from earlier runs are still present in cv.duckdb (see the
# cv_entries output below), so re-running this cell presumably adds another
# copy - confirm that this is intended.
info = pipeline.run(
    data=[resume.model_dump()],
    loader_file_format="jsonl",
    table_name="cv_entries",
)
print(info)

Pipeline resume_json_duckdb load step completed in 0.04 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/henrik/Documents/github/ai_engineering_henrik_sjogren_de24/video_alongs/07_pydanticai_fundamentals/cv.duckdb location to store data
Load package 1764772032.447328 is LOADED and contains no failed jobs


In [60]:
import duckdb

# Read everything needed into DataFrames while the connection is open, so the
# later cells can work with them after the file has been closed again.
with duckdb.connect("cv.duckdb") as conn:
    desc = conn.sql("desc").df()
    # Parent table first, then the two child tables holding the nested lists.
    cv_entries, educations, experiences = [
        conn.sql(f"from staging.{table}").df()
        for table in (
            "cv_entries",
            "cv_entries__educations",
            "cv_entries__experiences",
        )
    ]

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__educations,"[title, educations_area, school, description, ...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__experiences,"[title, company, description, start_year, end_...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [61]:
# Parent table: one row per loaded CV; `_dlt_id` is the key that the child
# tables point to through `_dlt_parent_id`.
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Björn Borg,45,1764771278.891382,33PTOjI3HlJE+Q
1,Bjorn Borg,35,1764771978.7826188,wx90m2mgIZqZrA
2,Erik Karlsson,32,1764772032.447328,kZOf4TSfFlKErw


In [62]:
# Child table for CvModel.educations.
# NOTE(review): the table has both an `educations_area` and an
# `education_area` column, each filled for different rows - presumably left
# over from an earlier load with a differently named field; confirm.
educations

Unnamed: 0,title,educations_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id,education_area
0,M.Sc. in Computer Science,Data Engineering,KTH Royal Institute of Technology,Master's thesis on real-time data processing.,2014,2016,33PTOjI3HlJE+Q,0,zkHHRc4PobFIIA,
1,B.Sc. in Software Development,Software Engineering,Uppsala University,Focused on database systems and algorithms.,2011,2014,33PTOjI3HlJE+Q,1,xcgrZGJuePVR6w,
2,MSc in Data Science,,KTH Royal Institute of Technology,Specialized in big data technologies,2013,2015,wx90m2mgIZqZrA,0,EQfIjPD6WoPBWQ,Data Science
3,BSc in Computer Science,,Uppsala University,Focused on algorithms and data structures,2010,2013,wx90m2mgIZqZrA,1,ICaYTd1CL0O7tQ,Computer Science
4,Master of Science in Computer Science,,KTH Royal Institute of Technology,Specialized in distributed systems and big dat...,2015,2017,kZOf4TSfFlKErw,0,j6c+aY+3g332jg,Data Engineering
5,Bachelor of Science in Software Engineering,,Uppsala University,Focused on software development and database m...,2012,2015,kZOf4TSfFlKErw,1,wIEPZL8nOcc7/Q,Software Engineering


In [63]:
# Child table for CvModel.experiences; `_dlt_parent_id` links each row back to
# its CV in cv_entries.
experiences

Unnamed: 0,title,company,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Data Engineer,Spotify,Developed and maintained data pipelines.,2018,2023,33PTOjI3HlJE+Q,0,tx0n9fXSEt5D3g
1,Junior Data Engineer,Ericsson,Assisted in data warehouse management.,2016,2018,33PTOjI3HlJE+Q,1,s5Ou6XEw7Wk39Q
2,Data Engineer,Spotify,Developed and maintained data pipelines,2018,2023,wx90m2mgIZqZrA,0,gd1PiMSIjjnNhg
3,Junior Data Engineer,H&M,Assisted in data warehouse management,2015,2018,wx90m2mgIZqZrA,1,RKyHe1QupqWljg
4,Data Engineer,Swedbank,Developed and maintained data pipelines using ...,2019,2023,kZOf4TSfFlKErw,0,/+QwkM+6Se8akg
5,Junior Data Engineer,Volvo,Assisted in building and optimizing ETL proces...,2017,2019,kZOf4TSfFlKErw,1,RgHv5XditNoAYw


In [65]:
# Flatten the CV data into one wide table by joining the parent rows with both
# child tables on dlt's parent/child keys. The unqualified names cv_entries,
# educations and experiences refer to the DataFrames created in the earlier
# cell (DuckDB can query pandas DataFrames in scope by variable name).
#
# Joining two child tables to the same parent yields every education x
# experience combination for each CV, not one row per entry.
#
# NOTE(review): e.education_area is empty for rows that only have a value in
# the `educations_area` column of the educations table - confirm whether those
# rows should be backfilled or reloaded.

duckdb.sql("""
    SELECT 
        cv.name, 
        cv.age, 
        ex.company,
        ex.description AS experience_description,
        ex.start_year AS experience_start_year,
        ex.end_year AS experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year AS education_start_year,
        e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
    

""").df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,Björn Borg,45,Ericsson,Assisted in data warehouse management.,2016,2018,M.Sc. in Computer Science,,KTH Royal Institute of Technology,2014,2016
1,Björn Borg,45,Ericsson,Assisted in data warehouse management.,2016,2018,B.Sc. in Software Development,,Uppsala University,2011,2014
2,Bjorn Borg,35,H&M,Assisted in data warehouse management,2015,2018,MSc in Data Science,Data Science,KTH Royal Institute of Technology,2013,2015
3,Bjorn Borg,35,H&M,Assisted in data warehouse management,2015,2018,BSc in Computer Science,Computer Science,Uppsala University,2010,2013
4,Erik Karlsson,32,Volvo,Assisted in building and optimizing ETL proces...,2017,2019,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2015,2017
5,Erik Karlsson,32,Volvo,Assisted in building and optimizing ETL proces...,2017,2019,Bachelor of Science in Software Engineering,Software Engineering,Uppsala University,2012,2015
6,Björn Borg,45,Spotify,Developed and maintained data pipelines.,2018,2023,M.Sc. in Computer Science,,KTH Royal Institute of Technology,2014,2016
7,Björn Borg,45,Spotify,Developed and maintained data pipelines.,2018,2023,B.Sc. in Software Development,,Uppsala University,2011,2014
8,Bjorn Borg,35,Spotify,Developed and maintained data pipelines,2018,2023,MSc in Data Science,Data Science,KTH Royal Institute of Technology,2013,2015
9,Bjorn Borg,35,Spotify,Developed and maintained data pipelines,2018,2023,BSc in Computer Science,Computer Science,Uppsala University,2010,2013
