## Construct a Pydantic model from text input


In [55]:
from pydantic_ai import Agent

# No output_type is configured here, so result.output is plain text
# (see the AgentRunResult repr in the next cell).
agent = Agent(model="google-gla:gemini-2.5-flash")

# Top-level `await` relies on this being a notebook cell rather than a script.
result = await agent.run("Give me an IT employee working in Sweden, keep it short.")

In [56]:
result

AgentRunResult(output='**Björn Svensson**\nA Software Developer working for a tech startup in Stockholm, specializing in backend systems.')

In [57]:
print(result.output)

**Björn Svensson**
A Software Developer working for a tech startup in Stockholm, specializing in backend systems.


In [58]:
from pydantic import BaseModel, Field


# Fields the agent has to fill in when this class is passed as `output_type`.
class Employee(BaseModel):
    # None of the fields has a default, so all of them are required.
    name: str = Field(description="The full name of the employee")
    age: int = Field(description="The age of the employee")
    role: str = Field(description="The job role of the employee")
    country: str = Field(description="The country where the employee is located")
    position: str = Field(description="The position level of the employee")


# Same agent as before, but with output_type=Employee: result.output is now an
# Employee instance instead of a string (see the repr below). This rebinds the
# `result` name used by the earlier cells.
result = await agent.run(
    "Provide details of an IT employee working in Sweden.", output_type=Employee
)

result

AgentRunResult(output=Employee(name='John Doe', age=30, role='Software Engineer', country='Sweden', position='Individual Contributor'))

In [59]:
result.output

Employee(name='John Doe', age=30, role='Software Engineer', country='Sweden', position='Individual Contributor')

In [60]:
# result.output is an Employee object, so its fields are plain attributes.
employee = result.output
employee.name

'John Doe'

In [61]:
# Convert the model into a plain dict keyed by field name.
employee.model_dump()

{'name': 'John Doe',
 'age': 30,
 'role': 'Software Engineer',
 'country': 'Sweden',
 'position': 'Individual Contributor'}

In [62]:
# Serialize the model to a JSON string, pretty-printed with a two-space indent.
print(employee.model_dump_json(indent=2))

{
  "name": "John Doe",
  "age": 30,
  "role": "Software Engineer",
  "country": "Sweden",
  "position": "Individual Contributor"
}


### List of employees


In [63]:
from pydantic import BaseModel, Field


# Redefines Employee from the earlier cell, adding a `salary` field.
class Employee(BaseModel):
    # None of the fields has a default, so all of them are required.
    name: str = Field(description="The full name of the employee")
    age: int = Field(description="The age of the employee")
    role: str = Field(description="The job role of the employee")
    country: str = Field(description="The country where the employee is located")
    position: str = Field(description="The position level of the employee")
    salary: int = Field(description="The monthly salary of the employee in SEK")


# output_type=list[Employee] makes result.output a list of Employee objects.
# NOTE(review): the 50000-100000 SEK range is only stated in the prompt; the
# `salary` field itself declares no bounds, so the Employee class would not
# reject an out-of-range value.
result = await agent.run(
    "Give me ten employees in Ai and Data Engineering fields working in Sweden, roles can vary but salary must be between 50000 and 100000 SEK.",
    output_type=list[Employee],
)

employees = result.output
employees

[Employee(name='Alice Johnson', age=32, role='AI Engineer', country='Sweden', position='Senior', salary=65000),
 Employee(name='Bob Anderson', age=28, role='Data Engineer', country='Sweden', position='Mid-level', salary=58000),
 Employee(name='Charlie Brown', age=35, role='Machine Learning Scientist', country='Sweden', position='Lead', salary=85000),
 Employee(name='Diana Miller', age=30, role='Data Scientist', country='Sweden', position='Senior', salary=72000),
 Employee(name='Eve Davis', age=25, role='Junior AI Engineer', country='Sweden', position='Junior', salary=52000),
 Employee(name='Frank White', age=40, role='Principal Data Engineer', country='Sweden', position='Principal', salary=95000),
 Employee(name='Grace Lee', age=33, role='AI Research Engineer', country='Sweden', position='Senior', salary=78000),
 Employee(name='Harry Clark', age=29, role='Big Data Engineer', country='Sweden', position='Mid-level', salary=61000),
 Employee(name='Ivy Rodriguez', age=38, role='Head of AI'

In [64]:
len(employees)

10

In [65]:
# Print every employee as indented JSON.
# Note: the loop variable reuses the name `employee`, so the single employee
# bound in an earlier cell is overwritten by the last element of the list.
for employee in employees:
    print(employee.model_dump_json(indent=2))

{
  "name": "Alice Johnson",
  "age": 32,
  "role": "AI Engineer",
  "country": "Sweden",
  "position": "Senior",
  "salary": 65000
}
{
  "name": "Bob Anderson",
  "age": 28,
  "role": "Data Engineer",
  "country": "Sweden",
  "position": "Mid-level",
  "salary": 58000
}
{
  "name": "Charlie Brown",
  "age": 35,
  "role": "Machine Learning Scientist",
  "country": "Sweden",
  "position": "Lead",
  "salary": 85000
}
{
  "name": "Diana Miller",
  "age": 30,
  "role": "Data Scientist",
  "country": "Sweden",
  "position": "Senior",
  "salary": 72000
}
{
  "name": "Eve Davis",
  "age": 25,
  "role": "Junior AI Engineer",
  "country": "Sweden",
  "position": "Junior",
  "salary": 52000
}
{
  "name": "Frank White",
  "age": 40,
  "role": "Principal Data Engineer",
  "country": "Sweden",
  "position": "Principal",
  "salary": 95000
}
{
  "name": "Grace Lee",
  "age": 33,
  "role": "AI Research Engineer",
  "country": "Sweden",
  "position": "Senior",
  "salary": 78000
}
{
  "name": "Harry Cla

### CV or Resume model - a more complex and nested model


In [66]:
# One work-history entry; used as the element type of CV.experiences.
class Experience(BaseModel):
    company: str
    role: str
    duration: str  # free text, e.g. '3 years' in the sample output below

# One education entry; used as the element type of CV.education.
class Education(BaseModel):
    institution: str
    degree: str
    year: int  # NOTE(review): presumably the graduation year - confirm

# Top-level CV model: scalar fields plus lists of strings and of the nested
# Experience and Education models.
class CV(BaseModel):
    name: str
    age: int
    skills: list[str]
    experiences: list[Experience]
    education: list[Education]

# Request a complete CV in a single call; the nested Experience/Education
# entries come back as model instances too (see resume.experiences below).
result = await agent.run(
    "Generate a CV for a fictional Data Engineer with 5 years of experience in Python, SQL, and cloud technologies.",
    output_type=CV,
)
resume = result.output


In [67]:
resume.name

'John Doe'

In [68]:
resume.experiences

[Experience(company='DataFlow Innovations', role='Senior Data Engineer', duration='3 years'),
 Experience(company='Tech Solutions Inc.', role='Data Engineer', duration='2 years')]

### Optional post-processing: load into DuckDB and unnest


In [69]:
import dlt

# dlt pipeline that writes into the local DuckDB file cv.duckdb, dataset "staging".
pipeline = dlt.pipeline(
    pipeline_name="cv_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

# model_dump() turns the nested CV model into plain dicts/lists. The list-valued
# fields end up in separate child tables (cv_entries__skills,
# cv_entries__experiences, cv_entries__education - see the `desc` output below).
# NOTE(review): no write_disposition is passed, and the cv_entries table shown
# further down holds rows from three different load ids, so every re-run of this
# cell appears to append another CV. Confirm, and consider
# write_disposition="replace" if only the latest CV should be kept.
info = pipeline.run(
    data=[resume.model_dump()], loader_file_format="jsonl", table_name="cv_entries"
)
print(info)

Pipeline cv_json_duckdb load step completed in 0.09 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/kidquatro/Documents/STI-DE24/Ai Engineering/ai-engineering-robin-sundman-nilsson-de24/07_pydanticai_fundmentals/cv.duckdb location to store data
Load package 1764612490.4624572 is LOADED and contains no failed jobs


In [70]:
import duckdb

# Materialize everything as DataFrames inside the `with` block, so the frames
# can still be inspected in later cells after the block has exited.
with duckdb.connect("cv.duckdb") as conn:
    desc = conn.sql("desc").df()  # overview of all tables in the database file
    cv_entries = conn.sql("from staging.cv_entries").df()  # parent table, one row per loaded CV
    # Child tables; their _dlt_parent_id column points at cv_entries._dlt_id.
    education = conn.sql("from staging.cv_entries__education").df()
    experiences = conn.sql("from staging.cv_entries__experiences").df()

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__education,"[value, _dlt_parent_id, _dlt_list_idx, _dlt_id...","[VARCHAR, VARCHAR, BIGINT, VARCHAR, VARCHAR, V...",False
5,cv,staging,cv_entries__experiences,"[value, _dlt_parent_id, _dlt_list_idx, _dlt_id...","[VARCHAR, VARCHAR, BIGINT, VARCHAR, VARCHAR, V...",False
6,cv,staging,cv_entries__skills,"[value, _dlt_parent_id, _dlt_list_idx, _dlt_id]","[VARCHAR, VARCHAR, BIGINT, VARCHAR]",False


In [71]:
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Alice Smith,30,1764611634.045873,/AxPxpq/Vo8abQ
1,Alice Smith,28,1764612352.221492,ZI0yXvOal7z/eA
2,John Doe,30,1764612490.4624572,iPmOLve8dZPB4w


In [72]:
education

Unnamed: 0,value,_dlt_parent_id,_dlt_list_idx,_dlt_id,institution,degree,year
0,"M.Sc. in Data Science, University of Technolog...",/AxPxpq/Vo8abQ,0,QI5DKi3nrnhhXg,,,
1,"B.Sc. in Computer Science, State University (2...",/AxPxpq/Vo8abQ,1,b7iWx7Rb56SX5A,,,
2,"Master of Science in Data Science, Tech Univer...",ZI0yXvOal7z/eA,0,wLwxul009m7khg,,,
3,"Bachelor of Science in Computer Science, State...",ZI0yXvOal7z/eA,1,Xi8cTxqiURoeBA,,,
4,,iPmOLve8dZPB4w,0,9n27n+NZZ3myDw,University of Example,Master of Science in Computer Science,2017.0


In [73]:
experiences

Unnamed: 0,value,_dlt_parent_id,_dlt_list_idx,_dlt_id,company,role,duration
0,"Senior Data Engineer, Tech Solutions Inc. (202...",/AxPxpq/Vo8abQ,0,FPl5Ksjpc+n4eQ,,,
1,"Data Engineer, Data Innovations LLC. (2019 - 2...",/AxPxpq/Vo8abQ,1,28aztqHZD++KQw,,,
2,"Senior Data Engineer, GlobalTech Solutions (Ju...",ZI0yXvOal7z/eA,0,cENw7UJ7/PuTIQ,,,
3,"Data Engineer, Innovate Data Systems (June 201...",ZI0yXvOal7z/eA,1,w/fqiGPoBm2v4g,,,
4,,iPmOLve8dZPB4w,0,yhaxTl1CPx/PBQ,DataFlow Innovations,Senior Data Engineer,3 years
5,,iPmOLve8dZPB4w,1,xn1YUQhn4SYiQA,Tech Solutions Inc.,Data Engineer,2 years


In [74]:
# duckdb.sql is called on the module (not on a connection to cv.duckdb) and the
# table names carry no `staging.` prefix, so they presumably resolve to the
# DataFrames `cv_entries`, `education` and `experiences` from the earlier cell.
# NOTE(review): both child tables are joined straight to the parent, so every
# education row is paired with every experience row of the same CV. In the
# output below John Doe's single education entry is repeated for each of his two
# experiences, and each Alice Smith CV yields 2 x 2 = 4 rows. Query the two child
# tables separately if one row per entry is wanted.
duckdb.sql(
    """
    SELECT 
        cv.name, 
        cv.age, 
        ex.company, 
        ex.role, 
        ex.duration, 
        ed.institution, 
        ed.degree, 
        ed.year
    FROM cv_entries cv
    LEFT JOIN education ed ON cv._dlt_id = ed._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
""").df()

Unnamed: 0,name,age,company,role,duration,institution,degree,year
0,Alice Smith,30,,,,,,
1,Alice Smith,30,,,,,,
2,Alice Smith,28,,,,,,
3,Alice Smith,28,,,,,,
4,John Doe,30,DataFlow Innovations,Senior Data Engineer,3 years,University of Example,Master of Science in Computer Science,2017.0
5,John Doe,30,Tech Solutions Inc.,Data Engineer,2 years,University of Example,Master of Science in Computer Science,2017.0
6,Alice Smith,30,,,,,,
7,Alice Smith,30,,,,,,
8,Alice Smith,28,,,,,,
9,Alice Smith,28,,,,,,
