# Construct pydantic model from text input

In [1]:
from pydantic_ai import Agent

agent = Agent(model= "google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT employee working in sweden, keep it short")
result



AgentRunResult(output='**Björn.**\nDevOps Engineer at a tech company in Stockholm. He enjoys his fika breaks.')

In [2]:
# print() renders the newline and Markdown markers that the repr above shows escaped.
print(result.output)

**Björn.**
DevOps Engineer at a tech company in Stockholm. He enjoys his fika breaks.


In [3]:
from pydantic import BaseModel, Field

# Schema for one employee; passed as `output_type` to agent.run in the cells below
# so the agent's answer comes back as a validated instance instead of free text.
# (Documented with comments rather than a docstring on purpose: a class docstring
# becomes part of the generated schema.)
class EmployeeModel(BaseModel):
    name: str
    age: int
    # gt/lt are exclusive bounds: 30_000 and 50_000 themselves are rejected.
    salary: int = Field(gt=30_000, lt= 50_000)
    position: str
    
result = await agent.run(
    "Give me an IT employee working in sweden", output_type=EmployeeModel
)

result

AgentRunResult(output=EmployeeModel(name='Bjorn Borg', age=45, salary=45000, position='IT Consultant'))

In [4]:
# Pull the validated EmployeeModel instance out of the run result.
employee = result.output

employee

EmployeeModel(name='Bjorn Borg', age=45, salary=45000, position='IT Consultant')

In [5]:
# The model's fields are available as ordinary attributes.
employee.name, employee.age, employee.position

('Bjorn Borg', 45, 'IT Consultant')

In [6]:
# Convert the model instance into a plain dict.
employee.model_dump()

{'name': 'Bjorn Borg', 'age': 45, 'salary': 45000, 'position': 'IT Consultant'}

In [7]:
# Serialize to a JSON string, indented by two spaces; print() shows it unescaped.
print(employee.model_dump_json(indent=2))

{
  "name": "Bjorn Borg",
  "age": 45,
  "salary": 45000,
  "position": "IT Consultant"
}


Several employees at once: ask the agent for a list of employees.

In [8]:
result = await agent.run(
    """Give me ten employee in AI and data engineering field, 
    roles can vary, but salary must be between 30000 and 50000""",
    output_type=list[EmployeeModel]
)


employees = result.output
employees



[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob Johnson', age=35, salary=49999, position='Data Scientist'),
 EmployeeModel(name='Charlie Brown', age=28, salary=40000, position='Machine Learning Engineer'),
 EmployeeModel(name='Diana Prince', age=32, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Ethan Hunt', age=29, salary=42000, position='MLOps Engineer'),
 EmployeeModel(name='Fiona Glenanne', age=31, salary=47000, position='AI Research Scientist'),
 EmployeeModel(name='George Lucas', age=33, salary=49000, position='Big Data Engineer'),
 EmployeeModel(name='Hannah Abbott', age=27, salary=38000, position='Junior AI Engineer'),
 EmployeeModel(name='Ivan Drago', age=34, salary=46000, position='Senior Data Analyst'),
 EmployeeModel(name='Julia Roberts', age=26, salary=39000, position='Data Quality Engineer')]

In [9]:
# Check how many employees came back (the prompt asked for ten).
len(employees)

10

In [10]:
# Print one "name / salary" line per employee using f-string `=` debug specifiers.
#
# The lines are built in a generator expression so the iteration variable stays
# local to it. A plain `for employee in employees:` loop would rebind the
# notebook-level `employee` (the single employee created in an earlier cell) to
# the last list element, and re-running those earlier inspection cells would
# then silently show a different person.
print(
    "\n".join(
        f"{employee.name= }, and {employee.salary =}" for employee in employees
    )
)

employee.name= 'Alice Smith', and employee.salary =45000
employee.name= 'Bob Johnson', and employee.salary =49999
employee.name= 'Charlie Brown', and employee.salary =40000
employee.name= 'Diana Prince', and employee.salary =48000
employee.name= 'Ethan Hunt', and employee.salary =42000
employee.name= 'Fiona Glenanne', and employee.salary =47000
employee.name= 'George Lucas', and employee.salary =49000
employee.name= 'Hannah Abbott', and employee.salary =38000
employee.name= 'Ivan Drago', and employee.salary =46000
employee.name= 'Julia Roberts', and employee.salary =39000


## CV or resume model - a more complex and nested model

In [11]:

# One work-experience entry; used as the item type of CvModel.experiences.
# NOTE(review): the class name has a doubled "e" ("Experiencee"), which looks like
# a typo. It is left as-is here because the name shows up in the displayed reprs.
class ExperienceeModel(BaseModel):
    title: str
    company: str
    description: str
    start_year: int
    # NOTE(review): required int, so an ongoing position has no way to omit it — confirm intended.
    end_year: int
    
    
# One education entry; used as the item type of CvModel.education.
class EducationModel(BaseModel):
    title: str
    education_area: str
    school: str 
    description: str
    start_year: int
    end_year: int



# Top-level CV schema: two scalar fields plus two lists of nested models.
# Passed as `output_type` to agent.run below.
class CvModel(BaseModel):
    name: str
    age: int
    experiences: list[ExperienceeModel]
    education: list[EducationModel]



result = await agent.run(
    "Create a swedish person applying for a data engineering position", output_type=CvModel
)

resume = result.output
resume

CvModel(name='Anna Lindqvist', age=32, experiences=[ExperienceeModel(title='Data Engineer', company='GlobalTech Solutions', description='Developed and maintained scalable data pipelines using Apache Spark and Kafka. Designed and implemented ETL processes for various data sources, ensuring data quality and reliability. Collaborated with cross-functional teams to understand data requirements and deliver robust data solutions.', start_year=2019, end_year=2023), ExperienceeModel(title='Junior Data Engineer', company='DataInsights AB', description='Assisted in the development and deployment of data warehousing solutions. Wrote SQL queries and scripts for data extraction and transformation. Monitored data pipeline performance and troubleshooted issues.', start_year=2017, end_year=2019)], education=[EducationModel(title='Master of Science in Computer Science', education_area='Data Engineering', school='KTH Royal Institute of Technology', description="Specialized in distributed systems, big da

In [12]:
# Top-level scalar fields of the CV, shown together as a tuple.
(resume.name, resume.age)

('Anna Lindqvist', 32)

In [13]:
# Nested entries are reached with normal list indexing and attribute access.
resume.experiences[0].title

'Data Engineer'

In [14]:
# The dump includes the nested models as lists of dicts.
resume.model_dump()

{'name': 'Anna Lindqvist',
 'age': 32,
 'experiences': [{'title': 'Data Engineer',
   'company': 'GlobalTech Solutions',
   'description': 'Developed and maintained scalable data pipelines using Apache Spark and Kafka. Designed and implemented ETL processes for various data sources, ensuring data quality and reliability. Collaborated with cross-functional teams to understand data requirements and deliver robust data solutions.',
   'start_year': 2019,
   'end_year': 2023},
  {'title': 'Junior Data Engineer',
   'company': 'DataInsights AB',
   'description': 'Assisted in the development and deployment of data warehousing solutions. Wrote SQL queries and scripts for data extraction and transformation. Monitored data pipeline performance and troubleshooted issues.',
   'start_year': 2017,
   'end_year': 2019}],
 'education': [{'title': 'Master of Science in Computer Science',
   'education_area': 'Data Engineering',
   'school': 'KTH Royal Institute of Technology',
   'description': "Spe

## Optional post-processing: load into DuckDB and unnest


In [15]:
import dlt

# Load the dumped CV into a local DuckDB file (cv.duckdb, dataset "staging").
# As the table listing further down shows, the nested `education` and
# `experiences` lists end up in their own child tables.
pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

# write_disposition="replace": dlt appends by default, so without it every
# re-run of this cell would add another copy of the CV to cv.duckdb and the
# join at the end of the notebook would return duplicated rows.
info = pipeline.run(
    data=[resume.model_dump()],
    loader_file_format="jsonl",
    table_name="cv_entries",
    write_disposition="replace",
)

print(info)

Pipeline resume_json_duckdb load step completed in 0.18 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:///c:\Users\MAER\Documents\STI\dataplattform_maskininlärning_artificiell_intelligens\AI_engineering_Marcus_Ericsson_de24\video_alongs\07_pydanticAI_fundamental\cv.duckdb location to store data
Load package 1764190299.5868092 is LOADED and contains no failed jobs


In [16]:
import duckdb

# Read the table listing and the three CV tables back as DataFrames.
# The unpacking assignment drains the generator inside the `with` block, so
# every query has been materialized with .df() before the connection closes.
with duckdb.connect("cv.duckdb") as conn:
    desc, cv_entries, educations, experiences = (
        conn.sql(statement).df()
        for statement in (
            "desc",
            "from staging.cv_entries",
            "from staging.cv_entries__education",
            "from staging.cv_entries__experiences",
        )
    )

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__education,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__experiences,"[title, company, description, start_year, end_...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [17]:
# Parent table: the CV's scalar fields plus dlt's _dlt_load_id / _dlt_id columns.
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Anna Lindqvist,32,1764190299.5868092,LdkIxp+/1Hynmw


In [18]:
# Child table: one row per education entry; _dlt_parent_id holds the parent CV's _dlt_id.
educations

Unnamed: 0,title,education_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,"Specialized in distributed systems, big data t...",2015,2017,LdkIxp+/1Hynmw,0,B0riEEHjnlVdng
1,Bachelor of Science in Software Engineering,Software Development,Uppsala University,Gained strong foundational knowledge in progra...,2012,2015,LdkIxp+/1Hynmw,1,RokevjIeN3SjvA


In [19]:
# Child table: one row per experience entry; _dlt_parent_id holds the parent CV's _dlt_id.
experiences

Unnamed: 0,title,company,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Data Engineer,GlobalTech Solutions,Developed and maintained scalable data pipelin...,2019,2023,LdkIxp+/1Hynmw,0,ippe1hjxtKxmGg
1,Junior Data Engineer,DataInsights AB,Assisted in the development and deployment of ...,2017,2019,LdkIxp+/1Hynmw,1,z2kcLtbFMoO5CQ


In [20]:
# Flatten the CV by joining the three DataFrames loaded above.
#
# duckdb.sql() runs on DuckDB's default in-memory connection, not on cv.duckdb;
# `cv_entries`, `educations` and `experiences` are the pandas DataFrames from
# the earlier cell, picked up by name. The parent is therefore referenced
# unqualified: the in-memory connection has no `cv` schema, so a `cv.` prefix
# would at best be ignored.
#
# Both child tables join to the same parent row, so the result holds every
# education x experience combination (2 x 2 = 4 rows for the CV shown above).
duckdb.sql(
    """
    SELECT 
        cv.name,
        cv.age,
        ex.company,
        ex.description AS experience_description,
        ex.start_year AS experience_start_year,
        ex.end_year AS experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year AS education_start_year,
        e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
    """
).df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,educational_start_year,education_end_year
0,Anna Lindqvist,32,DataInsights AB,Assisted in the development and deployment of ...,2017,2019,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2015,2017
1,Anna Lindqvist,32,DataInsights AB,Assisted in the development and deployment of ...,2017,2019,Bachelor of Science in Software Engineering,Software Development,Uppsala University,2012,2015
2,Anna Lindqvist,32,GlobalTech Solutions,Developed and maintained scalable data pipelin...,2019,2023,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2015,2017
3,Anna Lindqvist,32,GlobalTech Solutions,Developed and maintained scalable data pipelin...,2019,2023,Bachelor of Science in Software Engineering,Software Development,Uppsala University,2012,2015
