<a href="https://colab.research.google.com/github/Fuenfgeld/Agent_Tutorial_PydanticAI/blob/main/02_StructuredData_ResultValidators_PydanticAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip -q install pydantic-ai
%pip -q install nest_asyncio
%pip -q install logfire

In [None]:
!git clone https://github.com/Fuenfgeld/Agent_Tutorial_PydanticAI.git

In [None]:
import os
from google.colab import userdata

# Fetch the API keys through google.colab's userdata helper; the names passed
# here ('Claude', 'openAI', 'logfire') must match the stored secret names.
keyAntropic = userdata.get('Claude')
keyOpenAI = userdata.get('openAI')
# NOTE(review): within this part of the notebook keyLogFire is only referenced
# by the commented-out logfire.configure call below.
keyLogFire = userdata.get('logfire')


# Publish the keys as environment variables (presumably where the model
# clients look them up, since no key is passed to them explicitly).
os.environ["OPENAI_API_KEY"] = keyOpenAI
os.environ["ANTHROPIC_API_KEY"] = keyAntropic

import nest_asyncio
# Patch asyncio to tolerate nested event loops; presumably needed because the
# notebook kernel already runs a loop when run_sync is called - TODO confirm.
nest_asyncio.apply()
# Logfire tracing is switched off; no `import logfire` is visible in this
# section, so one would have to be added before enabling the next line.
#logfire.configure(token=keyLogFire)

# Structured Data models as output
https://ai.pydantic.dev/output/#structured-output

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel

# Define the model
model = OpenAIChatModel('gpt-4o-mini')

# Define the output model
class Box(BaseModel):
    """Captures the size of a box with its units"""
    # The docstring above ends up in the schema description handed to the
    # model, so its spelling matters (was "Caputers").
    width: int   # three integer dimensions ...
    height: int
    depth: int
    units: str   # ... and the unit they are expressed in, e.g. "cm"

# Build an agent whose answer must validate as a Box
agent = Agent(
    model=model,
    system_prompt="Extract me the dimensions of a box",
    output_type=Box,
)

# Push one free-text description through the agent (blocking call)
result = agent.run_sync("The box is 10x20x30 cm")

# Show that the output is a typed object, not a string
print( f"""
The result is a pydantic data class of type: {type(result.output)}
within the width: {result.output.width} width type: {type(result.output.width)}
within the unit: {result.output.units} units type: {type(result.output.units)}
""")

## Nested Data Structure

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel, Field

from typing import List, Optional
from datetime import date
from enum import Enum
from pprint import pprint

# Define the model
# OpenAIChatModel is the class every other cell of this notebook uses; the
# older OpenAIModel name is a deprecated alias for it.
model = OpenAIChatModel('gpt-4o-mini')


# Enum restricting the gender field to a fixed set of choices
class Gender(str, Enum):
    """Enum to represent gender options"""
    # The str mix-in makes every member a plain string as well as an Enum member
    male = "male"
    female = "female"
    other = "other"


class Allergy(BaseModel):
    """Describes an allergy, including the substance and optional reaction details"""
    # Required: the allergen itself
    substance: str = Field(..., description="What the patient is allergic to ")
    # Defaults to None so the reaction can be omitted, as the docstring promises
    # (previously Field(...) made the key mandatory even though the type was Optional)
    reaction: Optional[str] = Field(None, description="Description of the reaction")


class Medication(BaseModel):
    """Describes a medication the patient is currently taking"""
    # All three fields are required (no default is given)
    name: str = Field(
        description="Name of the medication ",
    )
    dose_mg: float = Field(
        description="Dosage in milligrams ",
    )
    frequency_per_day: int = Field(
        description="How many times per day the medication is taken",
    )


class Condition(BaseModel):
    """Represents a past or current medical condition"""
    # Caveat: an example inside a description can prime the LLM towards that example
    name: str = Field(
        description="Name of the condition (e.g., 'Asthma')",
    )
    # The diagnosis date may be missing
    diagnosed_date: Optional[date] = Field(
        default=None,
        description="Date the condition was diagnosed",
    )
    chronic: bool = Field(
        description="Whether the condition is long-term",
    )


class PatientHistory(BaseModel):
    """Captures a patient's overall medical history including medications, allergies, and diagnoses"""
    # --- demographics (height and weight may be left out) ---
    name: str = Field(description="Full name of the patient")
    birth_date: date = Field(description="Patient's date of birth")
    gender: Gender = Field(description="Patient's gender")
    height_cm: Optional[float] = Field(default=None, description="Height in centimeters")
    weight_kg: Optional[float] = Field(default=None, description="Weight in kilograms")
    smoker: bool = Field(description="Whether the patient currently smokes")

    # --- nested collections; each instance starts with its own empty list ---
    allergies: List[Allergy] = Field(description="List of known allergies", default_factory=list)
    medications: List[Medication] = Field(description="List of current medications", default_factory=list)
    conditions: List[Condition] = Field(description="List of medical conditions", default_factory=list)

# Define the agent: its answer must validate as a PatientHistory
patient_generator_agent = Agent(model=model,
              output_type=PatientHistory,
              system_prompt="You are a writer for synthetic patient history, the history will be used as examples for students",)

# Run the agent with temperature 1.0
result = patient_generator_agent.run_sync("Generate patient history for Jane Bond", model_settings={'temperature': 1.0})

print( f"""
The result is a pydantic data class of type: {type(result.output)}
here are the details:
""")
# model_dump_json(indent=2) already returns a formatted string. print shows it
# as-is, whereas pprint would show the quoted repr with escaped newlines.
print(result.output.model_dump_json(indent=2))

## Data extraction from unstructured text

In [None]:
# Read the sample doctor's letter into memory. The explicit encoding keeps the
# result independent of the platform's default locale.
with open("/content/Agent_Tutorial_PydanticAI/data/JoubertEng.txt", 'r', encoding="utf-8") as file:
    dr_letter = file.read()

In [None]:
print(dr_letter)

### Data model

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel, Field

from typing import List, Optional
from datetime import date
from enum import Enum
from pprint import pprint

# Define the model
model = OpenAIChatModel('gpt-4o-mini')


class Medication(BaseModel):
    # One medication entry; only the name is mandatory
    name: str
    dosage: Optional[str] = None
    frequency: Optional[str] = None

    def __str__(self):
        """Render as name[: dosage][: frequency], skipping parts that are missing or empty."""
        extras = [detail for detail in (self.dosage, self.frequency) if detail]
        return ": ".join([self.name, *extras])

class PatientHistory(BaseModel):
    # History part of a letter; chief_complaint is the only field without a default
    chief_complaint: str
    current_medications: List[Medication] = []  # defaults to an empty list
    allergies: List[str] = []                   # allergies as plain strings
    other_history: Optional[str] = None         # free text, may be absent

class MedicalLetter(BaseModel):
    # Top-level extraction target used as the agent's output_type below.
    # Fields without a default are required; the others may be omitted.

    # Patient and visit information
    patient_name: str
    patient_dob: date
    visit_date: date
    case_number: str
    insurance: Optional[str] = None

    # Clinical information (nested PatientHistory model)
    patient_history: PatientHistory

    # Examination findings (simplified to text descriptions)
    examination_findings: List[str] = []

    # Diagnosis and treatment; diagnoses is the only required list
    diagnoses: List[str]
    recommendations: List[str] = []
    prescribed_medications: List[Medication] = []
    follow_up: Optional[str] = None

# Define the agent: its answer must validate as a MedicalLetter.
# The system prompt is sent to the model verbatim, so the misspellings
# ("assisstent", "infromation") are corrected here.
agent = Agent(model=model,
              output_type=MedicalLetter,
              system_prompt="You are a medical coding assistant to extract information from doctors letter into a structured data model",)

# Run the agent with temperature 0
result = agent.run_sync(f"extract the data for the following letter: {dr_letter} ", model_settings={'temperature': 0})

print( f"""
The result is a pydantic data class of type: {type(result.output)}
here are the details:
""")
# print shows the indented JSON as-is; pprint would show the quoted repr of
# the string with escaped newlines.
print(result.output.model_dump_json(indent=2))

### Exercise
Create the data model and the extractor for the other doctor's letter:


```
/content/Agent_Tutorial_PydanticAI/data/UtzEng.txt
```



In [None]:
# Load the second sample letter and display it. The explicit encoding keeps
# the result independent of the platform's default locale.
with open("/content/Agent_Tutorial_PydanticAI/data/UtzEng.txt", 'r', encoding="utf-8") as file:
    dr_letter = file.read()
print(dr_letter)

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel, Field

from typing import List, Optional
from datetime import date, datetime
from enum import Enum
from pprint import pprint

# Define the model
model = OpenAIChatModel('gpt-4o-mini')
# Read the letter inside this cell as well, so the cell does not depend on
# dr_letter from an earlier cell. The explicit encoding keeps the result
# independent of the platform's default locale.
with open("/content/Agent_Tutorial_PydanticAI/data/UtzEng.txt", 'r', encoding="utf-8") as file:
    dr_letter = file.read()



class PatientInfo(BaseModel):
    # Who the letter is about; every field is required
    name: str = Field(description="Patient's full name")
    date_of_birth: date = Field(description="Patient's date of birth")
    residence: str = Field(description="Patient's place of residence")

class Diagnosis(BaseModel):
    # One diagnosis; only the condition name is required
    condition: str = Field(description="Name of the diagnosed condition")
    date_diagnosed: Optional[str] = Field(default=None, description="When the condition was diagnosed")
    details: Optional[str] = Field(default=None, description="Additional details about the diagnosis")
    icd_code: Optional[str] = Field(default=None, description="ICD code if available")

class PreviousTherapy(BaseModel):
    # An earlier treatment, given as a period plus a description
    period: str = Field(description="Time period of the therapy")
    description: str = Field(description="Description of the therapy")
    icd_code: Optional[str] = Field(default=None, description="Related ICD code if available")

class Medication(BaseModel):
    # A recommended medication; the purpose may be left out
    name: str = Field(description="Name of the medication")
    dosage: str = Field(description="Dosage instructions")
    purpose: Optional[str] = Field(default=None, description="Purpose of the medication")

class MedicationAdministered(BaseModel):
    # A drug given during a therapy session
    name: str = Field(description="Name of the medication")
    dose: str = Field(description="Dose of medication")
    duration: str = Field(description="Duration of administration")

class ChemotherapySession(BaseModel):
    # One chemotherapy cycle together with the drugs given in it
    cycle_number: int = Field(description="Cycle number of chemotherapy")
    session_date: date = Field(description="Date of administration")
    administered_medications: List[MedicationAdministered] = Field(description="Medications administered")
    tolerance: str = Field(description="How well the therapy was tolerated")

class ClinicalFinding(BaseModel):
    # A single examination finding, tagged with a category
    category: str = Field(description="Category of the finding")
    description: str = Field(description="Detailed description of the finding")

class DoctorsLetter(BaseModel):
    # Root of the extraction schema: combines the helper models defined above.
    # Fields without a default are required.
    letter_date: date = Field(description="Date of the doctor's letter")
    patient: PatientInfo = Field(description="Patient information")
    diagnoses: List[Diagnosis] = Field(description="List of diagnoses")
    current_therapy: Optional[str] = Field(default=None, description="Current therapy")
    previous_therapies: List[PreviousTherapy] = Field(default=[], description="Previous therapies")
    recommended_medications: List[Medication] = Field(default=[], description="Recommended medications")
    therapy_details: Optional[ChemotherapySession] = Field(default=None, description="Details of current therapy session")
    clinical_findings: List[ClinicalFinding] = Field(default=[], description="Clinical findings during examination")
    summary: str = Field(description="Summary of the treatment and patient's condition")
    discharge_date: Optional[date] = Field(default=None, description="Date of discharge")
    discharge_condition: Optional[str] = Field(default=None, description="Patient's condition upon discharge")
    physician: str = Field(description="Name of the physician")

# Define the agent: its answer must validate as a DoctorsLetter.
# The system prompt is sent to the model verbatim, so the misspellings
# ("assisstent", "infromation") are corrected here.
agent = Agent(model=model,
              output_type=DoctorsLetter,
              system_prompt="You are a medical coding assistant to extract information from doctors letter into a structured data model",)

# Run the agent with temperature 0
result = agent.run_sync(f"extract the data for the following letter: {dr_letter} ", model_settings={'temperature': 0})

print( f"""
The result is a pydantic data class of type: {type(result.output)}
here are the details:
""")
# print shows the indented JSON as-is; pprint would show the quoted repr of
# the string with escaped newlines.
print(result.output.model_dump_json(indent=2))

# Output Validators

Sometimes there is a need to go beyond the data type validation provided by Pydantic. For this, PydanticAI offers validation functions via the `agent.output_validator` decorator.

In [None]:
import os
from pydantic_ai import Agent, ModelRetry
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel

# Define the model
model = OpenAIChatModel('gpt-4o-mini')



# Define the agent: no output_type is given and the validator below receives
# a str. retries=1 is the retry budget configured for this agent.
# The prompt goes to the model verbatim, hence the spelling fix ("dimesion").
agent = Agent(model=model,
              retries=1,
              system_prompt="see if you have been given all three dimensions to describe the size of a box (width height depth). Answer with a single word either 'correct' or 'wrong'",)

# Register a validator that inspects the agent's text answer
@agent.output_validator
def output_validator_simple(data: str) -> str:
    """Pass the answer through unless it contains 'wrong' (case-insensitive).

    Raises:
        ModelRetry: when the answer contains 'wrong'.
    """
    print(f"output_validatorinput data:{data}" )
    answer_flags_problem = 'wrong' in data.lower()
    if not answer_flags_problem:
        return data
    raise ModelRetry('wrong response')

# Imported here so the cell stays self-contained
from pydantic_ai import UnexpectedModelBehavior

# Run the agent
# Complete description: the answer is expected to pass the validator.
result = agent.run_sync("The box is 10x20x30 cm")
print(result.output)

# Incomplete description: the validator is expected to keep rejecting the
# answer. Once the retry budget is used up the run raises, so the error is
# caught and displayed instead of leaving the cell in a failed state.
try:
    result = agent.run_sync("The box is 10")
    print(result.output)
except UnexpectedModelBehavior as exc:
    print(f"Output validation failed after retries: {exc}")

## Validate structured Output

In [None]:
import os
from pydantic_ai import Agent, ModelRetry
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel, Field

from datetime import date
from pprint import pprint

# Define the model
model = OpenAIChatModel('gpt-4o-mini')


class PatientInfo(BaseModel):
    """Captures a patient's name and birthdate"""
    # The docstring above ends up in the schema description handed to the
    # model, so its spelling matters (was "borthdate").
    name: str = Field(..., description="Full name of the patient")
    birth_date: date = Field(..., description="Patient's date of birth")

# Build the agent: structured PatientInfo output, retries=1
agent = Agent(
    model=model,
    system_prompt="You are a writer for synthetic patient information",
    output_type=PatientInfo,
    retries=1,
)

@agent.output_validator
def is_valid_date(data: PatientInfo) ->PatientInfo:
    """Accept patients born on or after 1 January 1925.

    Raises:
        ModelRetry: when the birth date lies before the cutoff.
    """
    earliest_allowed = date(1925, 1, 1)
    print(f"validating birthdate:{data.birth_date}" )
    if data.birth_date >= earliest_allowed:
        return data
    raise ModelRetry("Patient is too old generate a new one that is younger")

# Ask for a birth year that lies before the validator's 1925 cutoff
result = agent.run_sync(
    "Generate patient history for Jane Bond she is born 1900",
    model_settings={'temperature': 1.0},
)
print(result.output.birth_date)

## Multiple validators

In [None]:
import os
from pydantic_ai import Agent, RunContext, ModelRetry
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic import BaseModel, Field
from typing import List
from colorama import Fore, Style

model = OpenAIChatModel('gpt-4o-mini')

class PatientSummary(BaseModel):
    """Represents a brief summary of a hypothetical patient."""
    # Every field is required; age, bmi and symptoms are checked by the validators below
    patient_id: str = Field(
        description="A unique identifier for the patient, e.g., PAT-001.",
    )
    age: int = Field(
        description="The patient's age in years.",
    )
    bmi: float = Field(
        description="The patient's Body Mass Index.",
    )
    symptoms: List[str] = Field(
        description="A list of primary symptoms observed.",
    )
    diagnosis_guess: str = Field(
        description="A preliminary, very brief possible diagnosis or area of concern.",
    )

# Define the AI agent: PatientSummary output, retries=3
agent = Agent(
    model=model,
    retries=3,
    output_type=PatientSummary,
    system_prompt="You are a medical data simulator. Generate a plausible, hypothetical patient summary.",
)

# --- Validator Functions ---
@agent.output_validator
def validate_age( result: PatientSummary) -> PatientSummary:
    """Accept ages from 18 to 70 inclusive.

    Raises:
        ModelRetry: when the age lies outside that range.
    """
    age = result.age
    if age < 18 or age > 70:
        raise ModelRetry(f"Age {age} invalid. change the age.")
    return result

@agent.output_validator
def validate_bmi( result: PatientSummary) -> PatientSummary:
    """Accept BMI values from 18.5 to 30.0 inclusive.

    Raises:
        ModelRetry: when the BMI is not inside that range.
    """
    lower_bound, upper_bound = 18.5, 30.0
    bmi_in_range = lower_bound <= result.bmi <= upper_bound
    if bmi_in_range:
        return result
    raise ModelRetry(f"BMI {result.bmi} invalid.change the BMI it should be between {lower_bound} and {upper_bound} .")

@agent.output_validator
def validate_symptoms( result: PatientSummary) -> PatientSummary:
    """Require at least one symptom that is not blank.

    Raises:
        ModelRetry: when the list is empty or holds only whitespace entries.
    """
    non_blank = [entry for entry in (result.symptoms or []) if entry.strip()]
    if non_blank:
        return result
    raise ModelRetry("Symptoms invalid. change the symptoms")

# --- Run the Agent ---
print(Fore.BLUE + "\nAttempting to generate and validate patient summary..." + Style.RESET_ALL)

def run():
    """Generate one patient summary through the agent and print its fields.

    The prompt asks for a BMI of 36, which lies above the 30.0 upper bound
    enforced by validate_bmi. The prompt text is sent to the model verbatim,
    hence the typo fix ("with n BMI" -> "with a BMI").
    """
    patient_data = agent.run_sync("Generate a summary for an adult patient presenting with common flu-like symptoms with a BMI of 36.", model_settings={'temperature': 1.0})
    summary = patient_data.output
    print(Fore.GREEN + "\n--- Validated Patient Summary ---" + Style.RESET_ALL)
    print(f"Patient ID: {summary.patient_id}")
    print(f"Age: {summary.age}")
    print(f"BMI: {summary.bmi}")
    print(f"Symptoms: {', '.join(summary.symptoms)}")
    print(f"Initial Diagnosis Guess: {summary.diagnosis_guess}")

run()

## Validate executable code