<a href="https://colab.research.google.com/github/Fuenfgeld/Agent_Tutorial_PydanticAI/blob/main/02_StructuredData_ResultValidators_PydanticAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install pydantic-ai together with nest_asyncio and logfire, which the setup cell below
# imports / refers to. `-q` keeps pip's output short.
# NOTE(review): no versions are pinned, so the notebook runs against whatever release pip
# resolves on the day it is executed.
%pip -q install pydantic-ai
%pip -q install nest_asyncio
%pip -q install logfire

In [None]:
!git clone https://github.com/Fuenfgeld/Agent_Tutorial_PydanticAI.git

In [None]:
import os
from google.colab import userdata

# Look up the API keys in the Colab secrets store (one secret per provider).
keyAntropic = userdata.get('Claude')
keyOpenAI = userdata.get('openAI')
keyLogFire = userdata.get('logfire')  # only referenced by the disabled logfire line below

# Publish the keys as environment variables.
# Presumably the model clients read them from there -- confirm against the pydantic-ai docs.
os.environ.update({
    "OPENAI_API_KEY": keyOpenAI,
    "ANTHROPIC_API_KEY": keyAntropic,
})

# Patch asyncio so nested event loops are allowed inside the notebook kernel.
import nest_asyncio
nest_asyncio.apply()

# Tracing is switched off; enabling the next line additionally requires `import logfire`.
#logfire.configure(token=keyLogFire)

# Structured Data models as output
https://ai.pydantic.dev/output/#structured-output

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic import BaseModel

# Define the model: an OpenAIModel built from the model name 'gpt-4o-mini'.
# It is handed to the Agent created further down in this cell.
model = OpenAIModel('gpt-4o-mini')

# Define the output model.
# The class docstring is part of the schema pydantic generates for this model, so it is
# kept short and spelled correctly.
class Box(BaseModel):
    """Captures the size of a box with its units"""
    # The three edge lengths, as whole numbers expressed in `units`.
    width: int
    height: int
    depth: int
    # Unit of measurement as free text (the example prompt below uses "cm").
    units: str

# Define the agent.
# `output_type` makes the agent return a validated `Box` instead of free text. It is the
# keyword that belongs to the `result.output` attribute read below; the older
# `result_type` spelling is deprecated.
agent = Agent(model=model,
              output_type=Box,
              system_prompt="Extract me the dimensions of a box",)

# Run the agent synchronously on a single sentence.
result = agent.run_sync("The box is 10x20x30 cm")

# Show that the output is a typed object, not a string: inspect the model class and
# the Python types of two of its fields.
print( f"""
The result is a pydantic data class of type: {type(result.output)}
within the width: {result.output.width} width type: {type(result.output.width)}
within the unit: {result.output.units} units type: {type(result.output.units)}
""")

## Nested Data Structure

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic import BaseModel, Field

from typing import List, Optional
from datetime import date
from enum import Enum
from pprint import pprint

# Define the model: an OpenAIModel built from the model name 'gpt-4o-mini'.
# This rebinds the notebook-global `model` to an equivalent object so the cell can run on its own.
model = OpenAIModel('gpt-4o-mini')


# Enum to represent gender options — teaches use of fixed choices.
# Mixing in `str` makes every member a plain string as well as an enum member.
class Gender(str, Enum):
    """Enum to represent gender options"""
    male = "male"
    female = "female"
    other = "other"


class Allergy(BaseModel):
    """Describes an allergy, including the substance and optional reaction details"""
    # Required: the allergen itself.
    substance: str = Field(..., description="What the patient is allergic to ")
    # Optional, as the docstring states: defaults to None when no reaction is reported.
    # This matches how the other Optional fields in this cell are declared.
    reaction: Optional[str] = Field(None, description="Description of the reaction")


# NOTE: the "data Model" cell further down defines another class with this same name,
# which rebinds `Medication` once that cell has run.
class Medication(BaseModel):
    """Describes a medication the patient is currently taking"""
    # All three fields are required (`...` means "no default").
    name: str = Field(..., description="Name of the medication ")
    dose_mg: float = Field(..., description="Dosage in milligrams ")
    frequency_per_day: int = Field(..., description="How many times per day the medication is taken")


class Condition(BaseModel):
    """Represents a past or current medical condition"""
    # NOTE: the description below contains an example ('Asthma'); priming an LLM with
    # examples can be problematic because it may bias the generated values.
    name: str = Field(..., description="Name of the condition (e.g., 'Asthma')")
    # Optional: stays None when no diagnosis date is given.
    diagnosed_date: Optional[date] = Field(None, description="Date the condition was diagnosed")
    # Required flag.
    chronic: bool = Field(..., description="Whether the condition is long-term")


# Top-level output model of this cell; it nests the Allergy, Medication and Condition models
# defined above and uses the Gender enum.
# NOTE: the "data Model" cell further down defines another class named `PatientHistory`.
class PatientHistory(BaseModel):
    """Captures a patient's overall medical history including medications, allergies, and diagnoses"""
    # Required demographic fields.
    name: str = Field(..., description="Full name of the patient")
    birth_date: date = Field(..., description="Patient's date of birth")
    gender: Gender = Field(..., description="Patient's gender")
    # Optional measurements; None when not provided.
    height_cm: Optional[float] = Field(None, description="Height in centimeters")
    weight_kg: Optional[float] = Field(None, description="Weight in kilograms")
    smoker: bool = Field(..., description="Whether the patient currently smokes")

    # Nested collections; each defaults to a fresh empty list via default_factory.
    allergies: List[Allergy] = Field(default_factory=list, description="List of known allergies")
    medications: List[Medication] = Field(default_factory=list, description="List of current medications")
    conditions: List[Condition] = Field(default_factory=list, description="List of medical conditions")

# Define the agent.
# `output_type` (the current name of the deprecated `result_type` keyword) makes the agent
# return a validated `PatientHistory`, which is what `result.output` below expects.
agent = Agent(model=model,
              output_type=PatientHistory,
              system_prompt="You are a writer for synthetic patient history, the history will be used as examples for students",)

# Run the agent; temperature 1.0 is passed through as a model setting for this run.
result = agent.run_sync("Generate patient history for Jane Bond", model_settings={'temperature': 1.0})

print( f"""
The result is a pydantic data class of type: {type(result.output)}
here are the details:
""")
# model_dump_json already returns an indented JSON string, so plain print() is used:
# pprint() would show the string's repr (quotes and escaped newlines) instead of the JSON.
print(result.output.model_dump_json(indent=2))

## Data extraction from unstructured text

In [None]:
# Load the sample doctor's letter from the cloned tutorial repository.
# The encoding is stated explicitly so the result does not depend on the platform default
# (assumes the file is UTF-8 -- TODO confirm against the repository).
with open("/content/Agent_Tutorial_PydanticAI/data/JoubertEng.txt", 'r', encoding="utf-8") as file:
    dr_letter = file.read()

In [None]:
# Show the raw letter text that is passed to the extraction agent further down.
print(dr_letter)

### Data model

In [None]:
import os
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic import BaseModel, Field

from typing import List, Optional
from datetime import date
from enum import Enum
from pprint import pprint

# Define the model: an OpenAIModel built from the model name 'gpt-4o-mini'.
# This rebinds the notebook-global `model` to an equivalent object so the cell can run on its own.
model = OpenAIModel('gpt-4o-mini')


class Medication(BaseModel):
    # One medication entry taken from the letter; only the name is mandatory.
    # (Rebinds the `Medication` name introduced in the "Nested Data Structure" cell.)
    name: str
    dosage: Optional[str] = None
    frequency: Optional[str] = None

    def __str__(self):
        """Return "name: dosage: frequency", leaving out dosage/frequency when unset or empty."""
        optional_parts = (self.dosage, self.frequency)
        return ": ".join([self.name, *(part for part in optional_parts if part)])

# History section of the letter. Rebinds the `PatientHistory` name introduced in the
# "Nested Data Structure" cell with a simpler, extraction-oriented shape.
class PatientHistory(BaseModel):
    # Required: the reason for the visit.
    chief_complaint: str
    # Optional collections; both default to an empty list.
    current_medications: List[Medication] = []
    allergies: List[str] = []
    # Optional free text; None when absent.
    other_history: Optional[str] = None

# Top-level output model for the extraction agent defined below in this cell.
class MedicalLetter(BaseModel):
    # Patient and visit information (all required except `insurance`)
    patient_name: str
    patient_dob: date
    visit_date: date
    case_number: str
    insurance: Optional[str] = None

    # Clinical information (required nested model)
    patient_history: PatientHistory

    # Examination findings (simplified to text descriptions); defaults to an empty list
    examination_findings: List[str] = []

    # Diagnosis and treatment; `diagnoses` is the only required field in this group
    diagnoses: List[str]
    recommendations: List[str] = []
    prescribed_medications: List[Medication] = []
    follow_up: Optional[str] = None

# Define the agent.
# `output_type` (the current name of the deprecated `result_type` keyword) makes the agent
# return a validated `MedicalLetter`, which is what `result.output` below expects.
agent = Agent(model=model,
              output_type=MedicalLetter,
              system_prompt="You are a medical coding assisstent to extract infromation from doctors letter into a structured data model",)

# Run the agent on the letter loaded earlier; temperature 0 is passed as a model setting.
result = agent.run_sync(f"extract the data for the following letter: {dr_letter} ", model_settings={'temperature': 0})

print( f"""
The result is a pydantic data class of type: {type(result.output)}
here are the details:
""")
# model_dump_json already returns an indented JSON string, so plain print() is used:
# pprint() would show the string's repr (quotes and escaped newlines) instead of the JSON.
print(result.output.model_dump_json(indent=2))

# Output Validators

In [None]:
import os
from pydantic_ai import Agent, ModelRetry
from pydantic_ai.models.openai import OpenAIModel
from pydantic import BaseModel

# Define the model: an OpenAIModel built from the model name 'gpt-4o-mini'.
model = OpenAIModel('gpt-4o-mini')



# Define the agent.
# No structured output model is passed here; the validator registered below annotates
# the output it receives as `str`.
# retries=1 is handed to the Agent; presumably it caps how often a ModelRetry raised by the
# validator may re-prompt the model -- confirm against the pydantic-ai docs.
agent = Agent(model=model,
              retries=1,
              system_prompt="see if you have been given all three dimesion to describe the size of a box (width height depth). Answer with a single word either 'correct' or 'wrong'",)

# Register an output validator on the agent defined above.
@agent.output_validator
def output_validator_simple(data: str) -> str:
    """Log the model's answer and reject it when it contains the word 'wrong'.

    Args:
        data: The text output produced by the model.

    Returns:
        The unchanged output when it is accepted.

    Raises:
        ModelRetry: If 'wrong' occurs in the output (case-insensitive).
    """
    print(f"output_validatorinput data:{data}" )
    is_rejected = 'wrong' in data.lower()
    if not is_rejected:
        return data
    raise ModelRetry('wrong response')

# Run the agent
from pydantic_ai.exceptions import UnexpectedModelBehavior

# First prompt: all three dimensions are present.
result = agent.run_sync("The box is 10x20x30 cm")
print(result.output)

# Second prompt: only one dimension is present. If the model keeps answering 'wrong', the
# validator keeps raising ModelRetry until the retry budget is used up and run_sync raises.
# The error is caught and displayed so the cell (and a "Run all") still completes.
try:
    result = agent.run_sync("The box is 10")
    print(result.output)
except UnexpectedModelBehavior as exc:
    print(f"Output validation failed: {exc}")

## Validate structured Output

## Multiple validators

## Validate executable code