# Task 1: Define the Pydantic Data Model
This task defines the Pydantic data model representing the structured format 
for the LLM response. The Weather model includes fields that capture essential 
data points including temperature, humidity, and rainfall with appropriate validations.

In [None]:
# Setup and Configuration
# =======================

# Import modules from pydantic
# Pydantic modules are used to create data models with automatic validation
from pydantic import BaseModel, Field, field_validator

# Define the Pydantic Data Model
# ==============================

# Define the Weather Pydantic model
# Pydantic Models are classes that define the fields as annotated attributes
class Weather(BaseModel):
    """Structured weather reading that the LLM response is parsed into.

    All three fields are floats.  Each field has a validator that rejects
    values this notebook treats as invalid.  The validators run in
    ``mode="after"``, i.e. on the value Pydantic has already coerced to
    ``float``.  Running them on the raw input instead would let a string
    such as ``"0"`` slip past the ``== 0`` check, and would raise a bare
    ``TypeError`` (not a validation error) when a string is compared with
    ``<`` / ``>``.
    """

    temperature: float = Field(description="The temperature of the city in Fahrenheit")
    humidity: float = Field(description="The humidity percentage")
    rainfall: float = Field(description="The amount of rainfall in mm")

    @field_validator("temperature", mode="after")
    @classmethod
    def temperature_cannot_be_zero(cls, value: float) -> float:
        """Return ``value`` unchanged unless it is exactly zero.

        Raises:
            ValueError: If ``value`` equals 0.
        """
        if value == 0:
            raise ValueError("Temperature cannot be zero")
        return value

    @field_validator("humidity", mode="after")
    @classmethod
    def humidity_must_be_valid(cls, value: float) -> float:
        """Return ``value`` unchanged if it lies in the inclusive range 30-60.

        Raises:
            ValueError: If ``value`` is below 30, above 60, or NaN.
        """
        # Written as a negated range test so that NaN (for which every
        # comparison is False) is rejected rather than silently accepted.
        if not (30 <= value <= 60):
            raise ValueError("Humidity must be between 30 and 60 percent")
        return value

    @field_validator("rainfall", mode="after")
    @classmethod
    def rainfall_must_be_valid(cls, value: float) -> float:
        """Return ``value`` unchanged if it lies in the inclusive range 0-100.

        Raises:
            ValueError: If ``value`` is below 0, above 100, or NaN.
        """
        # Negated range test: NaN fails both comparisons and is rejected.
        if not (0 <= value <= 100):
            raise ValueError("Rainfall must be between 0 and 100 mm")
        return value

# Task 2: Build the Chain using Pydantic Output Parser
This task builds the chain using a prompt template, model, and the Pydantic output parser.
We form the prompt template with system and human prompts, initialize the AzureChatOpenAI model,
create a PydanticOutputParser instance, and create the chain using prompt, model, and parser.


In [None]:
import os

from dotenv import load_dotenv
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)
from langchain_openai import ChatOpenAI
from langchain_openai import AzureChatOpenAI

# Build the Chain using Pydantic Output Parser
# ============================================

# --- Prompt -------------------------------------------------------------
# System message: names the properties to extract and carries the parser's
# format instructions. Both placeholders are filled in when the chain runs.
system_message = (
    "Extract {weather_properties} from the given weather data.\n"
    "{format_instructions}"
)
system_prompt = SystemMessagePromptTemplate.from_template(system_message)

# Human message: the free-text weather description to extract values from.
human_message = "Weather description: {weather_description}"
human_prompt = HumanMessagePromptTemplate.from_template(human_message)

# Chat prompt: the system message followed by the human message.
main_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])

# --- Model --------------------------------------------------------------
# load_dotenv() runs before the os.environ lookups below; a missing
# variable raises KeyError at this point rather than later in the chain.
load_dotenv()

# Connection settings for the AzureChatOpenAI model, all read from the
# environment so that no credential is written into the notebook.
azure_settings = {
    "azure_deployment": os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
    "api_key": os.environ["AZURE_OPENAI_API_KEY"],
    "api_version": os.environ["AZURE_OPENAI_API_VERSION"],
}
llm = AzureChatOpenAI(**azure_settings, temperature=0)

# --- Parser -------------------------------------------------------------
# The parser turns the model's completion into a Weather instance; its
# format instructions are printed here for inspection.
pydantic_parser = PydanticOutputParser(pydantic_object=Weather)
print(
    "pydantic_parser.get_format_instructions() =",
    pydantic_parser.get_format_instructions(),
)

# --- Chain --------------------------------------------------------------
# Prompt -> model -> parser, composed with the pipe operator.
chain = main_prompt | llm | pydantic_parser

pydantic_parser.get_format_instructions() = The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"temperature": {"description": "The temperature of the city in Fahrenheit", "title": "Temperature", "type": "number"}, "humidity": {"description": "The humidity percentage", "title": "Humidity", "type": "number"}, "rainfall": {"description": "The amount of rainfall in mm", "title": "Rainfall", "type": "number"}}, "required": ["temperature", "humidity", "rainfall"]}
```


# Task 3: Test and Validate the Parser
This task tests and validates the parser by providing various weather descriptions
and checking the structured output against the Pydantic model. We test with weather
descriptions that include all properties, invalid humidity values, and invalid rainfall values.

In [8]:
# Success case
# The description below states a temperature, a humidity and a rainfall
# value, so the chain is expected to return a populated Weather object.
weather_description = "Today is sunny, making it a perfect day to spend outdoors with clear skies and bright sunshine. As you step outside, you might also feel a gentle breeze that adds to the pleasant atmosphere. With temperatures reaching a comfortable 75 degrees Fahrenheit, it is an ideal time for outdoor activities or simply relaxing in the sun. Although the humidity is 30%, it is able to get a rainfall of 20mm every day."

# One entry per placeholder used by the prompt templates.
test_response = chain.invoke(
    dict(
        weather_properties=["temperature", "humidity", "rainfall"],
        weather_description=weather_description,
        format_instructions=pydantic_parser.get_format_instructions(),
    )
)

print(test_response)


temperature=75.0 humidity=30.0 rainfall=20.0


In [9]:
# Test the parser with a failure case
# The description reports a humidity of 20%, which is below the 30-60 percent
# range the Weather model accepts, so parsing is expected to fail.
weather_description = "It's a chilly morning, prompting many to don their light jackets or sweaters before heading out. The air is crisp, filled with the soothing sounds of birds chirping, as the day begins to unfold. Humidity hovers at 20% with a rainfall of 10mm. Temperatures hover around 50 degrees, but as the sun rises higher, it promises a gradual warming that will provide a refreshing start to the day."

# The failure is the demonstration: catch the parser's exception and display
# it, so the cell completes and the notebook still runs from top to bottom.
try:
    test_response = chain.invoke(
        {
            "weather_properties": ["temperature", "humidity", "rainfall"],
            "weather_description": weather_description,
            "format_instructions": pydantic_parser.get_format_instructions()
        }
    )
except OutputParserException as exc:
    # Same "ExceptionName: message" layout an uncaught exception would show.
    print(f"{type(exc).__name__}: {exc}")
else:
    # Only reached if the model's output unexpectedly passes validation.
    print(test_response)

OutputParserException: Failed to parse Weather from completion {"temperature": 50, "humidity": 20, "rainfall": 10}. Got: 1 validation error for Weather
humidity
  Value error, Humidity must be between 30 and 60 percent [type=value_error, input_value=20, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [10]:
# Test the parser with another failure case
# The description reports a rainfall of 150mm, which is above the 0-100 mm
# range the Weather model accepts, so parsing is expected to fail.
weather_description = "The weather today is quite unusual with a temperature of 68 degrees Fahrenheit and a humidity level of 40%. However, the forecast predicts an extremely high rainfall of 150mm, which is outside the normal range."

# The failure is the demonstration: catch the parser's exception and display
# it, so the cell completes and the notebook still runs from top to bottom.
try:
    test_response = chain.invoke(
        {
            "weather_properties": ["temperature", "humidity", "rainfall"],
            "weather_description": weather_description,
            "format_instructions": pydantic_parser.get_format_instructions()
        }
    )
except OutputParserException as exc:
    # Same "ExceptionName: message" layout an uncaught exception would show.
    print(f"{type(exc).__name__}: {exc}")
else:
    # Only reached if the model's output unexpectedly passes validation.
    print(test_response)

OutputParserException: Failed to parse Weather from completion {"temperature": 68, "humidity": 40, "rainfall": 150}. Got: 1 validation error for Weather
rainfall
  Value error, Rainfall must be between 0 and 100 mm [type=value_error, input_value=150, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 