### TypedDict

Cannot validate data, but it's faster.

In [5]:
from langchain_openai import ChatOpenAI
from typing import List, Optional, TypedDict, Annotated
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# the client is built (presumably this is where the OpenAI API key comes
# from -- TODO confirm).
load_dotenv()

# Chat model handle used by the structured-output call in this cell.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.6,
)

# Schema for the structured answer, declared as a TypedDict: each key carries
# its type plus a human-readable description attached through Annotated.
# The call below returns a plain dict (see the recorded `<class 'dict'>`
# output), so nothing checks the values against these annotations at runtime.
# NOTE(review): documented with comments rather than a class docstring -- a
# docstring may be forwarded to the model as the schema description; confirm
# before adding one.
class Movie(TypedDict):
    title: Annotated[str, "The title of the movie"]
    release_year: Annotated[int, "The release year of the movie"]
    genres: Annotated[List[str], "The genres of the movie belongs to"]
    rating: Annotated[float, "The rating of the movie"]
    # Optional: the value may be None (presumably when the figure is unknown).
    box_office: Annotated[Optional[float], "The box office of the movie"]

# Wrap the chat model so its reply is shaped by the Movie schema.
structured_llm = llm.with_structured_output(Movie)

movie_prompt = "give me details of movie named Inception"
result = structured_llm.invoke(movie_prompt)

# With a TypedDict schema the result is a plain dict, so fields are read by key.
print(result)
print(type(result))
print(result["title"])

{'title': 'Inception', 'release_year': 2010, 'genres': ['Action', 'Adventure', 'Sci-Fi'], 'rating': 8.8, 'box_office': 829895144}
<class 'dict'>
Inception


### Pydantic

Can validate data at runtime (industry standard).

In [13]:
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List, Optional
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# the client is built (presumably this is where the OpenAI API key comes
# from -- TODO confirm).
load_dotenv()

# Chat model handle used by the structured-output call in this cell.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.6,
)

# Pydantic variant of the movie schema. Every field is declared with
# Field(..., description=...); the Ellipsis means "no default", so each field
# must be supplied. The call below returns a Movie instance rather than a
# dict (see the recorded `<class '__main__.Movie'>` output).
# This definition rebinds the name `Movie` used by the TypedDict cell above.
# NOTE(review): release_year is `str` here but `int` in the TypedDict Movie
# earlier in this file -- confirm which type is intended.
# NOTE(review): documented with comments rather than a class docstring -- a
# docstring may end up in the schema description sent to the model; confirm
# before adding one.
class Movie(BaseModel):
    title: str = Field(..., description="The title of the movie")
    release_year: str = Field(..., description="The release year of the movie")
    genres: List[str] = Field(..., description="The genres of the movie belongs to")
    rating: float = Field(..., description="The rating of the movie")
    # Optional[...] combined with `...`: the field is still required, but its
    # value is allowed to be None.
    box_office: Optional[float] = Field(..., description="The box office of the movie")


# Bind the Pydantic schema to the model so replies come back as Movie objects.
structured_llm = llm.with_structured_output(Movie)

movie_prompt = "Give me details of movie named Inception"
result = structured_llm.invoke(movie_prompt)

print(result)
print(result.title)  # attribute access instead of key lookup
print(type(result))  # with Pydantic this is an instance of Movie
print(result.model_dump())  # builds a dictionary from the model instance
print(result.model_dump_json())  # builds a JSON string from the model instance


title='Inception' release_year='2010' genres=['Action', 'Sci-Fi', 'Thriller'] rating=8.8 box_office=836836967.0
Inception
<class '__main__.Movie'>
{'title': 'Inception', 'release_year': '2010', 'genres': ['Action', 'Sci-Fi', 'Thriller'], 'rating': 8.8, 'box_office': 836836967.0}
{"title":"Inception","release_year":"2010","genres":["Action","Sci-Fi","Thriller"],"rating":8.8,"box_office":836836967.0}


### JSON Schema

In [2]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# the client is built (presumably this is where the OpenAI API key comes
# from -- TODO confirm).
load_dotenv()

# Chat model handle used by the structured-output call in this cell.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.6,
)


# Function-style schema (name / description / parameters) describing the
# movie fields the model should return. `parameters` holds the JSON Schema of
# the result object.
movie_json_schema = {
    "name": "movie_schema",
    "description": "A movie schema for extracting movie details",
    "parameters": {
        "type": "object",
        "properties": {
            "title": {
                "type": "string",
                "description": "The title of the movie"
            },
            "release_year": {
                "type": "string",
                "description": "The release year of the movie"
            },
            "genres": {
                "type": "array",
                "items": {
                    "type": "string"
                },
                "description": "The genres of the movie belongs to"
            },
            "rating": {
                "type": "number",
                "description": "The rating of the movie"
            },
            "box_office": {
                "type": "number",
                "description": "The box office of the movie"
            }
        },
        # `required` is a keyword of the object schema, so it has to sit next
        # to `properties`. As a sibling of `parameters` it is not part of the
        # JSON Schema and the required-field constraint is lost.
        "required": ["title", "release_year", "genres", "rating", "box_office"],
    },
}


# Bind the dict-based schema to the model; the recorded output shows the
# reply coming back as a plain dict.
structured_llm = llm.with_structured_output(movie_json_schema)

movie_prompt = "Give me details of movie named Inception"
result = structured_llm.invoke(movie_prompt)

print(result)

{'title': 'Inception', 'release_year': '2010', 'genres': ['Science Fiction', 'Action', 'Adventure', 'Thriller'], 'rating': 8.8, 'box_office': 836836967}


#### Product review case (Pydantic)

In [None]:
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List, Optional
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# the client is built (presumably this is where the OpenAI API key comes
# from -- TODO confirm).
load_dotenv()

# Chat model handle used by the structured-output call in this cell.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.6,
)

# Pydantic schema for pulling structured facts out of a free-text product
# review. Fields declared with Field(..., ...) have no default and must be
# supplied; purchase_date is the only field with a default (None).
# NOTE(review): documented with comments rather than a class docstring -- a
# docstring may end up in the schema description sent to the model; confirm
# before adding one.
class ProductReview(BaseModel):
    product_name: str = Field(..., description="Name of the product being reviewed")
    reviewer_name: str = Field(..., description="Name of the reviewer")
    # The 1-to-5 scale is stated only in the description; no numeric bounds
    # are declared on the field itself.
    rating: float = Field(..., description="The rating of the product on a scale of 1 to 5")
    pros: List[str] = Field(..., description="List of positive aspects of the product")
    cons: List[str] = Field(..., description="List of negative aspects of the product")
    review_text: str = Field(..., description="Detailed review of the product")
    would_recommend: bool = Field(..., description="Whether the reviewer would recommend the product")
    # Kept as a string (no date parsing); defaults to None when not provided.
    purchase_date: Optional[str] = Field(None, description="Date of purchase of the product")

# Bind the ProductReview schema to the model.
structured_llm = llm.with_structured_output(ProductReview)

# Free-form review text handed to the model as the whole prompt; the schema
# descriptions are what tell it which facts to extract.
raw_review = """
I purchased these Sony WH-1000XM5 headphones approximately two months ago and I can say without
hesitation that it was one of the best tech acquisitions I've ever made. As someone who works
remotely and travels frequently, audio quality and noise cancellation are absolutely essential
in my daily life.

The active noise cancellation is simply exceptional. I tested it in various environments - airplanes,
busy coffee shops, noisy offices - and the results always impress. It manages to block out practically
all ambient noise, allowing total immersion in music or concentration at work. The transparency mode
also works perfectly when I need to hear announcements or talk to someone without removing the headphones.

The audio quality is premium. The bass is deep without being excessive, the mids are clear, and the
highs are crisp. I tested them with various music genres - from classical music to heavy rock - and
the reproduction is always faithful and balanced. For video calls, the microphones capture my voice
with impressive clarity.

Comfort is another very high point. The cushions are soft and don't squeeze even after 6-8 hours of
continuous use. The headband adjusts perfectly to the head without causing discomfort. The battery
easily lasts 30 hours with ANC activated, exactly as promised.

The negative points are minimal: the price is steep (I paid $450), it's not foldable like the previous
model, and occasionally the touch sensor activates accidentally. The case could be more compact.

Overall, for those seeking the best in audio quality and noise cancellation, it's worth every penny.
I strongly recommend it, especially for demanding professionals and audiophiles.
"""

result = structured_llm.invoke(raw_review)

print(result)