In [1]:
# =============================================================================
# Structured Output - Getting Typed Data from LLMs (LangChain 1.0+)
# =============================================================================
"""
=====================================================================
Structured Output - Beyond Raw Text
=====================================================================

LLMs naturally return free-form text, but applications need structured data.
Structured output forces LLMs to return data in specific formats.

Why Structured Output?
----------------------
- Parse movie reviews into {rating: int, sentiment: str, summary: str}
- Extract entities: {name: str, age: int, location: str}
- Get classification: {category: str, confidence: float}
- Build reliable data pipelines without regex parsing

Methods to Get Structured Output:
---------------------------------
1. with_structured_output() - Best method (uses native JSON mode)
2. Output parsers - Legacy approach with prompting
3. Function calling - Low-level tool-based approach

How It Works:
-------------
    User Query
         ‚îÇ
         ‚ñº
    ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
    ‚îÇ LLM + Schema        ‚îÇ  "Return a Movie object"
    ‚îÇ (with_structured)   ‚îÇ
    ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
         ‚îÇ
         ‚ñº
    ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
    ‚îÇ Validated Output    ‚îÇ  Movie(title="Inception", year=2010, ...)
    ‚îÇ (Pydantic Model)    ‚îÇ
    ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò

Updated for LangChain 1.0+ (2025-2026)
"""

import os
from dotenv import load_dotenv

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

print("‚úÖ Environment configured for structured output examples")

‚úÖ Environment configured for structured output examples


In [2]:
# =============================================================================
# Schema Definition Options (LangChain 1.0+)
# =============================================================================
"""
Three Ways to Define Output Schemas
-----------------------------------
1. Pydantic BaseModel - Best for validation and documentation (RECOMMENDED)
2. TypedDict - Lightweight, returns dict instead of object
3. JSON Schema - Raw schema dict (advanced use)

Pydantic Benefits:
- Automatic validation
- Field descriptions become part of the prompt
- Default values
- Complex nested structures
"""

from langchain.chat_models import init_chat_model
from langchain_core.messages import SystemMessage, HumanMessage
from pydantic import BaseModel, Field
from typing import Optional, List
from typing_extensions import Annotated, TypedDict
from enum import Enum

# Initialize model
model = init_chat_model("gpt-4o-mini", model_provider="openai", temperature=0)

# =============================================================================
# Method 1: Pydantic BaseModel (RECOMMENDED)
# =============================================================================
"""
Pydantic models provide:
- Type validation
- Field descriptions (used by LLM)
- Default values
- Complex types (lists, enums, nested models)
"""

class Genre(str, Enum):
    """Movie genres as enum for constrained choices."""
    ACTION = "action"
    COMEDY = "comedy"
    DRAMA = "drama"
    SCIFI = "sci-fi"
    HORROR = "horror"
    OTHER = "other"

class Movie(BaseModel):
    """Schema for movie information extraction."""
    title: str = Field(..., description="The title of the movie")
    year: int = Field(..., description="The release year (e.g., 2014)")
    director: str = Field(..., description="The director's full name")
    genre: Genre = Field(..., description="The primary genre of the movie")
    rating: Optional[float] = Field(None, description="IMDB rating if known (0-10)")

print("=" * 60)
print("Method 1: Pydantic BaseModel")
print("=" * 60)

# Create structured output model
model_with_movie = model.with_structured_output(Movie)

# Get structured response
result = model_with_movie.invoke("Tell me about the movie Interstellar")

print(f"Type: {type(result).__name__}")
print(f"Title: {result.title}")
print(f"Year: {result.year}")
print(f"Director: {result.director}")
print(f"Genre: {result.genre}")
print(f"Rating: {result.rating}")

# =============================================================================
# Method 2: TypedDict (Lightweight)
# =============================================================================
"""
TypedDict returns a plain dict instead of a Pydantic object.
Useful when you don't need validation, just structure.
"""

class MovieDict(TypedDict):
    """Movie info as TypedDict - returns plain dict."""
    title: Annotated[str, "The title of the movie"]
    year: Annotated[int, "The release year"]
    director: Annotated[str, "The director's name"]

print("\n" + "=" * 60)
print("Method 2: TypedDict")
print("=" * 60)

model_with_dict = model.with_structured_output(MovieDict)
result_dict = model_with_dict.invoke("Tell me about The Dark Knight")

print(f"Type: {type(result_dict).__name__}")
print(f"Result: {result_dict}")

  from .autonotebook import tqdm as notebook_tqdm


Method 1: Pydantic BaseModel
Type: Movie
Title: Interstellar
Year: 2014
Director: Christopher Nolan
Genre: Genre.SCIFI
Rating: 8.6

Method 2: TypedDict
Type: dict
Result: {'title': 'The Dark Knight', 'year': 2008, 'director': 'Christopher Nolan'}


In [3]:
# =============================================================================
# Complex Nested Schemas
# =============================================================================
"""
Real-World Schema Design
------------------------
Production applications often need nested, complex structures.
Pydantic excels at this - just nest BaseModel classes.
"""

class Actor(BaseModel):
    """Actor information."""
    name: str = Field(..., description="Actor's full name")
    role: str = Field(..., description="Character name in the movie")

class Review(BaseModel):
    """Movie review structure."""
    rating: float = Field(..., ge=0, le=10, description="Rating from 0 to 10")
    summary: str = Field(..., description="Brief review summary")
    pros: List[str] = Field(default_factory=list, description="List of positive aspects")
    cons: List[str] = Field(default_factory=list, description="List of negative aspects")

class DetailedMovie(BaseModel):
    """Comprehensive movie information with nested structures."""
    title: str = Field(..., description="Movie title")
    year: int = Field(..., description="Release year")
    director: str = Field(..., description="Director's name")
    plot_summary: str = Field(..., description="Brief plot summary (2-3 sentences)")
    main_cast: List[Actor] = Field(..., description="List of main actors and their roles")
    review: Review = Field(..., description="Overall review of the movie")

print("=" * 60)
print("Complex Nested Schema Example")
print("=" * 60)

model_detailed = model.with_structured_output(DetailedMovie)
detailed_result = model_detailed.invoke(
    "Give me detailed information about the movie Inception (2010)"
)

print(f"\nüé¨ {detailed_result.title} ({detailed_result.year})")
print(f"üé• Directed by: {detailed_result.director}")
print(f"\nüìñ Plot: {detailed_result.plot_summary}")
print(f"\nüë• Main Cast:")
for actor in detailed_result.main_cast[:3]:  # Show first 3
    print(f"   - {actor.name} as {actor.role}")
print(f"\n‚≠ê Review: {detailed_result.review.rating}/10")
print(f"   {detailed_result.review.summary}")
if detailed_result.review.pros:
    print(f"   Pros: {', '.join(detailed_result.review.pros[:2])}")

Complex Nested Schema Example

üé¨ Inception (2010)
üé• Directed by: Christopher Nolan

üìñ Plot: Inception follows Dom Cobb, a skilled thief who specializes in the art of extraction, stealing secrets from deep within the subconscious during the dream state. He is offered a chance to have his criminal history erased if he can successfully perform 'inception'‚Äîthe act of planting an idea into a target's mind. As Cobb assembles a team to execute this complex heist, they navigate through layers of dreams, facing challenges that blur the lines between reality and illusion.

üë• Main Cast:
   - Leonardo DiCaprio as Dom Cobb
   - Joseph Gordon-Levitt as Arthur
   - Elliot Page as Ariadne

‚≠ê Review: 8.8/10
   A mind-bending thriller that challenges perceptions of reality and dreams, Inception is a masterclass in storytelling and visual effects.
   Pros: Innovative and original concept, Stunning visual effects


In [4]:
# =============================================================================
# Structured Output in Chains (LCEL)
# =============================================================================
"""
Combining with LCEL Pipelines
-----------------------------
Structured output models work seamlessly in LCEL chains.
The model acts like any other runnable.
"""

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

class SentimentAnalysis(BaseModel):
    """Sentiment analysis result."""
    sentiment: str = Field(..., description="Overall sentiment: positive, negative, or neutral")
    confidence: float = Field(..., ge=0, le=1, description="Confidence score 0-1")
    key_phrases: List[str] = Field(..., description="Key phrases that indicate sentiment")
    summary: str = Field(..., description="One sentence summary of the sentiment")

# Create a chain with structured output
sentiment_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a sentiment analysis expert. Analyze the given text carefully."),
    ("human", "Analyze the sentiment of this text:\n\n{text}")
])

# Chain: prompt ‚Üí structured model
sentiment_chain = sentiment_prompt | model.with_structured_output(SentimentAnalysis)

print("=" * 60)
print("Structured Output in LCEL Chain")
print("=" * 60)

# Test with different texts
texts = [
    "I absolutely loved this product! Best purchase I've ever made.",
    "The service was terrible and I want a refund immediately.",
    "The food was okay, nothing special but not bad either."
]

for text in texts:
    result = sentiment_chain.invoke({"text": text})
    print(f"\nüìù Text: {text[:50]}...")
    print(f"   Sentiment: {result.sentiment} ({result.confidence:.0%} confident)")
    print(f"   Key phrases: {', '.join(result.key_phrases[:2])}")

Structured Output in LCEL Chain

üìù Text: I absolutely loved this product! Best purchase I'v...
   Sentiment: positive (95% confident)
   Key phrases: absolutely loved, Best purchase

üìù Text: The service was terrible and I want a refund immed...
   Sentiment: negative (95% confident)
   Key phrases: service was terrible, want a refund immediately

üìù Text: The food was okay, nothing special but not bad eit...
   Sentiment: neutral (85% confident)
   Key phrases: food was okay, nothing special


In [5]:
# =============================================================================
# Error Handling and include_raw Option
# =============================================================================
"""
Handling Parsing Failures
-------------------------
Sometimes the LLM might not return valid structured output.
Use include_raw=True to get both the raw response and parsed result.

This is useful for:
- Debugging parsing failures
- Logging raw responses
- Fallback handling
"""

class SimpleExtraction(BaseModel):
    """Simple extraction for demonstration."""
    name: str = Field(..., description="Person's name")
    age: int = Field(..., description="Person's age as an integer")

# With include_raw=True, get both raw and parsed
model_with_raw = model.with_structured_output(SimpleExtraction, include_raw=True)

print("=" * 60)
print("include_raw=True Example")
print("=" * 60)

result_with_raw = model_with_raw.invoke(
    "Extract info: John is 30 years old and lives in New York"
)

print(f"\nüì¶ Result type: {type(result_with_raw).__name__}")
print(f"\nüîç Keys available: {result_with_raw.keys()}")
print(f"\n‚úÖ Parsed result:")
print(f"   Name: {result_with_raw['parsed'].name}")
print(f"   Age: {result_with_raw['parsed'].age}")
print(f"\nüìÑ Raw response type: {type(result_with_raw['raw']).__name__}")

# =============================================================================
# Handling Extraction from Longer Text
# =============================================================================
"""
Multi-Entity Extraction
-----------------------
Extract multiple items from unstructured text.
"""

class Person(BaseModel):
    """Person entity."""
    name: str = Field(..., description="Person's full name")
    role: Optional[str] = Field(None, description="Their role or job if mentioned")

class ExtractedEntities(BaseModel):
    """All extracted entities from text."""
    people: List[Person] = Field(default_factory=list, description="All people mentioned")
    organizations: List[str] = Field(default_factory=list, description="Organizations mentioned")
    locations: List[str] = Field(default_factory=list, description="Locations mentioned")

print("\n" + "=" * 60)
print("Multi-Entity Extraction")
print("=" * 60)

text = """
Apple CEO Tim Cook announced a new partnership with Microsoft's Satya Nadella 
at a conference in San Francisco. The deal was also supported by Amazon's 
Andy Jassy from their Seattle headquarters.
"""

extraction_model = model.with_structured_output(ExtractedEntities)
entities = extraction_model.invoke(f"Extract all entities from this text:\n\n{text}")

print(f"\nüìù Text: {text.strip()[:100]}...")
print(f"\nüë• People:")
for person in entities.people:
    print(f"   - {person.name}" + (f" ({person.role})" if person.role else ""))
print(f"\nüè¢ Organizations: {', '.join(entities.organizations)}")
print(f"üìç Locations: {', '.join(entities.locations)}")

include_raw=True Example

üì¶ Result type: dict

üîç Keys available: dict_keys(['raw', 'parsed', 'parsing_error'])

‚úÖ Parsed result:
   Name: John
   Age: 30

üìÑ Raw response type: AIMessage

Multi-Entity Extraction

üìù Text: Apple CEO Tim Cook announced a new partnership with Microsoft's Satya Nadella 
at a conference in Sa...

üë• People:
   - Tim Cook (CEO)
   - Satya Nadella
   - Andy Jassy

üè¢ Organizations: Apple, Microsoft, Amazon
üìç Locations: San Francisco, Seattle


In [None]:
# =============================================================================
# Summary: Structured Output in LangChain 1.0+
# =============================================================================
"""
=====================================================================
KEY TAKEAWAYS - Structured Output
=====================================================================

1. BASIC USAGE:
   -------------
   from pydantic import BaseModel, Field
   
   class MySchema(BaseModel):
       field1: str = Field(..., description="Field description")
       field2: int = Field(..., description="Another field")
   
   model_structured = model.with_structured_output(MySchema)
   result = model_structured.invoke("prompt")

2. SCHEMA DEFINITION OPTIONS:
   ---------------------------
   # Pydantic (RECOMMENDED)
   class Schema(BaseModel): ...
   
   # TypedDict (returns dict)
   class Schema(TypedDict): ...
   
   # JSON Schema (advanced)
   schema = {"type": "object", "properties": {...}}

3. NESTED STRUCTURES:
   -------------------
   class Inner(BaseModel):
       name: str
   
   class Outer(BaseModel):
       items: List[Inner]
       metadata: Inner

4. WITH LCEL CHAINS:
   ------------------
   chain = prompt | model.with_structured_output(Schema)
   result = chain.invoke({"input": "..."})

5. ERROR HANDLING:
   ----------------
   # Get raw response for debugging
   model.with_structured_output(Schema, include_raw=True)
   # Returns: {"parsed": Schema, "raw": AIMessage, "parsing_error": None}

6. BEST PRACTICES:
   ----------------
   - Always add Field descriptions (LLM uses them)
   - Use Optional for fields that might not exist
   - Use Enum for constrained choices
   - Use List for multiple items
   - Set temperature=0 for consistent output
   - Test with varied inputs

Common Imports:
---------------
from pydantic import BaseModel, Field
from typing import Optional, List
from enum import Enum
from langchain.chat_models import init_chat_model

=====================================================================
"""

print("=" * 60)
print("Structured Output Module Complete!")
print("=" * 60)
print("""
Next Steps:
-----------
1. 6-middleware.ipynb - Request/response middleware

Use Cases for Structured Output:
--------------------------------
- Data extraction from documents
- Classification tasks
- Entity recognition
- Form filling automation
- API response formatting
- Survey/feedback analysis
""")