# Structured Output

Models can be requested to provide their response in a format matching a given schema. This is useful for ensuring the output
can be easily parsed and used in subsequent processing. LangChain supports multiple schema types and methods for enforcing structured outputs.

### Pydantic 

Pydantic models provide the richest feature set with field validation, descriptions and nested structures.

In [2]:
import os
from langchain.chat_models import init_chat_model

# Fail fast with a readable message when the key is missing. The original
# `os.environ[...] = os.getenv(...)` self-assignment was a no-op when the
# variable was set, and raised "TypeError: str expected, not NoneType" when it
# was not (os.getenv() returns None for an unset variable).
if "GROQ_API_KEY" not in os.environ:
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it in the environment before "
        "running this notebook."
    )

# The recorded output below shows this call producing a ChatGroq instance for
# model 'qwen/qwen3-32b'.
model = init_chat_model(model="groq:qwen/qwen3-32b")
model

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 16384, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x112716cf0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x112717a10>, model_name='qwen/qwen3-32b', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [3]:
from pydantic import BaseModel, Field

# Schema the model is asked to fill in. Every field is required (`...`), and
# each description shows up in the generated tool schema (see the recorded
# RunnableBinding output in this notebook).
class Movie(BaseModel):
    title: str = Field(..., description="Title of the movie")
    year: int = Field(..., description="Year of movie release")
    director: str = Field(..., description="Director of the movie")
    genre: list[str] = Field(..., description="Genre of the movie")
    rating: float = Field(..., description="Rating of the movie out of 10")

In [4]:
# Wrap the chat model so its replies are returned as Movie instances.
model_with_structure = model.with_structured_output(Movie)
# Bare expression: displays the resulting runnable in the notebook.
model_with_structure

RunnableBinding(bound=ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 16384, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x112716cf0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x112717a10>, model_name='qwen/qwen3-32b', model_kwargs={}, groq_api_key=SecretStr('**********')), kwargs={'tools': [{'type': 'function', 'function': {'name': 'Movie', 'description': '', 'parameters': {'properties': {'title': {'description': 'Title of the movie', 'type': 'string'}, 'year': {'description': 'Year of movie release', 'type': 'integer'}, 'director': {'description': 'Director of the movie', 'type': 'string'}, 'genre': {'description': 'Genre of the movie', 'items': {'type': 'string'}, 'type': 'array'}, 'rating': {'description': 'Rating of the movie 

In [5]:
# Structured call: the recorded result below is a Movie instance.
model_with_structure.invoke("Provide details of the movie Inception")

Movie(title='Inception', year=2010, director='Christopher Nolan', genre=['Science Fiction', 'Action', 'Thriller'], rating=8.8)

In [6]:
# Same prompt against the unwrapped model, for contrast: the recorded result
# below is a free-text AIMessage rather than a Movie.
model.invoke("Provide details of the movie Inception")

AIMessage(content='<think>\nOkay, so I need to provide details about the movie Inception. Let me start by recalling what I know. Inception is a 2010 film directed by Christopher Nolan. The main actor is Leonardo DiCaprio, who plays Dom Cobb. The movie is a sci-fi thriller involving dreams and heists. \n\nFirst, the plot. I think the main idea is about entering someone\'s mind to plant an idea, which is called "inception." The team uses technology to enter dreams and influence the subconscious. The leader of the team is Cobb, a professional thief who steals information by infiltrating the subconscious. The target is Robert Fischer, a wealthy man whose father is on his deathbed. The team\'s goal is to plant an idea that will cause the company to be divided, which is an example of inception.\n\nThe team includes different members with specific roles. There\'s Arthur (played by Joseph Gordon-Levitt), who is the problem solver. He can change scenarios in the dream. Ariadne (Ellen Page) is t

In [7]:
# Second structured call with a different title; the result is again a Movie.
model_with_structure.invoke("Provide details of the movie 127 hours")

Movie(title='127 Hours', year=2010, director='Danny Boyle', genre=['Biography', 'Drama', 'Thriller'], rating=8.0)

#### Message Output with Parsed structure

With the `include_raw=True` option, the call returns a dict that contains the raw `AIMessage` alongside the parsed output.

In [8]:
from pydantic import BaseModel, Field

# Movie schema re-declared in this cell with the same five fields. A Field
# without a default is required, so the explicit `...` marker is omitted.
class Movie(BaseModel):
    title: str = Field(description="Title of the movie")
    year: int = Field(description="Year of movie release")
    director: str = Field(description="Director of the movie")
    genre: list[str] = Field(description="Genre of the movie")
    rating: float = Field(description="Rating of the movie out of 10")

# include_raw=True: invoking this runnable returns a dict whose 'raw' entry is
# the AIMessage (see the recorded invoke output in the next cell).
model_with_structure = model.with_structured_output(
    Movie,
    include_raw=True,
)
model_with_structure

{
  raw: RunnableBinding(bound=ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 16384, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x112716cf0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x112717a10>, model_name='qwen/qwen3-32b', model_kwargs={}, groq_api_key=SecretStr('**********')), kwargs={'tools': [{'type': 'function', 'function': {'name': 'Movie', 'description': '', 'parameters': {'properties': {'title': {'description': 'Title of the movie', 'type': 'string'}, 'year': {'description': 'Year of movie release', 'type': 'integer'}, 'director': {'description': 'Director of the movie', 'type': 'string'}, 'genre': {'description': 'Genre of the movie', 'items': {'type': 'string'}, 'type': 'array'}, 'rating': {'description': 'Rating of t

In [9]:
# Keep the full result so the raw message can be inspected in the output.
response = model_with_structure.invoke(
    "Provide details about the movie Inception"
)
response

{'raw': AIMessage(content='', additional_kwargs={'reasoning_content': 'Okay, the user is asking for details about the movie Inception. Let me check if I have that information. The available function is called Movie, and it requires parameters like title, year, director, genre, and rating.\n\nFirst, I need to confirm the title is "Inception". The year it was released is 2010. The director is Christopher Nolan. The genres might include Science Fiction and Action. For the rating, I think it\'s around 8.8 on IMDb. Let me make sure all required fields are covered. Yep, title, year, director, genre, and rating are all there. I\'ll structure the function call with these details.\n', 'tool_calls': [{'id': 'yy8yw5r14', 'function': {'arguments': '{"director":"Christopher Nolan","genre":["Science Fiction","Action"],"rating":8.8,"title":"Inception","year":2010}', 'name': 'Movie'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 196, 'prompt_tokens': 252, 'total_token

#### Nested Structure

In [10]:
from pydantic import BaseModel, Field

# One cast entry, nested inside MovieDetails.actors. The fields carry no
# Field(description=...), so only their names and types are declared here.
class Actor(BaseModel):
    name: str
    role: str

# Movie schema extended with a nested list of Actor models and a nullable
# budget. Every field is required; `budget` may be None but must be supplied.
class MovieDetails(BaseModel):
    title: str = Field(description="Title of the movie")
    year: int = Field(description="Year of movie release")
    director: str = Field(description="Director of the movie")
    genre: list[str] = Field(description="Genre of the movie")
    rating: float = Field(description="Rating of the movie out of 10")
    actors: list[Actor] = Field(description="Cast of actors in the movie")
    budget: float | None = Field(description="Budget of the movie in millions USD")

# Bind the nested schema, then query it; the recorded output below shows the
# actors list populated with Actor instances.
model_with_moviedetails = model.with_structured_output(MovieDetails)

response = model_with_moviedetails.invoke(
    "Provide details of the movie Inception"
)
response

MovieDetails(title='Inception', year=2010, director='Christopher Nolan', genre=['Science Fiction', 'Action'], rating=8.8, actors=[Actor(name='Leonardo DiCaprio', role="Dominick 'Dom' Cobb"), Actor(name='Joseph Gordon-Levitt', role='Arthur'), Actor(name='Ellen Page', role='Ariadne'), Actor(name='Tom Hardy', role='Eames')], budget=160.0)

### TypedDict

TypedDict provides a simpler alternative using Python's built-in typing, ideal when you don't need runtime validation.

In [11]:
from typing_extensions import TypedDict, Annotated

# TypedDict schema for structured output. The Annotated metadata is written as
# (type, default, description); `...` presumably marks the field as required
# (confirm against the LangChain structured-output docs). The recorded tool
# schema below shows the third element used as each property's description and
# the class docstring used as the tool's description.
class MovieDict(TypedDict):
    """ Movie Details TypedDict schema """
    title: Annotated[str, ..., "Title of the movie"]
    year: Annotated[int, ..., "The year the movie was released"]
    director: Annotated[str, ..., "Director of the movie"]
    rating: Annotated[float, ..., "Rating of the movie on 10"]

# Bind the TypedDict schema; the recorded invoke output for this runnable is a
# plain dict rather than a model instance.
model_with_typeddict = model.with_structured_output(MovieDict)
# Bare expression: displays the resulting runnable in the notebook.
model_with_typeddict

RunnableBinding(bound=ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 16384, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x112716cf0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x112717a10>, model_name='qwen/qwen3-32b', model_kwargs={}, groq_api_key=SecretStr('**********')), kwargs={'tools': [{'type': 'function', 'function': {'name': 'MovieDict', 'description': 'Movie Details TypedDict schema ', 'parameters': {'type': 'object', 'properties': {'title': {'description': 'Title of the movie', 'type': 'string'}, 'year': {'description': 'The year the movie was released', 'type': 'integer'}, 'director': {'description': 'Director of the movie', 'type': 'string'}, 'rating': {'description': 'Rating of the movie on 10', 'type': 'number'}}, 're

In [12]:
# The recorded result below is a dict keyed by the MovieDict field names.
response = model_with_typeddict.invoke(
    "Provide details about movie Inception"
)
response

{'director': 'Christopher Nolan',
 'rating': 8.8,
 'title': 'Inception',
 'year': 2010}

#### Nested Structure

In [16]:
# One cast entry. Plain annotations only, so no per-field description is
# attached to the schema.
class Actor(TypedDict):
    name: str
    role: str

# Nested TypedDict schema: `cast` embeds a list of Actor dicts.
class MovieDetailsDict(TypedDict):
    title: str
    year: int
    cast: list[Actor]
    genre: list[str]
    # LangChain reads Annotated metadata positionally as (default, description).
    # The previous two-element form therefore supplied the description text as
    # the field's *default*. `...` keeps the field required and moves the text
    # into the description slot.
    budget: Annotated[float, ..., "Budget of the movie in millions of USD"]

# Bind the nested TypedDict schema and run a first query against it.
model_with_movieDict = model.with_structured_output(MovieDetailsDict)
resp = model_with_movieDict.invoke(
    "Provide details about the movie Avengers"
)
resp

{'budget': 200,
 'cast': [{'name': 'Robert Downey Jr.', 'role': 'Iron Man'},
  {'name': 'Chris Evans', 'role': 'Captain America'},
  {'name': 'Mark Ruffalo', 'role': 'Hulk'},
  {'name': 'Chris Hemsworth', 'role': 'Thor'},
  {'name': 'Scarlett Johansson', 'role': 'Black Widow'}],
 'genre': ['Action', 'Adventure', 'Science Fiction'],
 'title': 'Avengers',
 'year': 2012}

In [17]:
# Same runnable, different movie; the result is again a nested dict.
resp = model_with_movieDict.invoke(
    "Provide details about the movie Inception"
)
resp

{'budget': 160,
 'cast': [{'name': 'Leonardo DiCaprio', 'role': 'Dom Cobb'},
  {'name': 'Joseph Gordon-Levitt', 'role': 'Arthur'},
  {'name': 'Ellen Page', 'role': 'Mal'}],
 'genre': ['Science Fiction', 'Action'],
 'title': 'Inception',
 'year': 2010}

In [18]:
# Display the model's profile dict; the recorded output lists token limits and
# capability flags such as 'tool_calling' and 'reasoning_output'.
model.profile

{'max_input_tokens': 131072,
 'max_output_tokens': 16384,
 'image_inputs': False,
 'audio_inputs': False,
 'video_inputs': False,
 'image_outputs': False,
 'audio_outputs': False,
 'video_outputs': False,
 'reasoning_output': True,
 'tool_calling': True}

### Dataclasses

A data class is a class that mainly holds data, although there aren't many restrictions on what it can contain. You create one using the `@dataclass` decorator.

In [19]:
import os
# Fail fast with a readable message when the key is missing. The original
# `os.environ[...] = os.getenv(...)` self-assignment was a no-op when the
# variable was set, and raised "TypeError: str expected, not NoneType" when it
# was not (os.getenv() returns None for an unset variable).
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment before "
        "running this notebook."
    )

#### With a Pydantic schema we call `model.with_structured_output(PydanticClass)`; alternatively, the Pydantic class can be passed to `create_agent` through the `response_format` argument

In [20]:
from pydantic import BaseModel, Field
from langchain.agents import create_agent

# Pydantic schema handed to create_agent as response_format. All three fields
# are required strings (`...` makes that explicit).
class ContactInfo(BaseModel):
    """ Contact information of the person """
    name: str = Field(..., description="Name of the person")
    email: str = Field(..., description="Email of the person")
    phone: str = Field(..., description="Phone number of the person")

# Agent whose result carries a ContactInfo under 'structured_response'
# (visible in the recorded invoke output).
agent = create_agent(model="gpt-5", response_format=ContactInfo)

In [21]:
# Send a single user message; the printed dict holds the message history and
# the parsed 'structured_response'.
result = agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "Extract info from: Neela Natarajan, neelanatarajan@example.com, (469) 363-2545",
            },
        ],
    }
)
print(result)

{'messages': [HumanMessage(content='Extract info from: Neela Natarajan, neelanatarajan@example.com, (469) 363-2545', additional_kwargs={}, response_metadata={}, id='a5a1f226-372f-421f-8214-358d8e210393'), AIMessage(content='{"name":"Neela Natarajan","email":"neelanatarajan@example.com","phone":"(469) 363-2545"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1002, 'prompt_tokens': 205, 'total_tokens': 1207, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 960, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-D0PvY80NjskYfonhUcBHoFmldVBGk', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019be024-51c2-7a42-91b6-a36734658c04-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'i

{'messages': [HumanMessage(content='Extract info from: Neela Natarajan, neelanatarajan@example.com, (469) 363-2545', additional_kwargs={}, response_metadata={}, id='a5a1f226-372f-421f-8214-358d8e210393'), 
AIMessage(content='{"name":"Neela Natarajan","email":"neelanatarajan@example.com","phone":"(469) 363-2545"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1002, 'prompt_tokens': 205, 'total_tokens': 1207, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 960, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-D0PvY80NjskYfonhUcBHoFmldVBGk', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019be024-51c2-7a42-91b6-a36734658c04-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 205, 'output_tokens': 1002, 'total_tokens': 1207, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 960}})], 
'structured_response': ContactInfo(name='Neela Natarajan', email='neelanatarajan@example.com', phone='(469) 363-2545')}

In [22]:
# Pull out just the parsed schema object; the recorded value is a ContactInfo
# instance.
result['structured_response']

ContactInfo(name='Neela Natarajan', email='neelanatarajan@example.com', phone='(469) 363-2545')

#### TypedDict

In [24]:
## TypedDict
from typing_extensions import TypedDict
from langchain.agents import create_agent

# TypedDict variant of the contact schema. The `#` notes on the fields are
# ordinary comments: they are not available at runtime, so they document the
# fields for readers only.
class ContactInfo(TypedDict):
    """ Contact information of the person """
    # Name of the person
    name: str
    # Email of the person
    email: str
    # Phone number of the person
    phone: str

# Same agent setup, now with the TypedDict schema; the recorded
# 'structured_response' for this agent is a plain dict.
agent = create_agent(model="gpt-5", response_format=ContactInfo)

In [25]:
# Send a single user message and display the full result dict.
result = agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "Extract contact info from: Neela Natarajan, (469) 254-2278, neelanatarajan@example.com",
            },
        ],
    }
)

result

{'messages': [HumanMessage(content='Extract contact info from: Neela Natarajan, (469) 254-2278, neelanatarajan@example.com', additional_kwargs={}, response_metadata={}, id='d4c64872-4a8a-4752-88f3-50b0645c9cb0'),
  AIMessage(content='{"name":"Neela Natarajan","email":"neelanatarajan@example.com","phone":"(469) 254-2278"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 746, 'prompt_tokens': 184, 'total_tokens': 930, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 704, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-D0Q1tx1cgVRw4XiFQYMqiKdmXnjjQ', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019be02a-4f28-7a61-bf82-f4f75ef41a3c-0', tool_calls=[], invalid_tool_calls=[], usage_meta

In [27]:
# With the TypedDict schema the parsed value is a dict with name/email/phone
# keys (see the recorded output below).
result['structured_response']

{'name': 'Neela Natarajan',
 'email': 'neelanatarajan@example.com',
 'phone': '(469) 254-2278'}

### Dataclasses

In [28]:
from dataclasses import dataclass
from langchain.agents import create_agent

# Dataclass variant of the contact schema. The `#` notes on the fields are
# ordinary comments: they are not available at runtime, so they document the
# fields for readers only.
@dataclass
class ContactInfo:
    """ Contact information of the person """
    # Name of the person
    name: str
    # Email of the person
    email: str
    # Phone number of the person
    phone: str

# Same agent setup, now with the dataclass schema; the recorded
# 'structured_response' for this agent is a ContactInfo instance.
agent = create_agent(model="gpt-5", response_format=ContactInfo)

In [29]:
# Send a single user message and display the full result dict.
resp = agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "Extract information from Neela Natarajan, (469) 363 2545 and neelanatarajan@example.com",
            },
        ],
    }
)
resp

{'messages': [HumanMessage(content='Extract information from Neela Natarajan, (469) 363 2545 and neelanatarajan@example.com', additional_kwargs={}, response_metadata={}, id='b62a0c78-70ed-4624-93be-54cad178f12f'),
  AIMessage(content='{"name":"Neela Natarajan","email":"neelanatarajan@example.com","phone":"(469) 363 2545"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 490, 'prompt_tokens': 175, 'total_tokens': 665, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 448, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-D0RFSYQgHyDxoiofpG5R4qJqFExAW', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019be071-cfd5-7871-8db7-0315a6b81239-0', tool_calls=[], invalid_tool_calls=[], usage_met

In [30]:
# Pull out just the parsed schema object; the recorded value is a ContactInfo
# dataclass instance.
resp['structured_response']

ContactInfo(name='Neela Natarajan', email='neelanatarajan@example.com', phone='(469) 363 2545')