# Achieve Structured Output from LLMs

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the model below — confirm.
from dotenv import load_dotenv
load_dotenv()

In [None]:
from langchain_openai import ChatOpenAI

# Chat model reused by the cells below; each reply is capped at 1000 completion tokens.
model = ChatOpenAI(
    model='gpt-4.1-nano',
    max_completion_tokens=1000,
)

## 1. TypedDict (inherits TypedDict)

In [None]:
from typing import TypedDict       # standard-library typing module (not part of mypy)

class Person(TypedDict):
    # Dictionary shape with two typed keys; the types are hints for static checkers.
    name: str
    age: int

# Calling a TypedDict class builds a plain dict; the hinted types are not enforced at runtime.
person1: Person = Person(name="Alice", age=30)
print(person1)

### Problem statement: turn a review into structured output {'summary': ..., 'sentiment': ...}

In [None]:
from typing import TypedDict, Annotated

# Short product review used as the model input; the text starts with a newline.
sample_review_input = (
    "\n"
    "The hardware is great,but the software feels bloated.\n"
    "There are too many pre-installed apps that I can't remove.\n"
    "Also, the UI looks outdated compared to other brands.\n"
    "Hoping for a software update to fix this ."
)

class Review(TypedDict):    # output format/schema
    # LangChain reads the Annotated metadata positionally as (default, description),
    # so a single string would be taken as the field's default value and no
    # description would reach the model. `...` marks the field as required and
    # lets the text that follows act as the description.
    # A bare `summary: str` would also work, but carries no description.
    summary: Annotated[str, ..., "A brief summary of the review"]
    sentiment: Annotated[str, ..., "Sentiment of the review. Either negative or positive or neutral"]

# Wrap the chat model so that its reply is returned in the shape of the Review schema.
structured_model = model.with_structured_output(Review) # extra step compared with a plain chat call
result = structured_model.invoke(sample_review_input)   # invoke the wrapped model, not model.invoke
print(result)                                           # the structured result itself, not result.content
print(type(result))                                     # expected to be a dictionary for a TypedDict schema
print(result['sentiment'])                              # individual fields are read by key

In [None]:
# More flexible implementation

from typing import TypedDict, Annotated, Optional, Literal

# schema
class Review(TypedDict):
    # LangChain reads the Annotated metadata positionally as (default, description).
    # `...` marks a required field; `None` gives an optional field a default of None.
    # With only one string after the type, that string would be used as the default
    # value and the field would carry no description.
    key_themes: Annotated[list[str], ..., "Write down all the key themes discussed in the review in a list"]
    summary: Annotated[str, ..., "A brief summary of the review"]
    sentiment: Annotated[Literal["pos", "neg", "none"], ..., "Return sentiment of the review either negative, positive or neutral"]
    pros: Annotated[Optional[list[str]], None, "Write down all the pros inside a list"]
    cons: Annotated[Optional[list[str]], None, "Write down all the cons inside a list"]
    name: Annotated[Optional[str], None, "Write the name of the reviewer"]
    

# Wrap the chat model so that its reply is returned in the shape of the Review schema.
structured_model = model.with_structured_output(Review)

result = structured_model.invoke("""I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver.

The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality.

However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful

Review by Susamay
""")

# 'name' and 'cons' are optional in the schema, so the model may leave them out of
# the returned dict; .get() prints None in that case instead of raising KeyError.
print(result.get('name'))
print(result.get('cons'))

## 2. Pydantic (inherits BaseModel)

In [None]:
from pydantic import BaseModel, EmailStr, Field, field_validator
from typing import Optional

class Student(BaseModel):
    # Demo model: a defaulted field, an optional field, a validated e-mail,
    # a range-constrained float and a custom validator on `name`.

    name: str = 'nitish'
    age: Optional[int] = None
    email: EmailStr
    cgpa: float = Field(gt=0, lt=10, default=5, description='A decimal value representing the cgpa of the student')

    @field_validator('name')
    @classmethod
    def transform_name(cls, value: str) -> str:
        """Validate an explicitly supplied name and return it upper-cased.

        Args:
            value: The name passed to the model.

        Returns:
            The name converted to upper case.

        Raises:
            ValueError: If the name is empty or starts with a vowel. Raising
                ValueError (rather than a bare string, which is itself a
                TypeError in Python 3) lets pydantic report the problem as a
                validation error.
        """
        if not value:
            # Guard before indexing: value[0] on an empty string raises IndexError.
            raise ValueError("Name must not be empty")
        # Compare case-insensitively so 'Alice' is rejected just like 'alice'.
        if value[0].lower() in 'aeiou':
            raise ValueError("Name must start with consonant")
        return value.upper()

# Raw input: age is given as a string and name is left out, so the default applies.
new_student1 = dict(age='32', email='abc@gmail.com')
student1 = Student(**new_student1)      # unpack the dict into keyword arguments
print(student1)                         # the model instance
print(student1.name)                    # attribute access on the instance


# Iterating a model yields (field name, value) pairs; collect them into a dict.
student_dict1 = {field: value for field, value in student1}
print(student_dict1['age'])

student_json = student1.model_dump_json()   # serialise the model to a JSON string

student2 = Student(
    name='susamay',
    age=27,
    email='susamay.sk@gmail.com',
    cgpa=8.14,
)
print(student2)

In [None]:
# Pydantic-enforced version of the previous TypedDict-based schema

from typing import TypedDict, Annotated, Optional, Literal # these are still helpful
from pydantic import BaseModel, EmailStr, Field 

# Free-text phone review used as the model input in the cells that follow.
prompt = """I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver.

The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality.

However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful

Review by Susamay
"""

# schema
class Review(BaseModel):
    # Each field's description is carried by Field(); the three optional fields
    # also declare a default.
    key_themes: list[str] = Field(
        description="Write down all the key themes discussed in the review in a list",
    )
    summary: str = Field(
        description="A brief summary of the review",
    )
    sentiment: Literal["pos", "neg", "none"] = Field(
        description="Return sentiment of the review either negative, positive or neutral",
    )
    pros: Optional[list[str]] = Field(
        description="Write down all the pros inside a list",
        default=None,
    )
    cons: Optional[list[str]] = Field(
        description="Write down all the cons inside a list",
        default=None,
    )
    # `name` shows the alternative spelling: Field() placed inside Annotated
    # instead of being assigned with `=`.
    name: Annotated[
        Optional[str],
        Field(description="Write the name of the reviewer", default="UNKNOWN"),
    ]


# Wrap the chat model so that its reply is returned in the shape of the pydantic Review schema.
structured_model = model.with_structured_output(Review)
result = structured_model.invoke(prompt)

# The fields are read as attributes here, unlike the key lookups used with the TypedDict schema.
print(result.name)
# print(result['name']) # will not work by default, need to be converted to dict
# print(result.model_dump()['name'])
print(result.cons)

## 3. JSON Schema (universal)

In [None]:
import json

# Export the pydantic Review model as a JSON-schema dict.
schema_dict = Review.model_json_schema()

# Serialise the schema with two-space indentation and save it to disk.
with open("07_json_schema.json", "w") as schema_file:
    schema_file.write(json.dumps(schema_dict, indent=2))

In [None]:
# Uses the schema file 07_json_schema.json.

import json

# Load from file or define inline
with open("07_json_schema.json") as schema_file:
    json_schema = json.load(schema_file)

# The schema is passed as a plain dict; "function_calling" is the method noted for agentic AI use.
structured_model = model.with_structured_output(
    schema=json_schema,
    method="json_schema",
)
response = structured_model.invoke(prompt)
print(response)