In [1]:
from typing import TypedDict

class Person(TypedDict):
    name: str
    age: int
    email: str

person: Person = {
    "name": "Alice",
    "age": 30,
    "email": "alice@example.com"
}

print(person)

{'name': 'Alice', 'age': 30, 'email': 'alice@example.com'}


### GPT Sturcture_Output. 

In [3]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from typing import TypedDict, Optional, Literal, Annotated

load_dotenv()

model = ChatOpenAI()

review = """I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver. The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality. However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful. """

class format_review(TypedDict):

    summary: Annotated[str, "Write a brief summary of the review."]
    sentiments: Annotated[Literal["pos", "cons"], "Select one of them positive or negative."]
    pros: Annotated[Optional[list[str]], "Write the pros of the review."]
    cons: Annotated[Optional[list[str]], "Write the cons of the review."]
    name: Annotated[Optional[str], "Write the name of the reviewr."]

structure_model = model.with_structured_output(format_review)

result = structure_model.invoke(review)

print(result)

{'summary': 'Highly powerful and feature-packed flagship smartphone with a few drawbacks', 'sentiments': 'pos', 'pros': ['Insanely powerful Snapdragon 8 Gen 3 processor for lightning-fast performance', 'Stunning 200MP camera with excellent zoom capabilities', 'Long-lasting 5000mAh battery with 45W fast charging support', 'Unique and useful S-Pen integration for note-taking and sketches'], 'cons': ['Heavy weight and large size make one-handed use uncomfortable', 'Bloatware in One UI with redundant Samsung apps', 'High price of $1,300', 'Loss of quality in zoom beyond 30x'], 'name': 'Samsung Galaxy S24 Ultra Review'}


### For datavalidation --> Pydantic

* Forcing user to stick with the programmer datatype. 

In [12]:
from pydantic import BaseModel, Field, EmailStr
from typing import Optional

class Student(BaseModel):

    name: str = "Arjun"
    age: Optional[int] = None
    #email: Optional[EmailStr] = None
    cgpa: float = Field(gt = 0, lt = 10, default=5, description="A decimal value shows performance of the student in the class.")

new_student = {'name': 'Jass', 'cgpa': 8}

student = Student(**new_student)

print(student)

student_dict = dict(student)

print(student_dict['cgpa'])

name='Jass' age=None cgpa=8.0
8.0


In [15]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from typing import TypedDict, Optional, Literal, Annotated
from pydantic import BaseModel, Field

load_dotenv()

model = ChatOpenAI()

review = """I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver. The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality. However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful. """

"""
class format_review(TypedDict):

    summary: Annotated[str, "Write a brief summary of the review."]
    sentiments: Annotated[Literal["pos", "cons"], "Select one of them positive or negative."]
    pros: Annotated[Optional[list[str]], "Write the pros of the review."]
    cons: Annotated[Optional[list[str]], "Write the cons of the review."]
    name: Annotated[Optional[str], "Write the name of the reviewr."]
"""

class Format_review(BaseModel):

    summary: str = Field(description="Write a brief summary of the review.")    
    sentiments: Literal["pos", "neg"] = Field(description="Select one of them positive or negative.")
    pros: list[str] = Field(description="Write the pros of the review.")
    cons: list[str] = Field(description="Write the cons of the review.")
    name: Optional[str] = Field(default="None", description="Write the name of the reviewr.")

structure_model = model.with_structured_output(Format_review)

result = structure_model.invoke(review)

print(result)
print(dict(result))



summary='The Samsung Galaxy S24 Ultra is an absolute powerhouse with impressive features but comes with a high price tag and some drawbacks.' sentiments='pos' pros=['Insanely powerful processor (great for gaming and productivity)', 'Stunning 200MP camera with incredible zoom capabilities', 'Long battery life with fast charging', 'S-Pen support is unique and useful'] cons=['Weight and size make it uncomfortable for one-handed use', "Bloatware in Samsung's One UI", 'High price tag of $1,300'] name=None
{'summary': 'The Samsung Galaxy S24 Ultra is an absolute powerhouse with impressive features but comes with a high price tag and some drawbacks.', 'sentiments': 'pos', 'pros': ['Insanely powerful processor (great for gaming and productivity)', 'Stunning 200MP camera with incredible zoom capabilities', 'Long battery life with fast charging', 'S-Pen support is unique and useful'], 'cons': ['Weight and size make it uncomfortable for one-handed use', "Bloatware in Samsung's One UI", 'High pric

### There is another schema called Json schema. You use it when your application have used different languages like for backend python and for frontend Java. 

In [None]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from typing import TypedDict, Annotated, Optional, Literal
from pydantic import BaseModel, Field

load_dotenv()

model = ChatOpenAI()

# schema
json_schema = {
  "title": "Review",
  "type": "object",
  "properties": {
    "key_themes": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Write down all the key themes discussed in the review in a list"
    },
    "summary": {
      "type": "string",
      "description": "A brief summary of the review"
    },
    "sentiment": {
      "type": "string",
      "enum": ["pos", "neg"],
      "description": "Return sentiment of the review either negative, positive or neutral"
    },
    "pros": {
      "type": ["array", "null"],
      "items": {
        "type": "string"
      },
      "description": "Write down all the pros inside a list"
    },
    "cons": {
      "type": ["array", "null"],
      "items": {
        "type": "string"
      },
      "description": "Write down all the cons inside a list"
    },
    "name": {
      "type": ["string", "null"],
      "description": "Write the name of the reviewer"
    }
  },
  "required": ["key_themes", "summary", "sentiment"]
}


structured_model = model.with_structured_output(json_schema)

result = structured_model.invoke("""I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver.

The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality.

However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful

""")

print(result)

### Some models do not support structured output schema. So, there you will have to use Output Parsers. 

Type of output parser: 
1. str output parser
2. json output parser
3. structured output parser
4. pydantic output parser

In [4]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate

llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device = -1,
    pipeline_kwargs={"temperature":0.7}
)

model = ChatHuggingFace(llm = llm)

# template 1
template1 = PromptTemplate(
    template="Give me detail summary on {topic_1}",
    input_variables=['topic_1']
)

# template 2
template2 = PromptTemplate(
    template="Give me 3 line summary on this para {para_1}",
    input_variables=['para_1']
)

# prompt 1
prompt1 = template1.invoke({'topic_1': 'proteins'})

result1 = model.invoke(prompt1)

# prompt 2
prompt2 = template2.invoke({'para_1': result1.content})

result2 = model.invoke(prompt2)

print(result2.content)

Device set to use cpu


<|user|>
Give me 3 line summary on this para <|user|>
Give me detail summary on proteins</s>
<|assistant|>
Proteins are biological macromolecules that perform various functions in living organisms. They are composed of smaller molecules called amino acids that are joined together by peptide bonds. The structure, properties, and functions of proteins vary widely depending on the type of protein and the cellular context.

Proteins are essential for maintaining health and functioning of the body. They are involved in various physiological processes such as cell division, growth, repair, and immune response. They play a critical role in metabolism, hormone production, and protein synthesis.

Some examples of proteins include enzymes, antibodies, hormones, and DNA. Enzymes are proteins that catalyze chemical reactions, such as the digestion of food or the production of ATP. Antibodies are proteins that help the immune system recognize and neutralize invading pathogens. Hormones are proteins

In [6]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

load_dotenv()
llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device = -1,
    pipeline_kwargs={"temperature":0.7}
)

model = ChatHuggingFace(llm=llm)

template1 = PromptTemplate(
    template="Give me the detail summary on {topic}",
    input_variables=['topic']
)

template2 = PromptTemplate(
    template="Give me 3 line summary of this para {para}",
    input_variables=['para']
)

parser = StrOutputParser()

chain = template1 | model | parser | template2 | model | parser

result = chain.invoke({'topic': "protein"})

print(result)

Device set to use cpu


<|user|>
Give me 3 line summary of this para <|user|>
Give me the detail summary on protein</s>
<|assistant|>
The protein is a crucial nutrient that plays a critical role in maintaining healthy bodily function. It is composed of amino acids, which are building blocks of proteins. The human body requires protein for various bodily processes, including growth, repair, and maintenance. Protein is divided into different types, such as essential amino acids (EAAs) and non-essential amino acids (NEAAs), which are essential for tissue growth and development but not essential for the body's survival.

Essential amino acids (EAAs) are the building blocks of proteins, and they are necessary for the body's metabolism. They are found in plant-based foods such as leafy greens, legumes, nuts, and seeds, and they are essential for building and repairing cells. Examples of EAAs include lysine, methionine, and tryptophan.

Non-essential amino acids (NEAAs) are the building blocks of proteins that are n

Json output parser

In [10]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device = -1,
    pipeline_kwargs={"temperature":0.7}
)

model = ChatHuggingFace(llm=llm)

parser = JsonOutputParser()

template = PromptTemplate(
    template=(
        "Pick any one character from Game of Thrones and provide:\n"
        "- place_of_birth\n"
        "- place_of_death\n"
        "- main_battle_location\n\n"
        "Return ONLY a valid JSON object with keys "
        "`character`, `place_of_birth`, `place_of_death`, `main_battle_location`.\n"
        "Do not include any explanation, prose, or code.\n"
        "{format_instructions}"
    ),
    input_variables=[],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
    
chain = template | model | parser

result = chain.invoke({})

print(result)

Device set to use cpu


{'character': {'name': 'Jaime Lannister', 'place_of_birth': 'Casterly Rock, Highgarden', 'place_of_death': 'The Eyrie, Highgarden', 'main_battle_location': 'The Iron Throne'}}


There is one more Structure output parser. But we don't need it that much. 

* To incorporate the data validation we need Pydantic. 

In [26]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field
from typing import Optional, Literal
from langchain_core.output_parsers import PydanticOutputParser
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
"""
llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device = -1,
    pipeline_kwargs={"temperature":0.7}
)

model = ChatHuggingFace(llm = llm)
"""
model = ChatOpenAI()

class Person(BaseModel):

    name: str = Field(default=None,description=" Name of the character.")
    age_of_death: int = Field(default=None, lt=100, gt=0, description="Age when the person died.")
    location_of_death: str = Field(default=None, description="Location where he died.")
    father_name: str = Field(default=None, description="Father name")
    children_name: Optional[list[str]] = Field(default=None, description="Name of the childrens.")

parser = PydanticOutputParser(pydantic_object=Person)

template = PromptTemplate(
    template = "You are a Game of Thrones expert.\n"
        "Choose any ONE character from the Game of Thrones series.\n\n"
        "Return ONLY a JSON object that matches this schema:\n"
        "{format_instructions}\n\n"
        "Example:\n"
        "```json\n"
        "{{\n"
        "  \"name\": \"Jon Snow\",\n"
        "  \"age_of_death\": 30,\n"
        "  \"location_of_death\": \"Castle Black\",\n"
        "  \"father_name\": \"Rhaegar Targaryen\",\n"
        "  \"children_name\": [\"Unnamed Child\"]\n"
        "}}\n"
        "```\n\n"
        "Now output the JSON object for a (possibly different) character.\n"
        "Do not include any explanation, prose, or code blocks besides the JSON.",
    input_variables= [],
    partial_variables= {"format_instructions": parser.get_format_instructions()}
)

print(f"Input of the model: {template.format()}")

chain = template | model | parser

result = chain.invoke({})

print(result)

Input of the model: You are a Game of Thrones expert.
Choose any ONE character from the Game of Thrones series.

Return ONLY a JSON object that matches this schema:
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"default": null, "description": " Name of the character.", "title": "Name", "type": "string"}, "age_of_death": {"default": null, "description": "Age when the person died.", "exclusiveMaximum": 100, "exclusiveMinimum": 0, "title": "Age Of Death", "type": "integer"}, "location_of_death": {"default": null, "description": "Location where he died.", "title": "Lo