In [2]:
from typing import List, Optional
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
import os 
from phi.model.groq import Groq

In [3]:
class Person(BaseModel):
    """Information about a person."""

    # The docstring above is more than documentation: it is sent to the LLM as the
    # description of the Person schema, so its wording can affect extraction quality.
    #
    # Field conventions used below:
    # 1. Every field is Optional with a default of None, which lets the model
    #    decline to extract a value instead of inventing one.
    # 2. Every field has a `description`; that text is also used by the LLM, so a
    #    precise description helps extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    # Height is typed as a string, not a number (the recorded run returned '1.83').
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )


class Data(BaseModel):
    """Extracted data about people."""

    # Wrapper schema: holding a list of Person lets one extraction call return
    # several people mentioned in the same text.
    people: List[Person]

In [4]:
# Prompt shared by the extraction calls below: fixed system instructions plus the
# text to analyse, supplied through the `text` variable.
# Ways to extend it:
#   * add examples to the template to improve extraction quality;
#   * add further input variables for context (e.g. metadata about the document
#     the text was extracted from).
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. Only extract relevant "
            "information from the text. If you do not know the value of an "
            "attribute asked to extract, return null for the attribute's value.",
        ),
        # Reference examples would be inserted here; see the how-to about
        # improving performance with reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

### OpenAI

In [5]:
# Read the Groq API key from the environment (None when the variable is not set).
# NOTE(review): no cell shown here reads this variable afterwards.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')

In [6]:
# Chat model used by the structured-output cells that follow.
# Commented-out alternative left by the author: Groq(id='llama-3.2-3b-preview').
llm = ChatOpenAI(
    model="gpt-4o-mini",
)

In [7]:
# Wrap the chat model so that its replies come back shaped by the Person schema.
structured_llm = llm.with_structured_output(schema=Person)

In [8]:
# Single-person example: fill the prompt with the sample sentence, then extract.
text = "Alan Smith is 6 feet tall and has blond hair."
prompt = prompt_template.invoke({"text": text})
# Last expression of the cell, so the extracted result is shown as the cell output.
structured_llm.invoke(prompt)

Person(name='Alan Smith', hair_color='blond', height_in_meters='1.83')

In [9]:
# Multi-person example: rebind `structured_llm` to the Data schema so that one
# call can return a list of people, then reuse the same prompt template.
structured_llm = llm.with_structured_output(schema=Data)
text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
prompt = prompt_template.invoke({"text": text})
# Last expression of the cell, so the extracted result is shown as the cell output.
structured_llm.invoke(prompt)

Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='1.83'), Person(name='Anna', hair_color='black', height_in_meters=None)])

### Ollama

In [10]:
from ollama import chat
from pydantic import BaseModel

In [11]:
class Pet(BaseModel):
    # One pet described in the input text.
    # `name`, `animal` and `age` must always hold a value of the declared type.
    name: str
    animal: str
    age: int
    # These two may be None; note that no default value is declared for them.
    color: str | None
    favorite_toy: str | None

class PetList(BaseModel):
    # Top-level container: a single object holding every extracted Pet.
    pets: list[Pet]

In [12]:
# Free-form sample text describing two pets; sent unchanged as the user message.
information_text = '''
        I have two pets.
        A cat named Luna who is 5 years old and loves playing with yarn. She has grey fur.
        I also have a 2 year old black cat named Loki who loves tennis balls.
      '''

In [13]:
# Send the sample text to the Ollama model and pass the PetList JSON schema as the
# requested reply format.
# The author's note for making the model available: `ollama run llama3.1`.
response = chat(
    model='llama3.1',
    messages=[{'role': 'user', 'content': information_text}],
    format=PetList.model_json_schema(),
)

In [14]:
# Parse the JSON text of the reply into a PetList instance and show it.
pets = PetList.model_validate_json(response.message.content)
print(pets)

pets=[Pet(name='Luna', animal='cat', age=5, color='grey', favorite_toy='yarn'), Pet(name='Loki', animal='cat', age=2, color='black', favorite_toy='tennis ball')]


### Information Extraction for dropdown list

In [1]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.schema import BaseOutputParser
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from ollama import chat
import json

In [3]:
# Chat model for the dropdown-extraction cells below. This rebinds `llm`, which
# earlier in the notebook referred to the ChatOpenAI model.
# NOTE(review): a non-zero temperature can make repeated runs differ; consider 0
# if the extraction output needs to be repeatable.
llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.7)

In [5]:
# Per-person fields the model should return for every entry of "PersonList".
#
# ResponseSchema carries only `name`, `description` and `type`, and those are the
# only values rendered into the format instructions. Extra keyword arguments such
# as `enum=` or `items=` never reach the prompt (the recorded run returned keys
# like 'Name' and 'Height' instead of the ones declared here). Constraints are
# therefore written into the description text itself.
independent_schema = [
    ResponseSchema(name="name", description="The name of the person"),
    ResponseSchema(
        name="hair_color",
        description=(
            "The color of the person's hair, exactly one of: grey, black, white, unknown "
            "(use unknown when the color is anything else or is not stated)"
        ),
    ),
    ResponseSchema(name="HairColor", description="The color of the person's hair if known"),
    ResponseSchema(name="height_in_meters", description="Height measured in meters"),
]

# Flatten the per-person fields into one line of text. A single line is used
# because each schema is rendered as one `"name": type  // description` line.
person_fields_text = "; ".join(
    f'"{field.name}" ({field.type}): {field.description}' for field in independent_schema
)

# Top-level schema: one key, "PersonList", whose description tells the model which
# keys every list item must contain.
response_schemas = [
    ResponseSchema(
        name="PersonList",
        description=(
            "A list of all persons with their details. Each list item is a JSON object "
            f"with exactly these keys: {person_fields_text}"
        ),
        type="array",
    )
]

# Post-process the dict produced by the structured output parser.
class PersonListParser(BaseOutputParser):
    """Coerce every person's ``hair_color`` to one of the dropdown values.

    The allowed values are ``grey``, ``black``, ``white`` and ``unknown``.
    Values are compared case-insensitively and with surrounding whitespace
    removed, so ``"Black"`` is stored as ``"black"`` instead of being
    discarded. Anything else, including a missing key, becomes ``"unknown"``.
    """

    def parse(self, text: dict) -> dict:
        """Normalise ``hair_color`` for each entry of ``text["PersonList"]``.

        Args:
            text: Despite the name, this is the already-parsed dict (the notebook
                passes the result of ``output_parser.parse``), not raw text.

        Returns:
            The same dict object, modified in place.
        """
        # Kept as a local rather than a class attribute so that no extra
        # attribute is declared on the BaseOutputParser subclass.
        allowed = ("grey", "black", "white", "unknown")
        # `or []` also covers a "PersonList" key that is present but null.
        for person in text.get("PersonList") or []:
            if not isinstance(person, dict):
                # Leave malformed entries untouched rather than crash on them.
                continue
            raw_value = person.get("hair_color")
            normalized = raw_value.strip().lower() if isinstance(raw_value, str) else ""
            person["hair_color"] = normalized if normalized in allowed else "unknown"
        return text

# Build the parser that turns the model's reply into a dict, together with the
# formatting instructions that are injected into the prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt: the format instructions are pre-filled, so only `paragraph` is supplied
# when the chain is invoked.
prompt = PromptTemplate(
    template="Extract the information from the given context.\n{format_instructions}\n\nContext: {paragraph}",
    input_variables=["paragraph"],
    partial_variables={"format_instructions": format_instructions},
)

# Example paragraph
paragraph = '''The warm sun spilled through the tall oak trees as the forest glade came alive with the sound of laughter. 
            Clara, her auburn hair catching the golden light, stood at the edge of the brook, her 5'6" frame bent slightly as she skipped a stone across the water. 
            Beside her, Mark towered at 6'3", his jet-black hair ruffled by the breeze, giving him the appearance of someone caught between seriousness and mischief. 
            "Try this one," he said, handing Clara a perfectly flat pebble, his voice carrying the easy confidence of someone who always knew what to do. 
            In the background, their friend Mia, petite at just 5'2" with an unmistakable cascade of platinum-blonde curls, leaned against a tree trunk, 
            sketching the scene in her notebook with quiet focus. 
            The trio seemed at home in the serene wilderness, their contrasting heights and features painting a vivid picture of camaraderie.'''

# Compose prompt and model with the pipe operator (the same style the product
# example in this notebook uses) instead of the deprecated LLMChain wrapper.
chain = prompt | llm

# Run the chain; `result` stays the raw reply text, as it was with `chain.run`.
# Assumes the chat model returns a message whose `.content` is that text.
result = chain.invoke({"paragraph": paragraph}).content

# Parse the reply into a dict, then normalise it with the custom parser.
parser = PersonListParser()
parsed_output = parser.parse(output_parser.parse(result))

print(parsed_output)


{'PersonList': [{'Name': 'Clara', 'Height': '5\'6"', 'HairColor': 'Auburn', 'hair_color': 'unknown'}, {'Name': 'Mark', 'Height': '6\'3"', 'HairColor': 'Jet-black', 'hair_color': 'unknown'}, {'Name': 'Mia', 'Height': '5\'2"', 'HairColor': 'Platinum-blonde', 'hair_color': 'unknown'}]}


In [19]:
# Per-person fields the model should return for every entry of "PersonList".
#
# ResponseSchema carries only `name`, `description` and `type`, and those are the
# only values rendered into the format instructions. Extra keyword arguments such
# as `enum=` or `items=` never reach the prompt (the recorded run returned keys
# like 'Name' and 'Height' instead of the ones declared here). Constraints are
# therefore written into the description text itself.
independent_schema = [
    ResponseSchema(name="name", description="The name of the person"),
    ResponseSchema(
        name="hairColor",
        description=(
            "The color of the person's hair, exactly one of: grey, black, white, unknown "
            "(use unknown when the color is anything else or is not stated)"
        ),
    ),
    ResponseSchema(name="HairColor", description="The color of the person's hair if known"),
    ResponseSchema(name="height_in_meters", description="Height measured in meters"),
]

# Flatten the per-person fields into one line of text. A single line is used
# because each schema is rendered as one `"name": type  // description` line.
person_fields_text = "; ".join(
    f'"{field.name}" ({field.type}): {field.description}' for field in independent_schema
)

# Top-level schema: one key, "PersonList", whose description tells the model which
# keys every list item must contain.
response_schemas = [
    ResponseSchema(
        name="PersonList",
        description=(
            "A list of all persons with their details. Each list item is a JSON object "
            f"with exactly these keys: {person_fields_text}"
        ),
        type="array",
    )
]

# Parse the example output
# class PetListParser(BaseOutputParser):
#     def parse(self, text: dict) -> dict:
#         return text  # Just returning the text as structured data

# Post-process the dict produced by the structured output parser.
class PersonListParser(BaseOutputParser):
    """Coerce every person's ``hairColor`` to one of the dropdown values.

    The allowed values are ``grey``, ``black``, ``white`` and ``unknown``.
    Values are compared case-insensitively and with surrounding whitespace
    removed, so ``"Black"`` is stored as ``"black"`` instead of being
    discarded. Anything else, including a missing key, becomes ``"unknown"``.
    """

    def parse(self, text: dict) -> dict:
        """Normalise ``hairColor`` for each entry of ``text["PersonList"]``.

        Args:
            text: Despite the name, this is the already-parsed dict (the notebook
                passes the result of ``output_parser.parse``), not raw text.

        Returns:
            The same dict object, modified in place.
        """
        # Kept as a local rather than a class attribute so that no extra
        # attribute is declared on the BaseOutputParser subclass.
        allowed = ("grey", "black", "white", "unknown")
        # `or []` also covers a "PersonList" key that is present but null.
        for person in text.get("PersonList") or []:
            if not isinstance(person, dict):
                # Leave malformed entries untouched rather than crash on them.
                continue
            raw_value = person.get("hairColor")
            normalized = raw_value.strip().lower() if isinstance(raw_value, str) else ""
            person["hairColor"] = normalized if normalized in allowed else "unknown"
        return text

# Build the parser that turns the model's reply into a dict, together with the
# formatting instructions that are injected into the prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt: the format instructions are pre-filled, so only `paragraph` is supplied
# when the chain is invoked.
prompt = PromptTemplate(
    template="Extract the information from the given context.\n{format_instructions}\n\nContext: {paragraph}",
    input_variables=["paragraph"],
    partial_variables={"format_instructions": format_instructions},
)

# Example paragraph
paragraph = '''The warm sun spilled through the tall oak trees as the forest glade came alive with the sound of laughter. 
            Clara, her auburn hair catching the golden light, stood at the edge of the brook, her 5'6" frame bent slightly as she skipped a stone across the water. 
            Beside her, Mark towered at 6'3", his jet-black hair ruffled by the breeze, giving him the appearance of someone caught between seriousness and mischief. 
            "Try this one," he said, handing Clara a perfectly flat pebble, his voice carrying the easy confidence of someone who always knew what to do. 
            In the background, their friend Mia, petite at just 5'2" with an unmistakable cascade of platinum-blonde curls, leaned against a tree trunk, 
            sketching the scene in her notebook with quiet focus. 
            The trio seemed at home in the serene wilderness, their contrasting heights and features painting a vivid picture of camaraderie.'''


# Compose prompt and model with the pipe operator (the same style the product
# example in this notebook uses) instead of the deprecated LLMChain wrapper.
chain = prompt | llm

# Run the chain; `result` stays the raw reply text, as it was with `chain.run`.
# Assumes the chat model returns a message whose `.content` is that text.
result = chain.invoke({"paragraph": paragraph}).content

# Parse the reply into a dict, then normalise it with the custom parser.
parser = PersonListParser()
parsed_output = parser.parse(output_parser.parse(result))


print(parsed_output)


{'PersonList': [{'Name': 'Clara', 'Height': '5\'6"', 'HairColor': 'Auburn', 'hairColor': 'unknown'}, {'Name': 'Mark', 'Height': '6\'3"', 'HairColor': 'Jet-Black', 'hairColor': 'unknown'}, {'Name': 'Mia', 'Height': '5\'2"', 'HairColor': 'Platinum-Blonde', 'hairColor': 'unknown'}]}


In [6]:
def parse_product(description: str) -> dict:
    """Extract product details from free-form text and pretty-print them.

    Args:
        description: Product description, passed to the chain as its ``input``
            variable.

    Returns:
        The chain's result. The same value is also printed as indented JSON.
    """
    # `chain` is resolved from the notebook's global namespace at call time, so
    # it must have been assigned before this function is called.
    result = chain.invoke({"input": description})
    print(json.dumps(result, indent=2))
    # Return the result so the declared `-> dict` contract actually holds;
    # previously the function only printed and implicitly returned None.
    return result

# JSON output parser for the product example.
# NOTE(review): `pydantic_object` is given a plain JSON-schema dict here; the
# parameter name suggests a Pydantic model class is expected -- confirm this is
# supported, or whether the argument has any effect.
output_parser_prod = JsonOutputParser(pydantic_object={
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "price": {"type": "number"},
        "features": {
            "type": "array",
            "items": {"type": "string"}
        }
    }
})
# Chat prompt: the system message shows the JSON shape to produce and the user
# message carries the product text through the `input` variable.
# Braces are doubled in the system message so they are kept as literal braces
# and not treated as template variables.
prompt_prod = ChatPromptTemplate.from_messages([
    ("system", """Extract product details into JSON with this structure:
        {{
            "name": "product name here",
            "price": number_here_without_currency_symbol,
            "features": ["feature1", "feature2", "feature3"]
        }}"""),
    ("user", "{input}")
])

# Pipeline: prompt -> model -> JSON parser. This rebinds `chain`, which the
# person-extraction cells earlier in the notebook also assign.
chain = prompt_prod | llm | output_parser_prod


# Sample product description used as input for the product extraction.
description = """The Kees Van Der Westen Speedster is a high-end, single-group espresso machine known for its precision, performance, 
and industrial design. Handcrafted in the Netherlands, it features dual boilers for brewing and steaming, PID temperature control for 
consistency, and a unique pre-infusion system to enhance flavor extraction. Designed for enthusiasts and professionals, it offers 
customizable aesthetics, exceptional thermal stability, and intuitive operation via a lever system. The pricing is approximatelyt $14,499 
depending on the retailer and customization options."""

# Run the extraction; parse_product prints the result as indented JSON.
parse_product(description)

{
  "name": "Kees Van Der Westen Speedster",
  "price": 14499,
  "features": [
    "Dual boilers for brewing and steaming",
    "PID temperature control for consistency",
    "Unique pre-infusion system to enhance flavor extraction",
    "Customizable aesthetics",
    "Exceptional thermal stability",
    "Intuitive operation via a lever system"
  ]
}


### Creating the JSON and the Objects