# **HuggingFace Models**

## Installation
LangChain Hugging Face chat integration guide: https://python.langchain.com/docs/integrations/chat/huggingface/

In [5]:
import getpass
import os

# Create a token at https://huggingface.co/settings/tokens
HF_TOKEN_ENV = "HUGGINGFACEHUB_API_TOKEN"

# Prefer the Colab secret named 'huggingToken'. The lookup can fail when the
# notebook runs outside Colab (google.colab is not importable) or when the
# secret is missing / not shared with this notebook. In those cases fall
# through to the checks below instead of crashing the cell, so the interactive
# prompt is actually reachable.
try:
    from google.colab import userdata

    hf_token = userdata.get('huggingToken')
except ImportError:
    hf_token = None
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    hf_token = None

if hf_token:
    os.environ[HF_TOKEN_ENV] = hf_token

# Last resort: use a token already present in the environment, otherwise ask
# for it interactively so it is never hardcoded in the notebook.
if not os.environ.get(HF_TOKEN_ENV):
    os.environ[HF_TOKEN_ENV] = getpass.getpass("Enter your Hugging Face API key: ")

In [6]:
# Install/upgrade the listed packages into the kernel's environment
# (--quiet trims pip's output).
# NOTE(review): versions are unpinned, so a later re-run may pull different
# releases — consider pinning once a working set is known.
%pip install --upgrade --quiet  langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2 bitsandbytes accelerate

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m85.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m330.9/330.9 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.4/404.4 kB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.2/255.2 kB[0m [31m17.5 MB/s[0m eta [36

In [None]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from transformers import BitsAndBytesConfig, AutoModelForCausalLM

# Model card: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct
model_id = "microsoft/Phi-3-mini-4k-instruct"

# bitsandbytes settings handed to the model loader: 4-bit NF4 quantization,
# float16 compute dtype, double quantization enabled.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Text-generation pipeline for the model above; generation options travel in
# pipeline_kwargs, loader options (the quantization config) in model_kwargs.
llm = HuggingFacePipeline.from_model_id(
    task="text-generation",
    model_id=model_id,
    model_kwargs=dict(quantization_config=quantization_config),
    pipeline_kwargs={
        "max_new_tokens": 512,
        "do_sample": False,
        "repetition_penalty": 1.03,
        "return_full_text": False,
    },
)

# Chat wrapper around the pipeline; the cells below call chat_model.invoke(...).
chat_model = ChatHuggingFace(llm=llm)

## Listing

In [8]:
# Plain-text request with no formatting instructions attached.
task = "List three popular Arabian plates."

# Ask the chat model; the reply text is the cell's displayed result.
ai_response = chat_model.invoke(input=task)
ai_response.content


Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


" Three popular Arabian plates are:\n\n1. Mansaf - A traditional Jordanian dish made with lamb cooked in a sauce of fermented dried yogurt and served over rice or flatbread.\n2. Kabsa - A Saudi Arabian dish consisting of rice, meat (usually chicken, lamb, or fish), and a blend of spices.\n3. Mandi - An Emirati dish that is similar to mansaf but uses camel meat instead of lamb. It's typically served with rice and a yogurt-based sauce."

In [9]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import CommaSeparatedListOutputParser

# The parser supplies the instruction text asking the model for a
# comma-separated reply, and is used again later to parse that reply.
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

# Only {type} is filled in per call; the format instructions are bound up front
# through partial_variables.
prompt = PromptTemplate(
    template="List three popular {type} plates.\n{format_instructions}",
    input_variables=["type"],
    partial_variables={"format_instructions": format_instructions},
)

# Render the prompt once and print it so the exact text sent to the model is visible.
demo_message = prompt.format(type="Arabian")
print("Prompt: " + demo_message)


Prompt: List three popular Arabian plates.
Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [10]:
# Send the rendered list prompt to the chat model and print the reply text.
ai_response = chat_model.invoke(input=demo_message)
print("AI: " + ai_response.content)

AI:  Chicken Shawarma, Hummus and Falafel, Biryani


In [11]:
# Parse the model's comma-separated reply with the list output parser.
output = output_parser.parse(ai_response.content)

# Show the parsed value first, then the Python type it came back as.
print(f"Output value: {output}")
print(f"Output type: {type(output)}")


Output value: ['Chicken Shawarma', 'Hummus and Falafel', 'Biryani']
Output type: <class 'list'>


## DateTime

In [12]:
# Instruction appended to the question so the model answers with a bare date.
# (Fixed wording: the original text sent "Replay with the date ... format. like".)
format_instruction = "Reply with the date in DAY/MONTH/YEAR format, like '23/05/1988'."

# Both placeholders are regular input variables here, so both are passed to format().
prompt = PromptTemplate(
    template="When was {person} born?\n{format_instruction}",
    input_variables=['person', 'format_instruction'],
)

message = prompt.format(person="Thomas Edison", format_instruction=format_instruction)

# NOTE(review): the reply is shown as-is; nothing in this cell parses or
# validates the date, so check the model's answer independently.
ai_response = chat_model.invoke(message)
ai_response.content

' 10/08/1847'

## Pydantic

In [20]:
from pydantic import BaseModel, Field
from typing import List
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate

# Schema for the structured reply requested from the model: a plate name plus
# the names of its ingredients. Described with comments rather than a class
# docstring so no extra text ends up in the schema generated from this model.
class Plate(BaseModel):
    # Name of the plate, as a single string.
    plate_name: str = Field(description="name of the plate")
    # Ingredient names, one string per ingredient.
    ingredients: List[str] = Field(description="list of names of the ingredients")

# Parser tied to the Plate model; it is used later to turn the model's reply
# into a Plate instance.
parser = PydanticOutputParser(pydantic_object=Plate)

# Instruction text (including the JSON schema) that tells the model how to
# format its answer; bound into the prompt through partial_variables.
format_instructions = parser.get_format_instructions()

prompt = PromptTemplate(
    template="Answer the query.\n{plate_name}?\n{format_instructions}",
    input_variables=["plate_name"],
    partial_variables={"format_instructions": format_instructions},
)

plate_name = 'Shawerma'

# Render and print the full prompt so the exact text sent to the model is visible.
message = prompt.format(plate_name=plate_name)
print("message: " + message)



meaage: Answer the query.
Shawerma?
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"plate_name": {"description": "name of the plate", "title": "Plate Name", "type": "string"}, "ingredients": {"description": "list of names of the ingredients", "items": {"type": "string"}, "title": "Ingredients", "type": "array"}}, "required": ["plate_name", "ingredients"]}
```


In [21]:
# Send the schema-guided prompt to the chat model and print the raw reply text.
ai_response = chat_model.invoke(input=message)
print(f"AI: {ai_response.content}")



AI:  ```json
{
  "plate_name": "Shawerma",
  "ingredients": ["shrimp", "avocado", "lime", "tomato", "cucumber", "mayonnaise"]
}
```


In [22]:
# Convert the reply text into a Plate instance and show it as the cell result.
output = parser.parse(text=ai_response.content)
output

Plate(plate_name='Shawerma', ingredients=['shrimp', 'avocado', 'lime', 'tomato', 'cucumber', 'mayonnaise'])

In [23]:
# Display the plate_name field of the parsed result.
output.plate_name

'Shawerma'

In [24]:
# Display the ingredients field of the parsed result.
output.ingredients

['shrimp', 'avocado', 'lime', 'tomato', 'cucumber', 'mayonnaise']