In [6]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain. llms.huggingface_pipeline import HuggingFacePipeline

In [7]:
model_name: str = "microsoft/phi-2"

nf4_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM. from_pretrained(
model_name,
quantization_config=nf4_config,
low_cpu_mem_usage=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
tokenizer = AutoTokenizer. from_pretrained(model_name)
max_new_token = 1024

model_pipeline = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
max_new_tokens=max_new_token,
pad_token_id=tokenizer.eos_token_id)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
gen_kwargs = {
"temperature": 0.6
}
llm = HuggingFacePipeline(
pipeline=model_pipeline,
model_kwargs=gen_kwargs)

  llm = HuggingFacePipeline(


# 1. Json Parser

In [12]:
from langchain_core.pydantic_v1 import BaseModel, Field

class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [13]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

parser = JsonOutputParser(pydantic_object=Joke)

In [14]:
print(parser)

pydantic_object=<class '__main__.Joke'>


# 2. Prompt

In [16]:
from langchain_core.prompts import PromptTemplate

prompt= PromptTemplate(
template="Answer the user query. \n{format_instructions}\n{query}\n",
input_variables=["query"],
partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [17]:
print(prompt)

input_variables=['query'] input_types={} partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"title": "Setup", "description": "question to set up a joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}\n```'} template='Answer the user query. \n{format_instructions}\n{query}\n'


# 4. Chain

In [18]:
chain = prompt | llm

In [19]:
joke_query = "Tell me a joke."

In [20]:
output = chain.invoke({"query": joke_query})

In [21]:
output

'Answer the user query. \nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"title": "Setup", "description": "question to set up a joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}\n```\nTell me a joke.\n\nExample:\n```\n{\n    "setup": "Why don\'t scientists trust atoms?",\n    "punchline": "Because they make up everything."\n}\n```\n\n## Solution\n\n```python\nimport json\n\ndef format_joke(joke):\n    # Define the output schema\n    schema = {\n 

In [23]:
print(output)

Answer the user query. 
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"setup": {"title": "Setup", "description": "question to set up a joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}
```
Tell me a joke.

Example:
```
{
    "setup": "Why don't scientists trust atoms?",
    "punchline": "Because they make up everything."
}
```

## Solution

```python
import json

def format_joke(joke):
    # Define the output schema
    schema = {
        "properties": {
       

In [22]:
parser_output = parser.invoke(output)
parser_output

{'properties': {'setup': {'title': 'Setup',
   'description': 'question to set up a joke',
   'type': 'string'},
  'punchline': {'title': 'Punchline',
   'description': 'answer to resolve the joke',
   'type': 'string'}},
 'required': ['setup', 'punchline']}

# Add to chain