In [41]:
"""Structured generation forces the LLM output to follow certain constraints.

https://huggingface.co/learn/cookbook/structured_generation
"""

'Structured generation forces the LLM output to follow certain constraints.\n\nhttps://huggingface.co/learn/cookbook/structured_generation\n'

In [42]:
import pandas as pd
import json
from huggingface_hub import InferenceClient

pd.set_option("display.max_colwidth", None)

In [43]:
repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"

llm_client = InferenceClient(model=repo_id, timeout=120)

### client test

In [44]:
llm_client.text_generation(
    prompt="How are you today?", max_new_tokens=20
)

" I hope you're having a great day! I just wanted to check in and see how things are"

### prompt

In [45]:
RAG_PROMPT_TEMPLATE_JSON = """
Answer the user query based on the source documents.

Here are the source documents: {context}


You should provide your answer as a JSON blob, and also provide all relevant short source snippets from the documents on which you directly based your answer, and a confidence score as a float between 0 and 1.
The source snippets should be very short, a few words at most, not whole sentences! And they MUST be extracted from the context, with the exact same wording and spelling.

Your answer should be built as follows, it must contain the "Answer:" and "End of answer." sequences.

Answer:
{{
  "answer": your_answer,
  "confidence_score": your_confidence_score,
  "source_snippets": ["snippet_1", "snippet_2", ...]
}}
End of answer.

Now begin!
Here is the user question: {user_query}.
Answer:
"""

In [46]:
RELEVANT_CONTEXT = """
Document:

The weather is really nice in Paris today.
To define a stop sequence in Transformers, you should pass the stop_sequence argument in your pipeline or model.
"""

In [47]:
USER_QUERY = "How can I define a stop sequence in Transformers?"

In [48]:
prompt = RAG_PROMPT_TEMPLATE_JSON.format(context=RELEVANT_CONTEXT, user_query=USER_QUERY)
print(prompt)


Answer the user query based on the source documents.

Here are the source documents: 
Document:

The weather is really nice in Paris today.
To define a stop sequence in Transformers, you should pass the stop_sequence argument in your pipeline or model.



You should provide your answer as a JSON blob, and also provide all relevant short source snippets from the documents on which you directly based your answer, and a confidence score as a float between 0 and 1.
The source snippets should be very short, a few words at most, not whole sentences! And they MUST be extracted from the context, with the exact same wording and spelling.

Your answer should be built as follows, it must contain the "Answer:" and "End of answer." sequences.

Answer:
{
  "answer": your_answer,
  "confidence_score": your_confidence_score,
  "source_snippets": ["snippet_1", "snippet_2", ...]
}
End of answer.

Now begin!
Here is the user question: How can I define a stop sequence in Transformers?.
Answer:



In [49]:
answer = llm_client.text_generation(
    prompt,
    max_new_tokens=1000,
)

In [50]:
answer = answer.split("End of answer.")[0]
print(answer)

{
  "answer": "You should pass the stop_sequence argument in your pipeline or model.",
  "confidence_score": 0.9,
  "source_snippets": ["stop_sequence", "pipeline or model"]
}



#### highlight

In [51]:
from ast import literal_eval

parsed_answer = literal_eval(answer)

In [52]:
def highlight(s):
    return "\x1b[1;32m" + s + "\x1b[0m"


def print_results(answer, source_text, highlight_snippets):
    print("Answer:", highlight(answer))
    print("\n\n", "=" * 10 + " Source documents " + "=" * 10)
    for snippet in highlight_snippets:
        source_text = source_text.replace(snippet.strip(), highlight(snippet.strip()))
    print(source_text)


print_results(
    parsed_answer["answer"], 
    RELEVANT_CONTEXT,
    parsed_answer["source_snippets"]
)

Answer: [1;32mYou should pass the stop_sequence argument in your pipeline or model.[0m



Document:

The weather is really nice in Paris today.
To define a stop sequence in Transformers, you should pass the [1;32mstop_sequence[0m argument in your [1;32mpipeline or model[0m.



### pydantic

In [53]:
from pydantic import BaseModel, confloat, StringConstraints
from typing import List, Annotated


class AnswerWithSnippets(BaseModel):
    answer: Annotated[str, StringConstraints(min_length=10, max_length=100)]
    confidence: Annotated[float, confloat(ge=0.0, le=1.0)]
    source_snippets: List[Annotated[str, StringConstraints(max_length=30)]]

In [54]:
AnswerWithSnippets.model_json_schema()

{'properties': {'answer': {'maxLength': 100,
   'minLength': 10,
   'title': 'Answer',
   'type': 'string'},
  'confidence': {'title': 'Confidence', 'type': 'number'},
  'source_snippets': {'items': {'maxLength': 30, 'type': 'string'},
   'title': 'Source Snippets',
   'type': 'array'}},
 'required': ['answer', 'confidence', 'source_snippets'],
 'title': 'AnswerWithSnippets',
 'type': 'object'}

In [55]:
p = """
Answer the user query based on the source documents.

Here are the source documents: 
Document:

The weather is really nice in Paris today.
To define a stop sequence in Transformers, you should pass the stop_sequence argument in your pipeline or model.

Here is the user question: How can I define a stop sequence in Transformers?.
Answer:

"""

In [56]:
# Using text_generation
answer = llm_client.text_generation(
    p,
    grammar={"type": "json", "value": AnswerWithSnippets.model_json_schema()},
    max_new_tokens=250,
)
print(answer)


{ "answer": "To define a stop sequence in Transformers, you should pass the stop_sequence argument in your pipe","confidence": 0.9,"source_snippets": ["To define a stop sequence in", "you should pass the stop", "sequence argument in your"] }


In [57]:
parsed_answer = json.loads(answer)

In [58]:
print_results(
    parsed_answer["answer"], 
    RELEVANT_CONTEXT,
    parsed_answer["source_snippets"]
)

Answer: [1;32mTo define a stop sequence in Transformers, you should pass the stop_sequence argument in your pipe[0m



Document:

The weather is really nice in Paris today.
[1;32mTo define a stop sequence in[0m Transformers, [1;32myou should pass the stop[0m_[1;32msequence argument in your[0m pipeline or model.



### post

In [59]:
data = {
    "inputs": prompt,
    "parameters": {
        "return_full_text": False,
        "grammar": {"type": "json", "value": AnswerWithSnippets.model_json_schema()},
        "max_new_tokens": 250,
    },
}
answer = json.loads(llm_client.post(json=data))[0]["generated_text"]
print(answer)



{"answer": "You should pass the stop_sequence argument in your pipeline or model.", "confidence": 1.0, "source_snippets": ["stop_sequence", "pipeline or model"]}


### grammar

In [60]:
schema = {
    "properties": {
        "location": {"type": "string"},
        "activity": {"type": "string"},
        "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5},
        "animals": {"type": "array", "items": {"type": "string"}},
    },
    "required": ["location", "activity", "animals_seen", "animals"],
}

In [61]:
answer = llm_client.text_generation(
    prompt="I saw a puppy a cat and a raccoon during my bike ride in the park",
    grammar={
        "type": "json", 
        "value": schema
    },
    max_new_tokens=250,
)
answer = json.loads(answer)

In [62]:
print(answer)

{'location': 'park', 'activity': 'bike ride', 'animals_seen': 3, 'animals': ['puppy', 'cat', 'raccoon']}
