# Testing approaches to prompt sharing/downloading on the HF Hub

> [!WARNING]  
> This notebook is not maintained or tested and contains outdated code. I'm just using it for quick testing and many code snippets here won't work with the latest version.

In [1]:
# pip install
#%pip install 'prompt_templates[inference]'

# for dev testing
#!poetry install --extras "inference"

In [2]:
# Load API keys from a local .env file into the process environment.
# Later cells read them via os.environ (e.g. HF_TOKEN, GEMINI_API_KEY).
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
#import importlib
#import prompt_templates.hub_api
#import prompt_templates.prompt_templates
#import prompt_templates.tools

# Reload the specific modules after editing the package source
#importlib.reload(prompt_templates.hub_api)
#importlib.reload(prompt_templates.prompt_templates)
#importlib.reload(prompt_templates.tools)


In [4]:
import logging

# Show log records of level INFO and above in the notebook output
logging.basicConfig(level=logging.INFO)

### Issue reproduction

In [2]:
from prompt_templates import TextPromptTemplate

# Minimal reproduction: fill a one-variable text template and print the input
# string next to the populated result so the two can be compared by eye.
prompt_template = TextPromptTemplate(
    template=(
        "\n"
        "    You are a helpful assistant that can answer questions about the world.\n"
        "    You are given the following information:\n"
        "    {{info}}\n"
        "    "
    ),
    template_variables=["info"],
)

info = (
    "The Atlantic Ocean is the second-largest of the world's oceans. "
    "It covers approximately 20 percent of Earth's surface"
)
print(f"1. String before populate: {info}")

# Substitute the `info` variable into the template
message = prompt_template.populate(info=info)
print(f"2. String after populate: {message}")

1. String before populate: The Atlantic Ocean is the second-largest of the world's oceans. It covers approximately 20 percent of Earth's surface
2. String after populate: 
    You are a helpful assistant that can answer questions about the world.
    You are given the following information:
    The Atlantic Ocean is the second-largest of the world's oceans. It covers approximately 20 percent of Earth's surface
    


### Prompt Template Dictionary tests

In [2]:
import os
from pathlib import Path

# Switch the working directory to the repository root so that the relative
# "./tests/test_data/..." paths used by later cells resolve.
# The root is found by walking up from the launch directory to the nearest
# folder that contains tests/test_data, rather than via a hard-coded absolute
# path that only exists on one machine. If no such folder is found, the
# working directory is left unchanged. Re-running the cell is a no-op.
launch_dir = Path.cwd()
repo_root = next(
    (
        folder
        for folder in (launch_dir, *launch_dir.parents)
        if (folder / "tests" / "test_data").is_dir()
    ),
    launch_dir,
)
print(launch_dir)
os.chdir(repo_root)
print(os.getcwd())


/Users/moritzlaurer/huggingface/projects/prompt-templates/scripts
/Users/moritzlaurer/huggingface/projects/prompt-templates


In [3]:
from prompt_templates import PromptTemplateDictionary

# Load a dictionary of prompt templates from a local YAML file.
# The path is relative, so this only works when the current working
# directory is the repository root.
template_dictionary = PromptTemplateDictionary.load_from_local(
    file_path="./tests/test_data/example_prompts/agent_example_1.yaml"
)

In [10]:
from prompt_templates import PromptTemplateDictionary, ChatPromptTemplate, TextPromptTemplate

# Chat template with a system message (tool list) and a user message (task).
agent_system_prompt_template = ChatPromptTemplate(
    template=[
        {
            "role": "system",
            "content": (
                "You are a code agent and you have the following tools at your disposal:\n"
                "<tools>\n"
                "{{tool_descriptions}}\n"
                "</tools>"
            ),
        },
        {
            "role": "user",
            "content": "Here is the task:\n<task>\n{{task}}\n</task>\nNow begin!",
        },
    ],
    template_variables=["tool_descriptions", "task"],
)

# Single-message chat template asking the agent to write a plan.
agent_planning_prompt_template = ChatPromptTemplate(
    template=[
        {
            "role": "user",
            "content": (
                "Here is your task:\n\n"
                "Task:\n```\n{{task}}\n```\n\n"
                "Your plan can leverage any of these tools:\n{{tool_descriptions}}\n\n"
                "{{managed_agents_descriptions}}\n\n"
                "List of facts that you know:\n```\n{{answer_facts}}\n```\n\n"
                "Now begin! Write your plan below."
            ),
        },
    ],
    template_variables=[
        "task",
        "tool_descriptions",
        "managed_agents_descriptions",
        "answer_facts",
    ],
)

# Bundle both templates under the keys used to look them up later.
template_dictionary = PromptTemplateDictionary(
    template_dictionary={
        "agent_system_prompt": agent_system_prompt_template,
        "agent_planning_prompt": agent_planning_prompt_template,
    }
)

# Last expression of the cell: display the stored mapping.
template_dictionary.template_dictionary

{'agent_system_prompt': ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are a code age...', template_variables=['tool_descriptions', 'task'], metadata={}, client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard'),
 'agent_planning_prompt': ChatPromptTemplate(template=[{'role': 'user', 'content': 'Here is your task:\n...', template_variables=['task', 'tool_descriptions', 'managed_agents_desc...', metadata={}, client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard')}

In [5]:
# Display the mapping of template names to template objects held by the dictionary
template_dictionary.template_dictionary

{'agent_system_prompt': ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are a code age...', template_variables=['tool_descriptions', 'task'], metadata={}, client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard'),
 'agent_planning_prompt': TextPromptTemplate(template='Here is your task:\n\nTask:\n```\n{{task}}\n```\n...', template_variables=['task', 'tool_descriptions', 'managed_agents_desc...', metadata={}, client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard')}

In [8]:
# Look the template up once, print its YAML form, then fill in both variables.
# The populated message list is the cell's last expression, so it is displayed.
agent_system_prompt = template_dictionary["agent_system_prompt"]
agent_system_prompt.display(format="yaml")
agent_system_prompt.populate(
    tool_descriptions="... some tool descriptions ...",
    task="... some task ...",
)

template:
- role: "system"
  content: |-
    You are a code agent and you have the following tools at your disposal:
    <tools>
    {{tool_descriptions}}
    </tools>
- role: "user"
  content: |-
    Here is the task:
    <task>
    {{task}}
    </task>
    Now begin!
template_variables:
- tool_descriptions
- task
metadata: {}
client_parameters: {}
custom_data: {}


[{'role': 'system',
  'content': 'You are a code agent and you have the following tools at your disposal:\n<tools>\n... some tool descriptions ...\n</tools>'},
 {'role': 'user',
  'content': 'Here is the task:\n<task>\n... some task ...\n</task>\nNow begin!'}]

### Display fix

In [7]:
from prompt_templates import TextPromptTemplate, ChatPromptTemplate
# Download the example translation template from the Hub
prompt_template = TextPromptTemplate.load_from_hub(
    repo_id="MoritzLaurer/example_prompts",
    filename="translate.yaml"
)

# Print the template in YAML form to check how it is rendered
prompt_template.display(format="yaml")

template: |-
  Translate the following text to {{language}}:
  {{text}}
template_variables:
- language
- text
metadata:
  name: "Simple Translator"
  description: "A simple translation prompt for illustrating the standard prompt YAML
    format"
  tags:
  - translation
  - multilinguality
  version: "0.0.1"
  author: "Guy van Babel"
client_parameters: {}
custom_data: {}


### Testing new loading logic

In [5]:
from prompt_templates import ChatPromptTemplate, TextPromptTemplate

# Chat template: loaded through the class-specific loader and populated.
# populate() is the method name used by the newer cells in this notebook
# (this cell previously called populate_template()).
prompt_template = ChatPromptTemplate.load_from_hub(
    repo_id="MoritzLaurer/example_prompts",
    filename="code_teacher.yaml"
)

prompt = prompt_template.populate(
    concept="list comprehensions",
    programming_language="python",
)

# Text template: same flow. Note that `prompt_template` and `prompt` are
# rebound here, so only the translation prompt survives this cell.
prompt_template = TextPromptTemplate.load_from_hub(
    repo_id="MoritzLaurer/example_prompts",
    filename="translate.yaml"
)

prompt = prompt_template.populate(
    language="German",
    text="Hello, how are you?"
)



In [6]:
# Display the most recently populated prompt
prompt

'Translate the following text to German:\nHello, how are you?'

### rstar-math prompt

In [1]:
import os
from pathlib import Path

# Switch the working directory to the repository root so that the relative
# "tests/test_data/..." paths used when saving templates resolve.
# The root is found by walking up from the launch directory to the nearest
# folder that contains tests/test_data, rather than via a hard-coded absolute
# path that only exists on one machine. If no such folder is found, the
# working directory is left unchanged. Re-running the cell is a no-op.
launch_dir = Path.cwd()
repo_root = next(
    (
        folder
        for folder in (launch_dir, *launch_dir.parents)
        if (folder / "tests" / "test_data").is_dir()
    ),
    launch_dir,
)
print(launch_dir)
os.chdir(repo_root)
print(os.getcwd())



/Users/moritzlaurer/huggingface/projects/prompt-templates/scripts
/Users/moritzlaurer/huggingface/projects/prompt-templates


In [2]:
from prompt_templates import TextPromptTemplate

# File name used for both the local copy and the Hub upload
filename = "rstar_math_mcts_prompt_deepseek.yaml"

# Prompt text with a single {{input}} placeholder for the question.
# A backslash at the very end of a line inside this (non-raw) triple-quoted
# string is a line continuation: the newline is dropped and the text continues
# on the same line in the resulting prompt.
# NOTE(review): because of that, `in \` + `boxed.` and `weights \` + `boxed16`
# end up as plain "boxed." / "boxed16" (no backslash), whereas the second
# example below writes `\\boxed{...}`. Confirm against the paper whether
# `\\boxed` (and `{16}`) was intended in those two places.
template = """\
You are a powerful agent with broad math knowledge and great python programming skills. You need to use python interpreter to do accurate calculation on math equations.

!!! Remember:
1. Use code solve the problem step by step. The solution should include three parts: <code>, <output>, and <answer>.
2. All calculations should be done in python code. Provide concise reasoning and thinking in the comments of the code.
3. The most related python packages include 'math', 'sympy', 'scipy', and 'numpy'.
4. Please use the following template:

Question: the input question
<code>Construct the code step by step. Use <end_of_step> to indicate the end of each step. Ensure your code \
can execute correctly(excluding <end_of_step>) and print the answer. Avoid undefined variables (NameError), \
unimported packages, or formatting errors (SyntaxError, TypeError). In the last step of the code, print the final \
answer and add a comment: Now print the final answer.<end_of_code>
<output>Execute the code in using the Python interpreter and display the printed results.<end_of_output>
<answer>The concise answer without verbose context, put your final answer's numerical part (without unit, \
only focus on the numerical part if it's a choice question) in \
boxed.<end_of_answer> Now! It's your turn.
Question: {{input}}

The following are 2 demonstration examples:
Question: Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how \
many times must Terrell lift them in order to lift the same total weight?
<code>
# Step 1: Calculate the total weight lifted with two 20-pound weights
total_weight_20 = 2 * 20 * 12
<end_of_step>

# Step 2: Calculate the weight lifted per repetition with two 15-pound weights
weight_per_rep_15 = 2 * 15
<end_of_step>

# Step 3: Calculate the number of repetitions needed to lift the same total weight with two 15-pound weights
reps_needed = total_weight_20 / weight_per_rep_15
<end_of_step>

# Now print the final answer
print(reps_needed)
<end_of_code>
<output>16.0 <end_of_output> <answer>From the result, we can see that Terrell must lift the 15-pound weights \
boxed16 times to lift the same total weight.
<end_of_answer>,

Question: Find the value of x that satisfies $\\frac{\\sqrt{3x+5}}{\\sqrt{6x+5}} = \\frac{\\sqrt{5}}{3}$ . Express your answer as a common fraction.
<code>
from sympy import symbols, Eq, solve, sqrt

# Define the variable x
x = symbols('x')
<end_of_step>

# Define the equation
equation = Eq(sqrt(3*x + 5) / sqrt(6*x + 5), sqrt(5) / 3)
<end_of_step>

# Solve the equation for x
solution = solve(equation, x)
<end_of_step>

# Now print the final answer
print(solution)
<end_of_code>
<output>[20/3]

<end_of_output> <answer>From the result, we can see that the value of x is $\\boxed{\\frac{20}{3}}$ 
<end_of_answer>"""

# Names of the {{...}} placeholders that must be supplied when populating
template_variables = ["input"]
# Descriptive metadata stored with the template (description, version, provenance)
metadata = {
    "description": """A bootstrap prompt that sets up an agent with math and Python programming capabilities to solve mathematical problems step-by-step using code. 
It provides a structured template with code, output, and answer sections, demonstrated through two examples involving weight calculations and equation solving.
The prompt template was copied from Appendix A.3 of the rStar-Math paper.""",
    "version": "1.0.0",
    "authors": "Xinyu Guan, Li Lyna Zhang, Yifei Liu, Ning Shang, Youran Sun, Yi Zhu, Fan Yang, Mao Yang",
    "authors_org": "Microsoft Research Asia",
    "source": "Appendix A.3 of https://arxiv.org/pdf/2501.04519"
}
# Left empty for this template
client_parameters = {}
custom_data = {}

# Assemble the template object from the pieces defined above
prompt_template = TextPromptTemplate(
    template=template,
    template_variables=template_variables,
    metadata=metadata,
    client_parameters=client_parameters,
    custom_data=custom_data
)

# Write a local copy; the path is relative to the current working directory
prompt_template.save_to_local(f"tests/test_data/rstar_math_prompts/{filename}")

# Upload the same file to the Hub.
# NOTE(review): create_repo=True presumably creates the repo when it does not exist yet - confirm.
prompt_template.save_to_hub(
    repo_id="MoritzLaurer/rstar-math-prompts", 
    filename=filename, 
    create_repo=True,
)

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/rstar-math-prompts/commit/1851b643a09c094d8122b7ae540108c60a07de80', commit_message='Upload prompt template rstar_math_mcts_prompt_deepseek.yaml', commit_description='', oid='1851b643a09c094d8122b7ae540108c60a07de80', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/rstar-math-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/rstar-math-prompts'), pr_revision=None, pr_num=None)

In [7]:
from prompt_templates import PromptTemplateLoader

# Fetch the bootstrap template from the Hub, fill in the question, and print
# the resulting prompt.
# NOTE(review): PromptTemplateLoader / populate_template differ from the
# load_from_hub / populate calls used by other cells in this notebook -
# presumably an older API; confirm before re-running.
prompt_template = PromptTemplateLoader.from_hub(
    repo_id="MoritzLaurer/rstar-math-prompts",
    filename="rstar_math_mcts_prompt_deepseek.yaml",
)

question = "How many numbers can you get by multiplying two or more distinct members of the set 1, 2, 3, 5, 11 together?"
prompt = prompt_template.populate_template(input=question)

print(prompt)


You are a powerful agent with broad math knowledge and great python programming skills. You need to use python interpreter to do accurate calculation on math equations.

!!! Remember:
1. Use code solve the problem step by step. The solution should include three parts: <code>, <output>, and <answer>.
2. All calculations should be done in python code. Provide concise reasoning and thinking in the comments of the code.
3. The most related python packages include 'math', 'sympy', 'scipy', and 'numpy'.
4. Please use the following template:

Question: the input question
<code>Construct the code step by step. Use <end_of_step> to indicate the end of each step. Ensure your code can execute correctly(excluding <end_of_step>) and print the answer. Avoid undefined variables (NameError), unimported packages, or formatting errors (SyntaxError, TypeError). In the last step of the code, print the final answer and add a comment: Now print the final answer.<end_of_code>
<output>Execute the code in usin

In [4]:
# Send the populated prompt to a hosted chat model and print its reply.
# Any LLM client would do; this cell uses the Hugging Face InferenceClient.
from huggingface_hub import InferenceClient
import os

# The token is read from the environment (None if HF_TOKEN is not set)
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

# The whole prompt goes in as one user message
chat_request = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "messages": [{"role": "user", "content": prompt}],
    "max_tokens": 500,
}
response = client.chat.completions.create(**chat_request)
print(response.choices[0].message.content)

Question: How many numbers can you get by multiplying two or more distinct members of the set 1, 2, 3, 5, 11 together?

<code>
from itertools import combinations

# Step 1: Define the set of numbers
numbers = [1, 2, 3, 5, 11]
<end_of_step>

# Step 2: Initialize an empty set to store the unique products
unique_products = set()
<end_of_step>

# Step 3: Generate all possible combinations of two or more distinct numbers
for r in range(2, len(numbers) + 1):
    for combo in combinations(numbers, r):
        # Calculate the product of the current combination
        product = 1
        for num in combo:
            product *= num
        # Add the product to the set of unique products
        unique_products.add(product)
<end_of_step>

# Step 4: Count the number of unique products
count = len(unique_products)
<end_of_step>

# Now print the final answer
print(count)
<end_of_code>
<output>15
<end_of_output>
<answer>From the result, we can see that the number of unique products is $\boxed{15}$.

In [8]:
# File name used for both the local copy and the Hub upload
filename = "rstar_math_mcts_prompt_policy_slm.yaml"

# Prompt text with two placeholders: the question and the steps generated so far.
# The backslash right after the opening quotes suppresses the leading newline.
template = """\
<|user|>:
{{input_question}}
<|assistant|>: Let's think step by step and solve the problem with code. {{trajectory_of_previous_steps}}"""


# Names of the {{...}} placeholders used in the template above
template_variables = ["input_question", "trajectory_of_previous_steps"]
# Descriptive metadata stored with the template (description, version, provenance)
metadata = {
    "description": """A Monte Carlo Tree Search (MCTS) based prompt that focuses on generating next steps for problem-solving with a user/assistant dialogue format. 
The prompt template was copied from Appendix A.3 of the rStar-Math paper.""",
    "version": "1.0.0",
    "authors": "Xinyu Guan, Li Lyna Zhang, Yifei Liu, Ning Shang, Youran Sun, Yi Zhu, Fan Yang, Mao Yang",
    "authors_org": "Microsoft Research Asia",
    "source": "Appendix A.3 of https://arxiv.org/pdf/2501.04519"
}
# Left empty for this template
client_parameters = {}
custom_data = {}

# NOTE: TextPromptTemplate is not imported in this cell; it must already have
# been imported by an earlier cell.
prompt_template = TextPromptTemplate(
    template=template,
    template_variables=template_variables,
    metadata=metadata,
    client_parameters=client_parameters,
    custom_data=custom_data
)

# Write a local copy; the path is relative to the current working directory
prompt_template.save_to_local(f"tests/test_data/rstar_math_prompts/{filename}")

# Upload the same file to the Hub.
# NOTE(review): create_repo=True presumably creates the repo when it does not exist yet - confirm.
prompt_template.save_to_hub(
    repo_id="MoritzLaurer/rstar-math-prompts", 
    filename=filename, 
    create_repo=True,
)

CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/rstar-math-prompts/commit/d35a9bcccb168af440beae2c9bcae37ed5f62bef', commit_message='Upload prompt template rstar_math_mcts_prompt_policy_slm.yaml', commit_description='', oid='d35a9bcccb168af440beae2c9bcae37ed5f62bef', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/rstar-math-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/rstar-math-prompts'), pr_revision=None, pr_num=None)

In [None]:
example = """\
SFT Prompt Example:
<|user|>:
How many numbers can you get by multiplying two or more distinct members of the set 1, 2, 3, 5, 11 together?

<|assistant|>: Let's think step by step and solve the problem with code.
<code>
# Step 1: Create the set of numbers
numbers = {1, 2, 3, 5, 11}
<end_of_step>

# Step 2: Define a function to generate all possible products of two or more distinct members of the set
from itertools import combinations
def generate_products(numbers):
    products = set()
    for r in range(2, len(numbers) + 1):
        # For combinations of 2, 3, 4, and 5 elements
        for combo in combinations(numbers, r):
            product = 1
            for num in combo:
                product *= num
                products.add(product)
                return products
<end_of_step>

# Step 3: Calculate the products
unique_products = generate_products(numbers)
<end_of_step>

# Step 4: Calculate the number of unique products
num_unique_products = len(unique_products)
<end_of_step>

# Now print the final answer
print(num_unique_products)
<end_of_code>
<output>15<end_of_output>
<answer>From the result, we can see that there are $\\boxed{15}$ unique products.
<end_of_answer>\
"""

### Testing changes after removal of PopulatedPrompt class

In [1]:
from prompt_templates import PromptTemplateLoader
from prompt_templates import format_for_client

# NOTE(review): PromptTemplateLoader / populate_template differ from the
# load_from_hub / populate calls used by other cells in this notebook -
# presumably an older API; confirm before re-running.
prompt_template = PromptTemplateLoader.from_hub(repo_id="MoritzLaurer/example_prompts", filename="code_teacher.yaml")
prompt = prompt_template.populate_template(
    concept="list comprehensions", 
    programming_language="python",
)

# The populated prompt prints as a list of role/content message dicts
print(prompt)

# Convert the messages for the Anthropic client; the displayed result has a
# separate 'system' string and a 'messages' list.
format_for_client(prompt, "anthropic")


[{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehensions is in python.'}]


{'system': 'You are a coding assistant who explains concepts clearly and provides short examples.',
 'messages': [{'role': 'user',
   'content': 'Explain what list comprehensions is in python.'}]}

In [2]:
from prompt_templates import format_for_client

# Same conversion as a positional "anthropic" argument, but passing the client
# name by keyword and keeping the result. `prompt` must come from an earlier cell.
messages_anthropic = format_for_client(prompt, client="anthropic")

### Test using FACTS templates

In [2]:
from prompt_templates import PromptTemplateLoader

# Load the response-level grounding template from the Hub
prompt_template = PromptTemplateLoader.from_hub(
    repo_id="MoritzLaurer/facts-grounding-prompts",
    filename="grounding_accuracy_response_level.yaml",
)

# Custom data for the three template variables
grounding_inputs = {
    "user_request": "Did we meet our revenue targets in January 2024?",  # original user query
    "context_document": "Revenue targets were exceeded by 10% in January 2023.",  # factual context
    "response": "Revenue targets were exceeded by 10% in January 2024.",  # LLM response to check
}

# Populate the template; the last expression displays the resulting prompt
prompt = prompt_template.populate_template(**grounding_inputs)

prompt


"Your task is to check if the Response is accurate to the Evidence.\nGenerate 'Accurate' if the Response is accurate when verified according to the Evidence, or 'Inaccurate' if the Response is inaccurate (contradicts the evidence) or cannot be verified.\n\n**Query**:\n\nDid we meet our revenue targets in January 2024?\n\n**End of Query**\n\n**Evidence**\n\nRevenue targets were exceeded by 10% in January 2023.\n\n**End of Evidence**\n\n**Response**:\n\nRevenue targets were exceeded by 10% in January 2024.\n\n**End of Response**\n\nLet's think step-by-step."

In [5]:
# Fact-check with an LLM: any client works, this cell uses the Hugging Face
# InferenceClient and sends the populated prompt as a single user message.
from huggingface_hub import InferenceClient
import os

# The token is read from the environment (None if HF_TOKEN is not set)
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

chat_request = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "messages": [{"role": "user", "content": prompt}],
    "max_tokens": 500,
}
response = client.chat.completions.create(**chat_request)
print(response.choices[0].message.content)

To determine the accuracy of the Response, let's break down the information provided in the Query, Evidence, and Response.

1. **Query**: The question asks if revenue targets were met in January 2024.
2. **Evidence**: The evidence provided states that revenue targets were exceeded by 10% in January 2023. It's crucial to note that the year mentioned in the evidence is 2023, not 2024.
3. **Response**: The response claims that revenue targets were not only met but exceeded by 10% in January 2024.

Given the information above, the Response cannot be verified as accurate based on the Evidence because:
- The Evidence pertains to January 2023, while the Query and Response are about January 2024.
- There is no information in the Evidence about the revenue targets for January 2024.

Therefore, the Response is **Inaccurate** because it cannot be verified with the provided Evidence, and it contradicts the Evidence by suggesting that the same outcome (exceeding targets by 10%) occurred in a differ

In [6]:
#!pip install google-genai
# `os` is imported here so the cell does not depend on an earlier cell having imported it
import os

from google import genai
from google.genai import types

# Run the same fact-checking prompt through Gemini.
# The API key is read from the environment (None if GEMINI_API_KEY is not set).
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
response = client.models.generate_content(
    model='gemini-2.0-flash-exp',
    # Part.from_text takes the text as a keyword argument in current google-genai
    # releases; the keyword form also works where positional was accepted.
    contents=types.Content(parts=[types.Part.from_text(text=prompt)], role="user"),
)
print(response.text)

The evidence states that revenue targets were exceeded in January 2023, while the response claims this happened in January 2024. This is a clear contradiction of the information provided. Therefore, the response is inaccurate.



## Test creating prompt-templates

In [1]:
from prompt_templates import TextPromptTemplate

In [2]:
import os
from pathlib import Path

# Switch the working directory to the repository root so that the relative
# "tests/test_data/..." paths used when saving templates resolve.
# The root is found by walking up from the launch directory to the nearest
# folder that contains tests/test_data, rather than via a hard-coded absolute
# path that only exists on one machine. If no such folder is found, the
# working directory is left unchanged. Re-running the cell is a no-op.
launch_dir = Path.cwd()
repo_root = next(
    (
        folder
        for folder in (launch_dir, *launch_dir.parents)
        if (folder / "tests" / "test_data").is_dir()
    ),
    launch_dir,
)
print(launch_dir)
os.chdir(repo_root)
print(os.getcwd())



/Users/moritzlaurer/huggingface/projects/prompt-templates/scripts
/Users/moritzlaurer/huggingface/projects/prompt-templates


In [3]:

# File name used for both the local copy and the Hub upload
filename = "grounding_accuracy_response_level.yaml"
# Prompt text with three placeholders. The `\n` sequences are escapes in a
# non-raw string, so each adds a newline on top of the literal line breaks.
template = """Your task is to check if the Response is accurate to the Evidence.
Generate 'Accurate' if the Response is accurate when verified according to the Evidence, or 'Inaccurate' if the Response is inaccurate (contradicts the evidence) or cannot be verified.

**Query**:\n\n{{user_request}}\n\n**End of Query**\n
**Evidence**\n\n{{context_document}}\n\n**End of Evidence**\n
**Response**:\n\n{{response}}\n\n**End of Response**\n
Let's think step-by-step."""
# Names of the {{...}} placeholders used in the template above
template_variables = ["user_request", "context_document", "response"]
# Descriptive metadata stored with the template.
# NOTE(review): the continuation lines of "description" are indented inside the
# string literal, so the stored description contains those leading spaces -
# confirm this is intended.
metadata = {
    "description": """An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.
    The prompt was copied from the evaluation_prompts.csv file from Kaggle.
    This specific prompt elicits a binary accurate/inaccurate classifier for the entire response.""",
    "evaluation_method": "response_level",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv"
}
# Left empty for this template
client_parameters = {}
custom_data = {}

# Assemble the template object from the pieces defined above
prompt_template = TextPromptTemplate(
    template=template,
    template_variables=template_variables,
    metadata=metadata,
    client_parameters=client_parameters,
    custom_data=custom_data
)

# Write a local copy; the path is relative to the current working directory
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")

# Upload the same file to the Hub.
# NOTE(review): create_repo=True presumably creates the repo when it does not exist yet - confirm.
prompt_template.save_to_hub(
    repo_id="MoritzLaurer/facts-grounding-prompts", 
    filename=filename, 
    create_repo=True,
)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template grounding_accuracy_response_level.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [4]:
filename = "grounding_nli_json_alt.yaml"
template = """You are a helpful and harmless AI assistant. You will be provided with a textual context and a model-generated response.
Your task is to analyze the response sentence by sentence and classify each sentence according to its relationship with the provided context.

**Instructions:**

1. **Decompose the response into individual sentences.**
2. **For each sentence, assign one of the following labels:**
    * **`supported`**: The sentence is entailed by the given context.  Provide a supporting excerpt from the context.
    * **`unsupported`**: The sentence is not entailed by the given context. Provide an excerpt that is close but does not fully support the sentence.
    * **`contradictory`**: The sentence is falsified by the given context. Provide a contradicting excerpt from the context.
    * **`no_rad`**: The sentence does not require factual attribution (e.g., opinions, greetings, questions, disclaimers).  No excerpt is needed for this label.

3. **For each label, provide a short rationale explaining your decision.**  The rationale should be separate from the excerpt.

**Input Format:**

The input will consist of two parts, clearly separated:

* **Context:**  The textual context used to generate the response.
* **Response:** The model-generated response to be analyzed.

**Output Format:**

For each sentence in the response, output a JSON object with the following fields:

* `"sentence"`: The sentence being analyzed.
* `"label"`: One of `supported`, `unsupported`, `contradictory`, or `no_rad`.
* `"rationale"`: A brief explanation for the assigned label.
* `"excerpt"`:  A relevant excerpt from the context. Only required for `supported`, `unsupported`, and `contradictory` labels.

Output each JSON object on a new line.

**Example:**

**Input:**

```
Context: Apples are red fruits. Bananas are yellow fruits.

Response: Apples are red. Bananas are green.  Enjoy your fruit!
```

**Output:**

{"sentence": "Apples are red.", "label": "supported", "rationale": "The context explicitly states that apples are red.", "excerpt": "Apples are red fruits."}
{"sentence": "Bananas are green.", "label": "contradictory", "rationale": "The context states that bananas are yellow, not green.", "excerpt": "Bananas are yellow fruits."}
{"sentence": "Enjoy your fruit!", "label": "no_rad", "rationale": "This is a general expression and does not require factual attribution.", "excerpt": null}

**Now, please analyze the following context and response:**

**User Query:**
{{user_request}}

**Context:**
{{context_document}}

**Response:**
{{response}}"""
# Names of the {{...}} placeholders that must be supplied when populating
template_variables = ["user_request", "context_document", "response"]
# Descriptive metadata stored with the template.
# NOTE(review): the continuation lines of "description" are indented inside the
# string literal, so the stored description contains those leading spaces -
# confirm this is intended.
metadata = {
    "description": """An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.
    The prompt was copied from the evaluation_prompts.csv file from Kaggle.
    This specific prompt elicits an NLI-style sentence-by-sentence checker outputting JSON for each sentence.""",
    "evaluation_method": "json_alt",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv"
}
# Left empty for this template
client_parameters = {}
custom_data = {}

# Assemble the template object; `template` and `filename` are defined earlier in this cell
prompt_template = TextPromptTemplate(
    template=template,
    template_variables=template_variables,
    metadata=metadata,
    client_parameters=client_parameters,
    custom_data=custom_data
)

# Write a local copy; the path is relative to the current working directory
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")

# Upload the same file to the Hub.
# NOTE(review): create_repo=True presumably creates the repo when it does not exist yet - confirm.
prompt_template.save_to_hub(
    repo_id="MoritzLaurer/facts-grounding-prompts", 
    filename=filename, 
    create_repo=True,
)

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template grounding_nli_json_alt.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [5]:
filename = "grounding_nli_json.yaml"
template = """You are a helpful and harmless AI assistant. You will be provided with a textual context and a model-generated response.
Your task is to analyze the response sentence by sentence and classify each sentence according to its relationship with the provided context.

**Instructions:**

1. **Decompose the response into individual sentences.**
2. **For each sentence, assign one of the following labels:**
    * **`supported`**: The sentence is entailed by the given context.  Provide a supporting excerpt from the context. The supporting except must *fully* entail the sentence. If you need to cite multiple supporting excepts, simply concatenate them.
    * **`unsupported`**: The sentence is not entailed by the given context. No excerpt is needed for this label.
    * **`contradictory`**: The sentence is falsified by the given context. Provide a contradicting excerpt from the context.
    * **`no_rad`**: The sentence does not require factual attribution (e.g., opinions, greetings, questions, disclaimers).  No excerpt is needed for this label.
3. **For each label, provide a short rationale explaining your decision.**  The rationale should be separate from the excerpt.
4. **Be very strict with your `supported` and `contradictory` decisions.** Unless you can find straightforward, indisputable evidence excerpts *in the context* that a sentence is `supported` or `contradictory`, consider it `unsupported`. You should not employ world knowledge unless it is truly trivial.

**Input Format:**

The input will consist of two parts, clearly separated:

* **Context:**  The textual context used to generate the response.
* **Response:** The model-generated response to be analyzed.

**Output Format:**

For each sentence in the response, output a JSON object with the following fields:

* `"sentence"`: The sentence being analyzed.
* `"label"`: One of `supported`, `unsupported`, `contradictory`, or `no_rad`.
* `"rationale"`: A brief explanation for the assigned label.
* `"excerpt"`:  A relevant excerpt from the context. Only required for `supported` and `contradictory` labels.

Output each JSON object on a new line.

**Example:**

**Input:**

```
Context: Apples are red fruits. Bananas are yellow fruits.

Response: Apples are red. Bananas are green. Bananas are cheaper than apples. Enjoy your fruit!
```

**Output:**

{"sentence": "Apples are red.", "label": "supported", "rationale": "The context explicitly states that apples are red.", "excerpt": "Apples are red fruits."}
{"sentence": "Bananas are green.", "label": "contradictory", "rationale": "The context states that bananas are yellow, not green.", "excerpt": "Bananas are yellow fruits."}
{"sentence": "Bananas are cheaper than apples.", "label": "unsupported", "rationale": "The context does not mention the price of bananas or apples.", "excerpt": null}
{"sentence": "Enjoy your fruit!", "label": "no_rad", "rationale": "This is a general expression and does not require factual attribution.", "excerpt": null}

**Now, please analyze the following context and response:**

**User Query:**
{{user_request}}

**Context:**
{{context_document}}

**Response:**
{{response}}"""
template_variables = ["user_request", "context_document", "response"]
metadata = {
    "description": """An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.
    The prompt was copied from the evaluation_prompts.csv file from Kaggle.
    This specific prompt elicits an NLI-style sentence-by-sentence checker outputting JSON for each sentence.""",
    "evaluation_method": "json",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv"
}
client_parameters = {}
custom_data = {}

prompt_template = TextPromptTemplate(
    template=template,
    template_variables=template_variables,
    metadata=metadata,
    client_parameters=client_parameters,
    custom_data=custom_data
)

prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")

prompt_template.save_to_hub(
    repo_id="MoritzLaurer/facts-grounding-prompts", 
    filename=filename, 
    create_repo=True,
)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template grounding_nli_json.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [6]:
# Double-check prompt: a YES/NO entailment check of one sentence against one excerpt.
# Both placeholders subscript a single dict-valued variable (`json_dict`), and the
# template object is created with populator="jinja2" below.
# TODO: how does template_variables validation work with subscripted placeholders?
# TODO: should the metadata record that the input is a dict and that the template requires jinja2?

filename = "grounding_nli_json_double_check.yaml"
template = """Your task is to verify whether a given sentence is entailed by a given context or not. Answer only in YES or NO without any additional text. Do not try to avoid answering, or apologize, or give any answer that isn't simply YES or NO.

**Sentence**
{{json_dict["sentence"]}}

**Context**
{{json_dict["excerpt"]}}"""
template_variables = ["json_dict"]
client_parameters, custom_data = {}, {}

# Provenance information stored alongside the template.
metadata = {
    "description": (
        "An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.\n"
        "    The prompt was copied from the evaluation_prompts.csv file from Kaggle.\n"
        "    This specific prompt elicits a binary entailment/non-entailment classifier. It requires a dict as input"
    ),
    "evaluation_method": "json_with_double_check",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv",
}

prompt_template = TextPromptTemplate(
    populator="jinja2",
    custom_data=custom_data,
    client_parameters=client_parameters,
    metadata=metadata,
    template_variables=template_variables,
    template=template,
)

# Save a local copy under the test fixtures, then upload under the same filename.
# The upload call stays last so its return value is what the cell displays.
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")
prompt_template.save_to_hub(repo_id="MoritzLaurer/facts-grounding-prompts", filename=filename, create_repo=True)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template grounding_nli_json_double_check.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [7]:
# Span-level accuracy prompt.
# The `\n` sequences below sit in a regular (non-raw) string, so Python turns them into
# newlines in addition to the literal line breaks. `{{ix+1}}` is an expression on `ix`,
# not a bare variable name.
# TODO: should the metadata record input requirements (e.g. dict input) and that the template requires jinja2?

filename = "grounding_accuracy_span_level.yaml"
template = """Your task is to check if a specific Span is accurate to the Evidence.
Generate 'Accurate' if the Span is accurate when verified according to the Evidence or when there is nothing to verify in the Span.
Generate 'Inaccurate' if the Span is inaccurate (contradicts the evidence), or cannot be verified.

**Query**:\n\n{{user_request}}\n\n**End of Query**\n
**Evidence**\n\n{{context_document}}\n\n**End of Evidence**\n
**Response**:\n\n{{response}}\n\n**End of Response**\n

You are currently verifying **Span {{ix+1}}** from the Response.
**Span {{ix+1}}**:\n\n{{span}}\n\n**End of Span {{ix+1}}**\n

Is Span {{ix+1}} accurate or inaccurate when verified according to the Evidence? Point to where in the evidence justifies your answer."""
template_variables = ["user_request", "context_document", "response", "ix", "span"]
client_parameters, custom_data = {}, {}

# Provenance information stored alongside the template.
metadata = {
    "description": (
        "An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.\n"
        "    The prompt was copied from the evaluation_prompts.csv file from Kaggle.\n"
        "    This specific prompt elicits a binary accurate/non-accurate classifier on a span level."
    ),
    "evaluation_method": "span_level",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv",
}

prompt_template = TextPromptTemplate(
    custom_data=custom_data,
    client_parameters=client_parameters,
    metadata=metadata,
    template_variables=template_variables,
    template=template,
)

# Save a local copy under the test fixtures, then upload under the same filename.
# The upload call stays last so its return value is what the cell displays.
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")
prompt_template.save_to_hub(repo_id="MoritzLaurer/facts-grounding-prompts", filename=filename, create_repo=True)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template grounding_accuracy_span_level.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [8]:

# Response-level accuracy prompt that asks for a per-sentence breakdown before the final label.
# The `\n` sequences below sit in a regular (non-raw) string, so Python turns them into
# newlines in addition to the literal line breaks.
filename = "grounding_accuracy_implicit_span_level.yaml"
template = """Your task is to check if the Response is accurate to the Evidence.
Generate 'Accurate' if the Response is accurate when verified according to the Evidence, or 'Inaccurate' if the Response is inaccurate (contradicts the evidence) or cannot be verified.

**Query**:\n\n{{user_request}}\n\n**End of Query**\n
**Evidence**\n\n{{context_document}}\n\n**End of Evidence**\n
**Response**:\n\n{{response}}\n\n**End of Response**\n

Break down the Response into sentences and classify each one separately, then give the final answer: If even one of the sentences is inaccurate, then the Response is inaccurate.

For example, your output should be of this format:
Sentence 1: <Sentence 1>
Sentence 1 label: Accurate/Inaccurate (choose 1)
Sentence 2: <Sentence 2>
Sentence 2 label: Accurate/Inaccurate (choose 1)
Sentence 3: <Sentence 3>
Sentence 3 label: Accurate/Inaccurate (choose 1)
[...]
Final Answer: Accurate/Inaccurate (choose 1)"""
template_variables = ["user_request", "context_document", "response"]
client_parameters, custom_data = {}, {}

# Provenance information stored alongside the template.
metadata = {
    "description": (
        "An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.\n"
        "    The prompt was copied from the evaluation_prompts.csv file from Kaggle.\n"
        "    This specific prompt elicits a binary accurate/non-accurate classifier for the entire response after generating and classifying each sentence separately."
    ),
    "evaluation_method": "implicit_span_level",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv",
}

prompt_template = TextPromptTemplate(
    custom_data=custom_data,
    client_parameters=client_parameters,
    metadata=metadata,
    template_variables=template_variables,
    template=template,
)

# Save a local copy under the test fixtures, then upload under the same filename.
# The upload call stays last so its return value is what the cell displays.
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")
prompt_template.save_to_hub(repo_id="MoritzLaurer/facts-grounding-prompts", filename=filename, create_repo=True)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template grounding_accuracy_implicit_span_level.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [3]:

# Instruction-following judge prompt (variant whose query placeholder is `full_prompt`).
# The JSON verdict examples use single braces; only the three `{{...}}` placeholders
# near the end are template variables.
filename = "instruction_following_responses_filter_w_context.yaml"
template = """Your mission is to judge the response from an AI model, the *test* response, calibrating your judgement using a *baseline* response.
Please use the following rubric criteria to judge the responses:

<START OF RUBRICS>
Your task is to analyze the test response based on the criterion of "Instruction Following". Start your analysis with "Analysis".

**Instruction Following**
Please first list the instructions in the user query.
In general, an instruction is VERY important if it is specifically asked for in the prompt and deviates from the norm. Please highlight such specific keywords.
You should also derive the task type from the user query and include the task-specific implied instructions.
Sometimes, no instruction is available in the user query.
It is your job to infer if the instruction is to autocomplete the user query or is asking the LLM for follow-ups.
After listing the instructions, you should rank them in order of importance.
After that, INDEPENDENTLY check if the test response and the baseline response meet each of the instructions.
You should itemize, for each instruction, whether the response meets, partially meets, or does not meet the requirement, using reasoning.
You should start reasoning first before reaching a conclusion about whether the response satisfies the requirement.
Citing examples while reasoning is preferred.

Reflect on your answer and consider the possibility that you are wrong.
If you are wrong, explain clearly what needs to be clarified, improved, or changed in the rubric criteria and guidelines.

In the end, express your final verdict as one of the following three json objects:

```json
{
  "Instruction Following": "No Issues"
}
```

```json
{
  "Instruction Following": "Minor Issue(s)"
}
```

```json
{
  "Instruction Following": "Major Issue(s)"
}
```

<END OF RUBRICS>

# Your task
## User query
<|begin_of_query|>
{{full_prompt}}
<|end_of_query|>

## Test Response:
<|begin_of_test_response|>
{{response_a}}
<|end_of_test_response|>

## Baseline Response:
<|begin_of_baseline_response|>
{{response_b}}
<|end_of_baseline_response|>

Please write your analysis and final verdict for the test response."""
template_variables = ["full_prompt", "response_a", "response_b"]
client_parameters, custom_data = {}, {}

# Provenance information stored alongside the template.
metadata = {
    "description": (
        "An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.\n"
        "    The prompt was copied from the evaluation_prompts.csv file from Kaggle.\n"
        "    This specific prompt elicits a three class classifier to detect issues linked to instruction following with context.\n"
        "    Note that the double {{}} around the json blocks was simplified to a single {}."
    ),
    "evaluation_method": "ineligible_responses_filter_with_context",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv",
}

prompt_template = TextPromptTemplate(
    custom_data=custom_data,
    client_parameters=client_parameters,
    metadata=metadata,
    template_variables=template_variables,
    template=template,
)

# Save a local copy under the test fixtures, then upload under the same filename.
# The upload call stays last so its return value is what the cell displays.
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")
prompt_template.save_to_hub(repo_id="MoritzLaurer/facts-grounding-prompts", filename=filename, create_repo=True)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template instruction_following_responses_filter_w_context.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

In [4]:
# Show the repr of the current `prompt_template` object.
prompt_template

TextPromptTemplate(template='Your mission is to judge the response from an AI ..., template_variables=['full_prompt', 'response_a', 'response_b'], metadata={'description': "An evaluation prompt from the pap..., client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard')

In [10]:
# Fill the template with placeholder values for the three keyword arguments below and
# display the populated prompt (both responses are deliberately the same dummy text).
prompt_template.populate_template(
    full_prompt="Create a tic-tac-toe game for me in Python",
    response_a="Here is the code for the tic-tac-toe game:",
    response_b="Here is the code for the tic-tac-toe game:"
)

PopulatedPrompt('Your mission is to judge the response from an AI model, the *test* response, calibrating your judgement using a *baseline* response.\nPlease use the following rubric criteria to judge the responses:\n\n<START OF RUBRICS>\nYour task is to analyze the test response based on the criterion of "Instruction Following". Start your analysis with "Analysis".\n\n**Instruction Following**\nPlease first list the instructions in the user query.\nIn general, an instruction is VERY important if it is specifically asked for in the prompt and deviates from the norm. Please highlight such specific keywords.\nYou should also derive the task type from the user query and include the task-specific implied instructions.\nSometimes, no instruction is available in the user query.\nIt is your job to infer if the instruction is to autocomplete the user query or is asking the LLM for follow-ups.\nAfter listing the instructions, you should rank them in order of importance.\nAfter that, INDEPENDENT

In [19]:

# Instruction-following judge prompt (variant whose query placeholder is `user_request`).
# The JSON verdict examples use single braces; only the three `{{...}}` placeholders
# near the end are template variables.
filename = "instruction_following_responses_filter_wo_context.yaml"
template = """Your mission is to judge the response from an AI model, the *test* response, calibrating your judgement using a *baseline* response.
Please use the following rubric criteria to judge the responses:

<START OF RUBRICS>
Your task is to analyze the test response based on the criterion of "Instruction Following". Start your analysis with "Analysis".

**Instruction Following**
Please first list the instructions in the user query.
In general, an instruction is VERY important if it is specifically asked for in the prompt and deviates from the norm. Please highlight such specific keywords.
You should also derive the task type from the user query and include the task-specific implied instructions.
Sometimes, no instruction is available in the user query.
It is your job to infer if the instruction is to autocomplete the user query or is asking the LLM for follow-ups.
After listing the instructions, you should rank them in order of importance.
After that, INDEPENDENTLY check if the test response and the baseline response meet each of the instructions.
You should itemize, for each instruction, whether the response meets, partially meets, or does not meet the requirement, using reasoning.
You should start reasoning first before reaching a conclusion about whether the response satisfies the requirement.
Citing examples while reasoning is preferred.

Reflect on your answer and consider the possibility that you are wrong.
If you are wrong, explain clearly what needs to be clarified, improved, or changed in the rubric criteria and guidelines.

In the end, express your final verdict as one of the following three json objects:

```json
{
  "Instruction Following": "No Issues"
}
```

```json
{
  "Instruction Following": "Minor Issue(s)"
}
```

```json
{
  "Instruction Following": "Major Issue(s)"
}
```

<END OF RUBRICS>

# Your task
## User query
<|begin_of_query|>
{{user_request}}
<|end_of_query|>

## Test Response:
<|begin_of_test_response|>
{{response_a}}
<|end_of_test_response|>

## Baseline Response:
<|begin_of_baseline_response|>
{{response_b}}
<|end_of_baseline_response|>

Please write your analysis and final verdict for the test response."""
template_variables = ["user_request", "response_a", "response_b"]
client_parameters, custom_data = {}, {}

# Provenance information stored alongside the template.
metadata = {
    "description": (
        "An evaluation prompt from the paper 'The FACTS Grounding Leaderboard: Benchmarking LLMs’ Ability to Ground Responses to Long-Form Input' by Google DeepMind.\n"
        "    The prompt was copied from the evaluation_prompts.csv file from Kaggle.\n"
        "    This specific prompt elicits a three class classifier to detect issues linked to instruction following without context.\n"
        "    Note that the double {{}} around the json blocks was simplified to a single {}."
    ),
    "evaluation_method": "ineligible_responses_filter_no_context",
    "tags": ["fact-checking"],
    "version": "1.0.0",
    "author": "Google DeepMind",
    "source": "https://www.kaggle.com/datasets/deepmind/FACTS-grounding-examples?resource=download&select=evaluation_prompts.csv",
}

prompt_template = TextPromptTemplate(
    custom_data=custom_data,
    client_parameters=client_parameters,
    metadata=metadata,
    template_variables=template_variables,
    template=template,
)

# Save a local copy under the test fixtures, then upload under the same filename.
# The upload call stays last so its return value is what the cell displays.
prompt_template.save_to_local(f"tests/test_data/facts_grounding_prompts/{filename}")
prompt_template.save_to_hub(repo_id="MoritzLaurer/facts-grounding-prompts", filename=filename, create_repo=True)


Template syntax appears to use 'double_brace' populator style, but populator='jinja2' was specified. This mismatch might cause errors. Consider updating either the template syntax or the populator type.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts/commit/40145ff58f7599d892b758ab05faf90505d3b66c', commit_message='Upload prompt template instruction_following_responses_filter_wo_context.yaml', commit_description='', oid='40145ff58f7599d892b758ab05faf90505d3b66c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/facts-grounding-prompts', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/facts-grounding-prompts'), pr_revision=None, pr_num=None)

## Test Gemini
Using the new Google GenAI SDK: https://cloud.google.com/vertex-ai/generative-ai/docs/sdks/overview
- Docs: https://googleapis.github.io/python-genai/genai.html#
- PyPI: https://pypi.org/project/google-genai/

This SDK is used here instead of the older Gemini Developer API and Vertex AI API clients, which were two separate APIs.

In [1]:
import os
from google import genai
from google.genai import types

# Client for the GenAI SDK. The key is read from the GEMINI_API_KEY environment variable;
# os.environ.get returns None when the variable is not set.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))


In [22]:
from prompt_templates import PromptTemplateLoader

# Download the template from the Hub.
prompt_template = PromptTemplateLoader.from_hub(
    repo_id="MoritzLaurer/closed_system_prompts",
    filename="claude-3-5-artifacts-leak-210624.yaml",
)

# Fill in the two variables supplied by this call.
messages = prompt_template.populate_template(
    user_message="Create a tic-tac-toe game for me in Python",
    current_date="Wednesday, 11 December 2024",
)

# Convert to the Gemini layout; later cells read its "contents" and
# "system_instruction" entries.
messages_gemini = messages.format_for_client(client="gemini")
messages_gemini

PopulatedPrompt({'system_instruction': '<artifacts_info>\nThe assistant can create and reference artifacts during conversations. Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity.\n\n# Good artifacts are...\n- Substantial content (>15 lines)\n- Content that the user is likely to modify, iterate on, or take ownership of\n- Self-contained, complex content that can be understood on its own, without context from the conversation\n- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations)\n- Content likely to be referenced or reused multiple times\n\n# Don\'t use artifacts for...\n- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples\n- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept\n- Suggestions, commentary, or feedback on existing artifacts\n- 

In [25]:
# Sampling settings plus the system instruction taken from the Gemini-formatted prompt.
request_config = types.GenerateContentConfig(
    system_instruction=messages_gemini["system_instruction"],
    temperature=0,
    top_p=0.95,
)

# Send the formatted conversation to the model and print only the text of the reply.
response = client.models.generate_content(
    model='gemini-2.0-flash-exp',
    contents=messages_gemini["contents"],
    config=request_config,
)

print(response.text)

```python
def print_board(board):
    print("   1   2   3")
    print("  -----------")
    print(f"A | {board['A1']} | {board['A2']} | {board['A3']} |")
    print("  -----------")
    print(f"B | {board['B1']} | {board['B2']} | {board['B3']} |")
    print("  -----------")
    print(f"C | {board['C1']} | {board['C2']} | {board['C3']} |")
    print("  -----------")

def check_win(board, player):
    win_conditions = [
        ['A1', 'A2', 'A3'], ['B1', 'B2', 'B3'], ['C1', 'C2', 'C3'],
        ['A1', 'B1', 'C1'], ['A2', 'B2', 'C2'], ['A3', 'B3', 'C3'],
        ['A1', 'B2', 'C3'], ['A3', 'B2', 'C1']
    ]
    for condition in win_conditions:
        if all(board[cell] == player for cell in condition):
            return True
    return False

def tic_tac_toe():
    board = {
        'A1': ' ', 'A2': ' ', 'A3': ' ',
        'B1': ' ', 'B2': ' ', 'B3': ' ',
        'C1': ' ', 'C2': ' ', 'C3': ' '
    }
    current_player = 'X'
    moves_made = 0

    while moves_made < 9:
        print_board

In [24]:
# Show the citations attached to the first candidate of the last response.
# `citation_metadata` is None when a response carries no citations, so a direct
# `.citations` access would raise AttributeError; fall back to an empty list instead.
getattr(response.candidates[0].citation_metadata, "citations", None) or []

[Citation(end_index=1474, license=None, publication_date=None, start_index=1246, title=None, uri='https://github.com/CrystalHafley/Tic-Tac-Toe')]

In [9]:
# Show the "system_instruction" entry of the Gemini-formatted prompt.
messages_gemini["system_instruction"]

'You are a coding assistant who explains concepts clearly and provides short examples.'

In [13]:

# Generation settings for this experiment: fixed seed, 100-token output cap and a
# custom stop sequence, with a hard-coded system instruction.
request_config = types.GenerateContentConfig(
    system_instruction="Helpful assistant",  # alternative tried: 'I say high, you say low'
    temperature=0,
    top_p=0.95,
    top_k=20,
    candidate_count=1,
    seed=5,
    max_output_tokens=100,
    stop_sequences=["STOP!"],
    presence_penalty=0.0,
    frequency_penalty=0.0,
    # Optional safety override, currently disabled:
    # safety_settings=[types.SafetySetting(
    #     category='HARM_CATEGORY_HATE_SPEECH',
    #     threshold='BLOCK_ONLY_HIGH',
    # )]
)

# Alternative hand-written `contents`, currently disabled:
# [
#     types.Content(parts=[types.Part(text='high')], role='user'),
#     types.Content(parts=[types.Part(text='low')], role='model'),
#     types.Content(parts=[types.Part(text='Please repeat the last three messages.')], role='user'),
# ]
response = client.models.generate_content(
    model='gemini-2.0-flash-exp',
    contents=messages_gemini["contents"],
    config=request_config,
)

# Print the text, then leave the full response object as the cell's displayed value.
print(response.text)
response

Okay, let's break down list comprehension in Python.

**What is List Comprehension?**

List comprehension is a concise and elegant way to create lists in Python. It provides a more readable and often more efficient alternative to using traditional `for` loops for building lists. Think of it as a shorthand for creating lists based on existing iterables (like lists, tuples, strings, ranges, etc.).

**The Basic Structure**

The general syntax of a list comprehension looks like this


GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="Okay, let's break down list comprehension in Python.\n\n**What is List Comprehension?**\n\nList comprehension is a concise and elegant way to create lists in Python. It provides a more readable and often more efficient alternative to using traditional `for` loops for building lists. Think of it as a shorthand for creating lists based on existing iterables (like lists, tuples, strings, ranges, etc.).\n\n**The Basic Structure**\n\nThe general syntax of a list comprehension looks like this")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='MAX_TOKENS', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=[SafetyRating(blocked=None, category='HARM_CATEGORY_HATE_SPEECH

In [3]:
# Seed conversation as (role, text) pairs: one user turn and one model turn.
seed_turns = [
    ("user", "Please say two random words."),
    ("model", "Elephant and apple.\n"),
]
history = [
    types.Content(parts=[types.Part(text=turn_text)], role=turn_role)
    for turn_role, turn_text in seed_turns
]

# Start a chat that already contains the seeded turns, then ask a follow-up that
# can only be answered by referring back to them.
chat = client.chats.create(model='gemini-2.0-flash-exp', history=history)
response = chat.send_message("Please say the same two words again just the other way around.")
print(response.text)

#chat._curated_history[0].parts[0].text
#response = chat.send_message('Please say the same two words again just the other way around..')
#print(response.text)



Apple and elephant.



In [13]:
# Reference snippet for the old SDK (pip install google-generativeai).
# The code is wrapped in a string literal, so none of it is executed; as the cell's only
# expression, the string itself is what the cell displays.
"""import os
import google.generativeai as genai

genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Create the model
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 40,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
  model_name="gemini-2.0-flash-exp",
  generation_config=generation_config,
  system_instruction="You help me test things",
)

chat_session = model.start_chat(
  history=[
    {
      "role": "user",
      "parts": [
        "How is it going?",
      ],
    },
    {
      "role": "model",
      "parts": [
        "It's going well! I'm ready and eager to help you with any testing you need. Just let me know what you'd like to try out. Whether it's a piece of code, a concept, or even just a hypothetical scenario, I'm here to help.\n\nSo, what are we testing today? 😊\n",
      ],
    },
    {
      "role": "user",
      "parts": [
        "say one word please",
      ],
    },
    {
      "role": "model",
      "parts": [
        "Testing.\n",
      ],
    },
  ]
)

response = chat_session.send_message("INSERT_INPUT_HERE")

print(response.text)"""

'import os\nimport google.generativeai as genai\n\ngenai.configure(api_key=os.environ["GEMINI_API_KEY"])\n\n# Create the model\ngeneration_config = {\n  "temperature": 1,\n  "top_p": 0.95,\n  "top_k": 40,\n  "max_output_tokens": 8192,\n  "response_mime_type": "text/plain",\n}\n\nmodel = genai.GenerativeModel(\n  model_name="gemini-2.0-flash-exp",\n  generation_config=generation_config,\n  system_instruction="You help me test things",\n)\n\nchat_session = model.start_chat(\n  history=[\n    {\n      "role": "user",\n      "parts": [\n        "How is it going?",\n      ],\n    },\n    {\n      "role": "model",\n      "parts": [\n        "It\'s going well! I\'m ready and eager to help you with any testing you need. Just let me know what you\'d like to try out. Whether it\'s a piece of code, a concept, or even just a hypothetical scenario, I\'m here to help.\n\nSo, what are we testing today? 😊\n",\n      ],\n    },\n    {\n      "role": "user",\n      "parts": [\n        "say one word plea

## Test ruamel outputs

In [None]:
from prompt_templates.utils import format_template_content, create_yaml_handler

# Sample data simulating a chat prompt template. The system message contains a blank
# line ("\n\n") so the dump can be inspected for how multi-line strings are written.
template = {
    "prompt": {
        "template": [
            {
                "role": "system",
                "content": "You are a coding assistant who explains concepts clearly and provides short examples.\n\nYou are a coding assistant who explains concepts clearly and provides short examples."
            },
            {
                "role": "user",
                "content": "Explain what {{concept}} is in {{programming_language}}."
            }
        ],
        "template_variables": ["concept", "programming_language"],
        "metadata": {
            "name": "Code Teacher",
            "description": "A simple chat prompt for explaining programming concepts with examples",
            "tags": ["programming", "education"],
            "version": "0.0.1",
            "author": "Guido van Bossum"
        },
        "client_parameters": {},
        "custom_data": {}
    }
}

# Apply the library's content formatting before dumping
formatted_template = format_template_content(template)

# Create the YAML handler used for dumping
yaml = create_yaml_handler()

# Dump to a YAML file in the current working directory. The encoding is pinned to UTF-8
# so the written bytes do not depend on the platform's default locale encoding.
with open("code_teacher_test.yaml", "w", encoding="utf-8") as f:
    yaml.dump(formatted_template, f)

### Docs update tests

In [1]:
from prompt_templates import list_prompt_templates
# List the template files available in the given Hub repo and display the result.
files = list_prompt_templates("MoritzLaurer/closed_system_prompts")
files

['claude-3-5-artifacts-leak-210624.yaml',
 'claude-3-5-sonnet-text-090924.yaml',
 'claude-3-5-sonnet-text-image-090924.yaml',
 'openai-metaprompt-audio.yaml',
 'openai-metaprompt-text.yaml']

In [2]:
from prompt_templates import PromptTemplateLoader
# Load one template file from the Hub repo.
prompt_template = PromptTemplateLoader.from_hub(
    repo_id="MoritzLaurer/closed_system_prompts",
    filename="claude-3-5-artifacts-leak-210624.yaml"
)

# Other attributes that can be inspected instead of the metadata:
#prompt_template.template
#prompt_template.template_variables
prompt_template.metadata

{'source': 'https://gist.github.com/dedlim/6bf6d81f77c19e20cd40594aa09e3ecd'}

In [3]:
# Fill in the two variables supplied here and display the populated prompt.
messages = prompt_template.populate_template(
    user_message="Create a tic-tac-toe game for me in Python",
    current_date="Wednesday, 11 December 2024"
)
# The trailing comment is an optional slice for showing only the start of the first message.
messages#[0]["content"][:100]


PopulatedPrompt([{'role': 'system', 'content': '<artifacts_info>\nThe assistant can create and reference artifacts during conversations. Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity.\n\n# Good artifacts are...\n- Substantial content (>15 lines)\n- Content that the user is likely to modify, iterate on, or take ownership of\n- Self-contained, complex content that can be understood on its own, without context from the conversation\n- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations)\n- Content likely to be referenced or reused multiple times\n\n# Don\'t use artifacts for...\n- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples\n- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept\n- Suggestions, commentary, or feedback on existing artif

In [4]:
import os

from openai import OpenAI

# The key is read from the environment; os.environ.get returns None when it is unset.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Send the populated prompt as the chat messages.
response = client.chat.completions.create(messages=messages, model="gpt-4o-mini")

# Print only the first 100 characters of the reply to keep the output short.
print(response.choices[0].message.content[:100], "...")

Here's a simple text-based Tic-Tac-Toe game in Python. This code allows two players to take turns pl ...


In [5]:
# Convert the populated prompt to the Anthropic client layout and display it; the
# Anthropic call below reads its "system" and "messages" entries.
messages_anthropic = messages.format_for_client(client="anthropic")
messages_anthropic

PopulatedPrompt({'system': '<artifacts_info>\nThe assistant can create and reference artifacts during conversations. Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity.\n\n# Good artifacts are...\n- Substantial content (>15 lines)\n- Content that the user is likely to modify, iterate on, or take ownership of\n- Self-contained, complex content that can be understood on its own, without context from the conversation\n- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations)\n- Content likely to be referenced or reused multiple times\n\n# Don\'t use artifacts for...\n- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples\n- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept\n- Suggestions, commentary, or feedback on existing artifacts\n- Conversation

In [6]:
from anthropic import Anthropic

# Build the Anthropic client; the API key must already be set in the environment.
# `os` is not imported in this cell and is expected to be in scope from an earlier one.
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

# Send the system prompt and the message list from the Anthropic-formatted prompt.
response = client.messages.create(
    max_tokens=1000,
    messages=messages_anthropic["messages"],
    system=messages_anthropic["system"],
    model="claude-3-sonnet-20240229",
)

# Print only the first 100 characters of the first content block.
print(response.content[0].text[:100], "...")

Sure, I can create a tic-tac-toe game for you in Python. Here's a simple implementation:

<antThinki ...


In [9]:
from huggingface_hub import InferenceClient

# Populate the template again so this cell works with its own `messages`.
messages = prompt_template.populate_template(user_message="Create a tic-tac-toe game for me in Python", current_date="Wednesday, 11 December 2024")

# NOTE: relies on `os` having been imported by an earlier cell.
client = InferenceClient(api_key=os.getenv("HF_TOKEN"))

# The populated prompt is converted with `to_dict()` before being handed to the client.
response = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=messages.to_dict(),
    max_tokens=500,
)

# Preview only the first 100 characters of the reply.
print(response.choices[0].message.content[:100], "...")

<antThinking>Creating a tic-tac-toe game in Python is a good candidate for an artifact. It's a self- ...


### Saving tests

In [3]:
import os
# NOTE(review): machine-specific absolute path; the relative "./tests/test_data/..."
# paths used by the following cells are resolved against this directory.
os.chdir("/Users/moritzlaurer/huggingface/projects/prompt-templates")

In [4]:
from prompt_templates import ChatPromptTemplate

# Chat template with two placeholders: {{concept}} and {{programming_language}}.
# NOTE(review): the system text is repeated 10 times — presumably to exercise saving
# of long multi-line content; confirm this is intentional.
messages_template = [
    {"role": "system", "content": "You are a coding assistant who explains concepts clearly and provides short examples.\n\n" * 10},
    {"role": "user", "content": "Explain what {{concept}} is in {{programming_language}}."}
]
template_variables = ["concept", "programming_language"]
metadata = {
    "name": "Code Teacher",
    "description": "A simple chat prompt for explaining programming concepts with examples",
    "tags": ["programming", "education"],
    "version": "0.0.1",
    "author": "Guido van Bossum"
}
prompt_template = ChatPromptTemplate(
    template=messages_template,
    template_variables=template_variables,
    metadata=metadata,
)

# Show the working directory, because the save path below is relative to it.
# (`os` comes from the preceding cell.)
print(os.getcwd())

# A bare `prompt_template` expression used to sit here; an expression statement in the
# middle of a cell is not displayed by Jupyter, so it was a no-op and has been removed.
prompt_template.save_to_local("./tests/test_data/code_teacher_test.yaml")


/Users/moritzlaurer/huggingface/projects/prompt-templates


In [3]:
# Upload the template to the Hub under the given file name; the returned value
# is displayed as the cell output.
# NOTE(review): `create_repo=True` presumably creates the repo when it is missing — confirm.
prompt_template.save_to_hub(
    repo_id="MoritzLaurer/example_prompts_test", 
    filename="code_teacher_test.yaml", 
    create_repo=True,
)

CommitInfo(commit_url='https://huggingface.co/datasets/MoritzLaurer/example_prompts_test/commit/1749258b19b5551f5d51496c5b2d125a0e45bda2', commit_message='Upload prompt template code_teacher_test.yaml', commit_description='', oid='1749258b19b5551f5d51496c5b2d125a0e45bda2', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MoritzLaurer/example_prompts_test', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MoritzLaurer/example_prompts_test'), pr_revision=None, pr_num=None)

In [None]:
# Disabled experiment: the whole snippet is wrapped in a string literal, so none of
# the RepoCard / HfApi calls inside it are executed.
"""from huggingface_hub.repocard import RepoCard
from huggingface_hub import HfApi
import os

text = '''
---
language: en
license: mit
---
# My repo
'''
card = RepoCard(text)

api = HfApi(token=os.environ.get("HF_TOKEN"))

api.create_repo(repo_id="MoritzLaurer/example_card", repo_type="dataset")

card.push_to_hub(repo_id="MoritzLaurer/example_card", repo_type="dataset")"""

In [3]:
from prompt_templates import TextPromptTemplate, PromptTemplateLoader

# Raw template text is kept under its own name so that `template` only ever refers
# to the TextPromptTemplate object that the following cells call methods on.
template_text = "Translate the following text to {{language}}:\n{{text}}"
template_variables = ["language", "text"]
metadata = {
    "name": "Simple Translator",
    "description": "A simple translation prompt for illustrating the standard prompt YAML format",
    "tags": ["translation", "multilinguality"],
    "version": "0.0.1",
    "author": "Some Person"
}

template = TextPromptTemplate(template=template_text, template_variables=template_variables, metadata=metadata)

# Alternative kept for reference: explicit YAML output.
#template.save_to_local("./tests/test_data/translate_test.yaml", format="yaml")
template.save_to_local("./tests/test_data/translate_test.json")

In [4]:
# These all work (the paths are relative, so the files are written relative to
# the current working directory):
template.save_to_local("template.json")  # Saves as JSON
template.save_to_local("template.yaml")  # Saves as YAML
template.save_to_local("template")  # Saves as YAML (default)
template.save_to_local("template.json", format="json")  # Explicit format matches extension

In [5]:
#template.save_to_hub(repo_id="MoritzLaurer/example_prompts", filename="translate-uploaded.json", format="json")
#template.save_to_hub(repo_id="MoritzLaurer/example_prompts", filename="translate-uploaded.yaml", format="yaml")
#template.save_to_hub(repo_id="MoritzLaurer/example_prompts", filename="translate-uploaded.yaml", format="json")
#template.save_to_hub(repo_id="MoritzLaurer/example_prompts", filename="translate-uploaded.json", format="yaml")


No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/MoritzLaurer/example_prompts/commit/ee6cb67134065ecf27843222bec95e1f5da10720', commit_message='Upload prompt template translate-test.yaml', commit_description='', oid='ee6cb67134065ecf27843222bec95e1f5da10720', pr_url=None, repo_url=RepoUrl('https://huggingface.co/MoritzLaurer/example_prompts', endpoint='https://huggingface.co', repo_type='model', repo_id='MoritzLaurer/example_prompts'), pr_revision=None, pr_num=None)

In [6]:
# Round-trip check: load the template once from disk and once from the Hub, then
# compare the two objects; the comparison result is the cell output.
# NOTE(review): the save cell earlier in this section writes "translate_test.json"
# (underscore), while this reads "translate-test.yaml" (hyphen) — confirm that file exists locally.
template_2 = PromptTemplateLoader.from_local("./tests/test_data/translate-test.yaml")
template_3 = PromptTemplateLoader.from_hub(repo_id="MoritzLaurer/example_prompts", filename="translate-test.yaml")

template_2 == template_3


True

### Direct template creation tests

In [3]:
from prompt_templates import TextPromptTemplate, PromptTemplateLoader

# Build a text template directly in code ...
template = "Translate the following text to {{language}}:\n{{text}}"
input_variables = ["language", "text"]
metadata = {
    "name": "Simple Translator",
    "description": "A simple translation prompt for illustrating the standard prompt YAML format",
    "tags": ["translation", "multilinguality"],
    "version": "0.0.1",
    "author": "Some Person"
}

template_1 = TextPromptTemplate(
    template=template,
    input_variables=input_variables,
    metadata=metadata
)
print(template_1)

# ... and compare it with the version of the template stored on the Hub.
#template_1 = PromptTemplateLoader.from_hub(repo_id="MoritzLaurer/example_prompts", filename="translate.yaml")
template_2 = PromptTemplateLoader.from_hub(repo_id="MoritzLaurer/example_prompts", filename="translate.yaml")

print(template_1 == template_2)


TextPromptTemplate(template='Translate the following text to {{language}}:\n{{..., input_variables=['language', 'text'], metadata={'name': 'Simple Translator', 'description': 'A si..., client_parameters={}, other_data={}, populator_type='double_brace', populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula...)
True


In [6]:
from prompt_templates import ChatPromptTemplate, PromptTemplateLoader

# Build a chat template directly in code, including explicit client parameters.
template = [
    {"role": "system", "content": "You are a coding assistant who explains concepts clearly and provides short examples."},
    {"role": "user", "content": "Explain what {{concept}} is in {{programming_language}}."}
]
input_variables = ["concept", "programming_language"]
metadata = {
    "name": "Code Teacher",
    "description": "A simple chat prompt for explaining programming concepts with examples",
    "tags": ["programming", "education"],
    "version": "0.0.1",
    "author": "My Awesome Company"
}

template_3 = ChatPromptTemplate(
    template=template,
    input_variables=input_variables,
    metadata=metadata,
    client_parameters={"temperature": 0.5}
)
print(template_3)

# Load the Hub version of the same prompt for comparison.
template_4 = PromptTemplateLoader.from_hub(repo_id="MoritzLaurer/example_prompts", filename="code_teacher.yaml")
print(template_4)

# NOTE(review): the printed reprs differ in `client_parameters` ({'temperature': 0.5}
# here vs. {} for the Hub version), which is presumably why this evaluates to False.
template_3 == template_4


ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are a coding a..., input_variables=['concept', 'programming_language'], metadata={'name': 'Code Teacher', 'description': 'A simple ..., client_parameters={'temperature': 0.5}, other_data={}, populator_type='double_brace', populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula...)
ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are a coding a..., input_variables=['concept', 'programming_language'], metadata={'name': 'Code Teacher', 'description': 'A simple ..., client_parameters={}, other_data={}, populator_type='double_brace', populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula...)


False

In [4]:
from prompt_templates import ChatPromptTemplate

# Two-message chat template; {{field}} and {{concept}} are filled in below.
prompt_template = ChatPromptTemplate(
    template=[
        {"role": "system", "content": "You are an expert in {{field}}."},
        {"role": "user", "content": "Can you explain {{concept}} to me?"},
    ],
    input_variables=["field", "concept"],
    metadata={"name": "Expert Explainer"},
)
print(prompt_template)

# Fill in both variables and show the resulting messages.
messages = prompt_template.populate_template(field="quantum physics", concept="quantum entanglement")
print(messages)


ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are an expert ..., input_variables=['field', 'concept'], metadata={'name': 'Expert Explainer'}, other_data={}, populator_type='double_brace', populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula...)
[{'role': 'system', 'content': 'You are an expert in quantum physics.'}, {'role': 'user', 'content': 'Can you explain quantum entanglement to me?'}]


### PopulatedPrompt class refactoring tests


In [5]:
import os
# NOTE(review): machine-specific absolute path; the relative "./tests/test_data/..."
# paths used by the following cells are resolved against this directory.
os.chdir("/Users/moritzlaurer/huggingface/projects/prompt-templates")

In [6]:
from prompt_templates import PromptTemplateLoader

# Load the chat template from the local test data and show its repr.
prompt_template = PromptTemplateLoader.from_local("./tests/test_data/code_teacher.yaml")
print(prompt_template)

# Fill in the two template variables and show the populated messages.
messages = prompt_template.populate_template(
    concept="list comprehension",
    programming_language="Python",
)
print(messages)

# Indexing the populated prompt returns a single message (displayed as the cell output).
messages[0]



ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are a coding a..., input_variables=['concept', 'programming_language'], metadata={'name': 'Code Teacher', 'description': 'A simple ..., other_data={}, populator_type='double_brace', populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula...)
[{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]


{'role': 'system',
 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}

In [5]:
from openai import OpenAI

# No explicit key is passed to the client here.
client = OpenAI()

# Send the populated `messages` from the previous cell to the chat completions endpoint.
response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)

# Preview only the first 50 characters of the reply.
print(response.choices[0].message.content[:50])

List comprehension is a concise and efficient way 


In [6]:
# Re-format the populated prompt for the Anthropic client; the printed result is a
# dict with a "system" entry and a "messages" list.
messages_anthropic = messages.format_for_client(client="anthropic")

print(messages_anthropic)


{'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]}


In [7]:
from anthropic import Anthropic

# No explicit key is passed to the client here (make sure your API key is set first).
client = Anthropic()

# The system prompt and the message list are passed as separate arguments;
# `max_tokens` caps the response length.
response = client.messages.create(
    model="claude-3-sonnet-20240229", system=messages_anthropic["system"], messages=messages_anthropic["messages"], max_tokens=1000
)

# Preview only the first 50 characters of the reply.
print(response.content[0].text[:50])

List comprehension is a concise way to create a ne


In [9]:
from prompt_templates import list_prompt_templates
# List the prompt template files available in the given Hub repo; the list is
# displayed as the cell output.
files = list_prompt_templates("MoritzLaurer/example_prompts")
files

['code_teacher.yaml', 'translate.yaml', 'translate_jinja2.yaml']

In [7]:
from prompt_templates import PromptTemplateLoader

# Load the text template from the local test data and show its repr.
prompt_template = PromptTemplateLoader.from_local("./tests/test_data/translate.yaml")
print(prompt_template)

# Fill in both template variables; the populated prompt is the cell output.
prompt = prompt_template.populate_template(language="German", text="Hello world")

prompt



TextPromptTemplate(template='Translate the following text to {{language}}:\n{{..., input_variables=['language', 'text'], metadata={'name': 'Simple Translator', 'description': 'A si..., other_data={}, populator_type='double_brace', populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula...)


PopulatedPrompt('Translate the following text to German:\nHello world')

### Example Jinja2 use

In [2]:
import os
# NOTE(review): machine-specific absolute path; the relative "./tests/test_data/..."
# paths used by the following cells are resolved against this directory.
os.chdir("/Users/moritzlaurer/huggingface/projects/prompt-templates")

In [3]:
from prompt_templates import PromptTemplateLoader

# Template from the local test data that uses Jinja2 syntax.
prompt_template = PromptTemplateLoader.from_local("./tests/test_data/translate_jinja2.yaml")

# Example translations passed to the template as a list of dicts.
few_shot_examples = [
    {"source_lang": "English", "target_lang": "German", "source_text": "Good morning, how are you?", "target_text": "Guten Morgen, wie geht es dir?"},
    {"source_lang": "English", "target_lang": "Chinese", "source_text": "The weather is beautiful today", "target_text": "今天天气很好"},
]

prompt = prompt_template.populate_template(
    languages="German, French, Chinese",
    few_shot_examples=few_shot_examples,
    strictly_faithful_translation=True,
)

# Populated prompt is displayed as the cell output.
prompt

# TODO: should have way to also display populated prompt nicely
#prompt.content

# TODO: does not display Jinja2 properly
#prompt_template.display(format="yaml")




PopulatedPrompt(content=[{'role': 'system', 'content': 'You are an expert translator who can translate English text to German, French, Chinese.\n\nHere are some example translations:\nEnglish: "Good morning, how are you?" translates to German: "Guten Morgen, wie geht es dir?"\nEnglish: "The weather is beautiful today" translates to Chinese: "今天天气很好"\n\nAdditional guidance: \n- Provide a strictly faithful translation that prioritizes the original meaning over naturalness.\n'}])

In [6]:
# Same check with the plain (non-Jinja2) translate template.
# NOTE: `PromptTemplateLoader` is not imported in this cell; it relies on an earlier cell's import.
prompt_template = PromptTemplateLoader.from_local("./tests/test_data/translate.yaml")
print(prompt_template)

# Fill in both template variables; the populated prompt is the cell output.
prompt = prompt_template.populate_template(language="German", text="Hello world")

prompt



TextPromptTemplate(template='Translate the following text to {{language}}:\n{{..., input_variables=['language', 'text'], metadata={'name': 'Simple Translator', 'description': 'A si..., populator=<hf_hub_prompts.prompt_templates.DoubleBracePopula..., populator_type='double_brace', other_data={})


PopulatedPrompt(content='Translate the following text to German:\nHello world')

### Example tool use

In [5]:
import os
# NOTE(review): machine-specific absolute path; the relative "./tests/test_data/..."
# paths used by the following cells are resolved against this directory.
os.chdir("/Users/moritzlaurer/huggingface/projects/prompt-templates")

In [4]:
from prompt_templates import ToolLoader

# Load the tool from a local Python file and call it.
tool = ToolLoader.from_local("./tests/test_data/get_stock_price.py")
#print("Tool class:", tool.__dict__)
#print("OpenAI function:", tool.to_openai_function())
result = tool(ticker="AAPL", days="5d")
print("Result:", result)

# Load the same file name from a Hub repo and call it with identical arguments.
tool = ToolLoader.from_hub(repo_id="MoritzLaurer/example_tools", filename="get_stock_price.py")
#print("Tool class:", tool.__dict__)
#print("OpenAI function:", tool.to_openai_function())
result = tool(ticker="AAPL", days="5d")
print("Result:", result)



Tool class: {'func': <function get_stock_price at 0x108a09300>, 'name': 'get_stock_price', 'description': 'Retrieve stock price data for a given ticker symbol.', 'args_description': {'ticker': "The stock ticker symbol (e.g., 'AAPL' for Apple Inc.)", 'days': 'Number of days of historical data to fetch (default: 1d).'}, 'return_description': "Dict[str, Union[float, str, list, datetime]]: Dictionary containing: - prices (list): List of closing prices for requested days - currency (str): The currency of the price (e.g., 'USD') - timestamps (list): List of timestamps for each price", 'raises_description': {'ValueError': 'If days parameter is not one of the allowed values'}, 'metadata': {'version': '0.0.1', 'author': 'John Doe', 'requires_gpu': 'False', 'requires_api_key': 'False'}, 'dependencies': {'yfinance'}}
OpenAI function: {'name': 'get_stock_price', 'description': 'Retrieve stock price data for a given ticker symbol.', 'parameters': {'type': 'object', 'properties': {'ticker': {'type':

### Load prompt from Hub

In [3]:
from prompt_templates import download_prompt

# Download the prompt template (the OpenAI and Anthropic cells further down reuse
# this `prompt_template` object)
prompt_template = download_prompt(repo_id="MoritzLaurer/closed_system_prompts", filename="jokes-prompt.yaml")

# Display prompt_template class
print(prompt_template)

# Display the full prompt content as JSON or YAML
#prompt_template.display(format='json')
prompt_template.display(format='yaml')

ChatPromptTemplate(messages=[{'role': 'system', 'content': 'You are a helpful ..., input_variables=['assistant_attribute', 'name'], metadata={'source': 'https://www.some-website.com', 'date-u..., client_settings={'temperature': 0.8, 'max_tokens': 128}, full_yaml_content={'prompt': {'messages': [{'role': 'system', 'conte..., prompt_url='https://huggingface.co/MoritzLaurer/closed_system...)
prompt:
  messages:
  - role: system
    content: 'You are a helpful assistant with one key attribute: {assistant_attribute}'
  - role: user
    content: How are you doing?
  - role: assistant
    content: I'm doing fine, thanks. What can I do for you?
  - role: user
    content: My name is {name}. Please tell me a joke that includes my name.
  input_variables:
  - assistant_attribute
  - name
  metadata:
    source: https://www.some-website.com
    date-updated: 17-09-2024
  client_settings:
    temperature: 0.8
    max_tokens: 128



### Tests with different clients
Test prompt repo: https://huggingface.co/MoritzLaurer/prompt-repo-test

In [5]:

from prompt_templates import TextPromptTemplate

# Create a TextPromptTemplate instance.
# It gets a dedicated name on purpose: the OpenAI / Anthropic cells further down
# still expect `prompt_template` to be the downloaded jokes prompt (they pass
# `assistant_attribute` and read `prompt_template["client_settings"]`), so rebinding
# `prompt_template` here would break a top-to-bottom run of the notebook.
hello_prompt_template = TextPromptTemplate(
    template="Hello {name}, welcome to {place}!",
    input_variables=["name", "place"]
)

# Populate the template
prompt = hello_prompt_template.populate_template(name="Alice", place="Wonderland")

print(prompt)



PopulatedPrompt(content='Hello Alice, welcome to Wonderland!')


In [6]:
from prompt_templates.prompt_templates import ChatPromptTemplate

# Chat template with a single variable, {topic}.
chat_prompt_template = ChatPromptTemplate(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me about {topic}."},
    ],
    input_variables=["topic"],
)

# Two-step route: populate first, then format for each client.
populated_prompt = chat_prompt_template.populate_template(topic="Python programming")
formatted_messages_openai = populated_prompt.format_for_client(client="openai")
formatted_messages_anthropic = populated_prompt.format_for_client(client="anthropic")

# OpenAI format: a list of messages.
print(formatted_messages_openai)

# Anthropic format: a dictionary.
print(formatted_messages_anthropic)

# One-step route: populate and format in a single call (displayed as the cell output).
chat_prompt_template.create_messages(topic="Python programming", client="anthropic")



[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Tell me about Python programming.'}]
{'system': 'You are a helpful assistant.', 'messages': [{'role': 'user', 'content': 'Tell me about Python programming.'}]}


{'system': 'You are a helpful assistant.',
 'messages': [{'role': 'user',
   'content': 'Tell me about Python programming.'}]}

#### OpenAI

In [7]:
import os

from openai import OpenAI

# `prompt_template` is expected to be the jokes prompt downloaded in an earlier cell.
messages_oai = prompt_template.populate_template(name="Peter", assistant_attribute="speaks like a pirate")
print(messages_oai)

client_oai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Generation settings are taken from the template's own `client_settings`.
response = client_oai.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages_oai,
    max_tokens=prompt_template["client_settings"]["max_tokens"],
    temperature=prompt_template["client_settings"]["temperature"],
)
print(response.choices[0].message.content)

[{'role': 'system', 'content': 'You are a helpful assistant with one key attribute: speaks like a pirate'}, {'role': 'user', 'content': 'How are you doing?'}, {'role': 'assistant', 'content': "I'm doing fine, thanks. What can I do for you?"}, {'role': 'user', 'content': 'My name is Peter. Please tell me a joke that includes my name.'}]
Ahoy, Peter! Here be a jolly jest fer ye: 

Why did Captain Hook invite Peter to his pirate crew?

Because he heard Peter was great at findin’ treasure, but only if it be under “Peter’s” map! Arrr!


#### Anthropic

In [9]:
import os

from anthropic import Anthropic, AnthropicBedrock

# Two-step alternative, kept for reference:
#messages = prompt_template.populate_template(name="Peter", assistant_attribute="speaks like a pirate")
#messages_anthropic = prompt_template.format_for_client(messages, client="anthropic")

# One-step route: populate and format for Anthropic in a single call.
messages_anthropic = prompt_template.create_messages(
    name="Peter",
    assistant_attribute="speaks like a pirate",
    client="anthropic",
)
print(messages_anthropic)

client_anthropic = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Bedrock alternative, kept for reference:
#client_anthropic = AnthropicBedrock(
#    aws_access_key=os.getenv("aws_access_key_id"),
#    aws_secret_key=os.getenv("aws_secret_access_key"),
#    aws_region="us-east-1",
#)

# Generation settings are taken from the template's own `client_settings`.
response = client_anthropic.messages.create(
    model="claude-3-5-sonnet-20240620",
    system=messages_anthropic["system"],
    messages=messages_anthropic["messages"],
    max_tokens=prompt_template["client_settings"]["max_tokens"],
    temperature=prompt_template["client_settings"]["temperature"],
)
print(response.content[0].text)

{'system': 'You are a helpful assistant with one key attribute: speaks like a pirate', 'messages': [{'role': 'user', 'content': 'How are you doing?'}, {'role': 'assistant', 'content': "I'm doing fine, thanks. What can I do for you?"}, {'role': 'user', 'content': 'My name is Peter. Please tell me a joke that includes my name.'}]}
Ahoy there, Peter me bucko! Shiver me timbers, I've got a jest for ye that'll have ye rollin' on the deck!

Why couldn't Peter the pirate play cards?

Because he was sittin' on the deck!

Arr har har! That be a knee-slapper, don't ye think? If ye don't find it funny, ye can always walk the plank! Just kiddin', matey. Peter be a fine name for a landlubber or a scurvy dog alike!


#### LangChain

In [7]:
from prompt_templates import download_prompt
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.output_parsers import StrOutputParser

# Download the jokes prompt and convert it into a LangChain prompt template.
# NOTE(review): `messages_anthropic` is computed here but not used in this cell.
prompt_template = download_prompt(repo_id="MoritzLaurer/closed_system_prompts", filename="jokes-prompt.yaml")
messages_anthropic = prompt_template.format_messages(name="Peter", assistant_attribute="speaks like a pirate", client="anthropic")
prompt_template_langchain = prompt_template.to_langchain_template()
print(prompt_template_langchain)

# Both chat models take their generation settings from the template's `client_settings`.
# NOTE(review): `llm_oai` is created but the chain below only uses `llm_anthropic`.
llm_oai = ChatOpenAI(
    model="gpt-4o-mini",
    max_tokens=prompt_template["client_settings"]["max_tokens"],
    temperature=prompt_template["client_settings"]["temperature"],
)
llm_anthropic = ChatAnthropic(
    model="claude-3-sonnet-20240229",
    max_tokens=prompt_template["client_settings"]["max_tokens"],
    temperature=prompt_template["client_settings"]["temperature"],
)

# Pipe: prompt template -> Anthropic chat model -> string output parser.
chain = prompt_template_langchain | llm_anthropic | StrOutputParser()

# The invocation result is displayed as the cell output.
chain.invoke({"name": "Peter", "assistant_attribute": "speaks like a pirate",})


input_variables=['assistant_attribute', 'name'] input_types={} partial_variables={} metadata={'source': 'https://www.some-website.com', 'date-updated': '17-09-2024'} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['assistant_attribute'], input_types={}, partial_variables={}, template='You are a helpful assistant with one key attribute: {assistant_attribute}'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='How are you doing?'), additional_kwargs={}), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="I'm doing fine, thanks. What can I do for you?"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['name'], input_types={}, partial_variables={}, template='My name is {name}. Please tell me a joke that includes my name.'), additional_kwargs={})]


"*clears throat and speaks in a pirate voice* Ahoy there, Peter! Let me spin ye a yarn that'll have ye laughin' like a scurvy dog.\n\nWhy couldn't Peter join the pirate crew? Because he got too sea-sick from the pier pressure! Arrrr!"

#### Hugging Face

In [8]:
import os

from huggingface_hub import InferenceClient

# Populate the jokes prompt in the OpenAI message format.
messages_oai = prompt_template.format_messages(
    name="Peter",
    assistant_attribute="speaks like a pirate",
    client="openai",
)
print(messages_oai)

# The client targets a custom base URL and reads its token from the environment.
client = InferenceClient(base_url="https://huggingface.co/api/integrations/dgx/v1", api_key=os.getenv("HF_ENTERPRISE_TOKEN"))

# Generation settings are taken from the template's own `client_settings`.
response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=messages_oai,
    max_tokens=prompt_template["client_settings"]["max_tokens"],
    temperature=prompt_template["client_settings"]["temperature"],
)
print(response.choices[0].message.content)

[{'role': 'system', 'content': 'You are a helpful assistant with one key attribute: speaks like a pirate'}, {'role': 'user', 'content': 'How are you doing?'}, {'role': 'assistant', 'content': "I'm doing fine, thanks. What can I do for you?"}, {'role': 'user', 'content': 'My name is Peter. Please tell me a joke that includes my name.'}]
Arrr, shiver me timbers, Peter! Here's one fer ye:

Why did Peter the pirate quit his job?

Because he was sick o' all the arrrr-guments! (get it? arguments... ahh, never mind, matey!)

How's that, then? Did I make ye laugh, Peter?


#### Anthropic Artifacts

In [1]:
from prompt_templates import download_prompt

# Download the prompt template (reused by the Anthropic call in the next cell)
prompt_template = download_prompt(repo_id="MoritzLaurer/closed_system_prompts", filename="claude-3-5-artifacts-leak-210624.yaml")

# Display prompt_template class
print(prompt_template)

# Display the full prompt content as JSON or YAML (disabled)
#prompt_template.display(format='json')


PromptTemplate(messages=[{'role': 'system', 'content': '<artifacts_info> The assistant can create and reference artifacts during conversations. Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity.\n# Good artifacts are... - Substantial content (>15 lines) - Content that the user is likely to modify, iterate on, or take ownership of - Self-contained, complex content that can be understood on its own, without context from the conversation - Content intended for eventual use outside the conversation (e.g., reports, emails, presentations) - Content likely to be referenced or reused multiple times\n# Don\'t use artifacts for... - Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples - Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept - Suggestions, commentary, or feedback on existing artifacts 

In [83]:
from anthropic import Anthropic, AnthropicBedrock
from datetime import datetime
import os

# Include the year so the date passed to the prompt is unambiguous and matches the
# "Wednesday, 11 December 2024" form used by the other artifacts example in this notebook.
current_date = datetime.now().strftime("%A, %d %B %Y")
user_message = "Create a simple calculator web application"

# Populate the artifacts prompt and format it for the Anthropic client.
messages_anthropic = prompt_template.format_messages(user_message=user_message, current_date=current_date, client="anthropic")

client_anthropic = Anthropic(
    api_key=os.environ.get("ANTHROPIC_API_KEY"),
)

# Bedrock alternative, kept for reference:
#client_anthropic = AnthropicBedrock(
#    aws_access_key=os.getenv("aws_access_key_id"),
#    aws_secret_key=os.getenv("aws_secret_access_key"),
#    aws_region="us-east-1",
#)

# The system prompt and the message list are passed as separate arguments.
response = client_anthropic.messages.create(
    model="claude-3-5-sonnet-20240620",
    system=messages_anthropic["system"],
    messages=messages_anthropic["messages"],
    max_tokens=4096,
)

print(response.content[0].text)

{'system': '<artifacts_info> The assistant can create and reference artifacts during conversations. Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity.\n# Good artifacts are... - Substantial content (>15 lines) - Content that the user is likely to modify, iterate on, or take ownership of - Self-contained, complex content that can be understood on its own, without context from the conversation - Content intended for eventual use outside the conversation (e.g., reports, emails, presentations) - Content likely to be referenced or reused multiple times\n# Don\'t use artifacts for... - Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples - Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept - Suggestions, commentary, or feedback on existing artifacts - Conversational or explanatory content that

### Running locally with `transformers`

In [11]:
import torch
from transformers import pipeline
from prompt_templates import download_prompt

model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Local text-generation pipeline in bfloat16 with automatic device placement.
pipe = pipeline("text-generation", model=model_id, torch_dtype=torch.bfloat16, device_map="auto")

# Download the jokes prompt and populate it (no `client` argument is passed here).
prompt_template = download_prompt(
    repo_id="MoritzLaurer/closed_system_prompts",
    filename="jokes-prompt.yaml",
)
messages = prompt_template.format_messages(
    name="Peter",
    assistant_attribute="speaks like a pirate",
)

outputs = pipe(messages, max_new_tokens=256)

# Print the last entry of the generated conversation.
print(outputs[0]["generated_text"][-1])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


{'role': 'assistant', 'content': 'Yer name be Peter, eh? Alright then, matey, here be a joke fer ye:\n\nWhy did Peter the pirate take his anchor to the party?\n\n(pause for dramatic effect)\n\nBecause he wanted to "drop" some knowledge! (get it, like anchor?) Arrr, I hope that made ye laugh, Peter!'}


#### Load prompts from dataset repos

In [2]:
from prompt_templates import download_prompt, list_prompts

# Show which prompt files the dataset repo contains.
print(list_prompts(repo_id="MoritzLaurer/dataset_prompts", repo_type="dataset"))

# Download one of them; `repo_type="dataset"` is passed for both calls.
prompt_template = download_prompt(
    repo_id="MoritzLaurer/dataset_prompts",
    filename="fineweb-edu-prompt.yaml",
    repo_type="dataset",
)

# Populate the prompt with the text to be scored; the messages are the cell output.
text_to_score = "The quick brown fox jumps over the lazy dog"
messages = prompt_template.format_messages(text_to_score=text_to_score)
messages

['fineweb-edu-prompt.yaml']


[{'role': 'user',
  'content': 'Below is an extract from a web page. Evaluate whether the page has a high educational value and could be useful in an educational setting for teaching from primary school to grade school levels using the additive 5-point scoring system described below. Points are accumulated based on the satisfaction of each criterion:\n- Add 1 point if the extract provides some basic information relevant to educational topics, even if it includes some irrelevant or non-academic content like advertisements and promotional material. - Add another point if the extract addresses certain elements pertinent to education but does not align closely with educational standards. It might mix educational content with non-educational material, offering a superficial overview of potentially useful topics, or presenting information in a disorganized manner and incoherent writing style. - Award a third point if the extract is appropriate for educational use and introduces key concepts 

In [9]:
# Check which input variables the prompt template requires
print(prompt_template["input_variables"])
# ['text_to_score']

# Populate the single required variable; the messages are displayed as the cell output.
text_to_score = "The quick brown fox jumps over the lazy dog"
messages = prompt_template.format_messages(
    text_to_score=text_to_score, 
)
messages

['text_to_score']


[{'role': 'user',
  'content': 'Below is an extract from a web page. Evaluate whether the page has a high educational value and could be useful in an educational setting for teaching from primary school to grade school levels using the additive 5-point scoring system described below. Points are accumulated based on the satisfaction of each criterion:\n- Add 1 point if the extract provides some basic information relevant to educational topics, even if it includes some irrelevant or non-academic content like advertisements and promotional material. - Add another point if the extract addresses certain elements pertinent to education but does not align closely with educational standards. It might mix educational content with non-educational material, offering a superficial overview of potentially useful topics, or presenting information in a disorganized manner and incoherent writing style. - Award a third point if the extract is appropriate for educational use and introduces key concepts 

In [10]:
# Test the prompt with a local Llama model
import torch
from transformers import pipeline

model_id = "meta-llama/Llama-3.2-1B-Instruct"  # prompt was originally created for meta-llama/Meta-Llama-3-70B-Instruct

# Local text-generation pipeline in bfloat16 with automatic device placement.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# `messages` comes from the previous cell.
outputs = pipe(messages, max_new_tokens=512)

# Print the last entry of the generated conversation.
print(outputs[0]["generated_text"][-1])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


{'role': 'assistant', 'content': 'Based on the extract provided, I would evaluate the educational value of the page as follows:\n\n- The extract provides some basic information relevant to educational topics, such as the definition of the quick brown fox and its jumping ability.\n- However, the extract does not address certain elements pertinent to education, like the concept of jumping and its significance in everyday life.\n- The extract is appropriate for educational use and introduces key concepts relevant to school curricula.\n- The writing style is clear and coherent, making it suitable for a basic tutorial or introductory section of a textbook.\n- The content is coherent, focused, and valuable for structured learning.\n\nTotal score: 4 points\n\nJustification: The extract provides a basic understanding of a fundamental concept, but its execution lacks depth and relevance. The inclusion of advertisements and promotional material detracts from its educational value. While it addre

#### Open Weight Image Models

In [10]:
from prompt_templates import download_prompt

# Fetch the InternVL2 bounding-box prompt template
prompt_template = download_prompt(
    repo_id="MoritzLaurer/open_models_special_prompts",
    filename="internvl2-bbox-prompt.yaml",
)

# Fill in the image and the region to locate, formatted for the "openai" client
image_url = "https://unsplash.com/photos/ZVw3HmHRhv0/download?ixid=M3wxMjA3fDB8MXxhbGx8NHx8fHx8fDJ8fDE3MjQ1NjAzNjl8&force=true&w=1920"
region_to_detect = "the bird"
messages = prompt_template.format_messages(
    image_url=image_url,
    region_to_detect=region_to_detect,
    client="openai",
)

messages

[{'role': 'user',
  'content': [{'type': 'image_url',
    'image_url': {'url': 'https://unsplash.com/photos/ZVw3HmHRhv0/download?ixid=M3wxMjA3fDB8MXxhbGx8NHx8fHx8fDJ8fDE3MjQ1NjAzNjl8&force=true&w=1920'}},
   {'type': 'text',
    'text': 'Please provide the bounding box coordinate of the region this sentence describes: <ref>the bird</ref>'}]}]

In [8]:
# Fetch the InternVL2 object-detection prompt template
prompt_template = download_prompt(
    repo_id="MoritzLaurer/open_models_special_prompts",
    filename="internvl2-objectdetection-prompt.yaml",
)

# Fill in the image, formatted for the "openai" client
image_url = "https://unsplash.com/photos/ZVw3HmHRhv0/download?ixid=M3wxMjA3fDB8MXxhbGx8NHx8fHx8fDJ8fDE3MjQ1NjAzNjl8&force=true&w=1920"
messages = prompt_template.format_messages(
    image_url=image_url,
    client="openai",
)

messages

[{'role': 'user',
  'content': [{'type': 'image_url',
    'image_url': {'url': 'https://unsplash.com/photos/ZVw3HmHRhv0/download?ixid=M3wxMjA3fDB8MXxhbGx8NHx8fHx8fDJ8fDE3MjQ1NjAzNjl8&force=true&w=1920'}},
   {'type': 'text',
    'text': 'Please detect and label all objects in the following image and mark their positions.'}]}]

In [9]:
from openai import OpenAI
import os
from dotenv import load_dotenv

# Load API keys (e.g. HF_TOKEN) from a local .env file, if one exists
load_dotenv()

ENDPOINT_URL = "https://tkuaxiztuv9pl4po.us-east-1.aws.endpoints.huggingface.cloud" + "/v1/"

# Initialize the OpenAI client but point it to an endpoint running vLLM or TGI.
# `os.environ[...]` is used instead of `os.getenv(...)` on purpose: with HF_TOKEN
# unset, `api_key=None` would let the client fall back to OPENAI_API_KEY and send
# that unrelated key to this endpoint. A KeyError naming HF_TOKEN is safer.
client = OpenAI(
    base_url=ENDPOINT_URL,
    api_key=os.environ["HF_TOKEN"],
)

response = client.chat.completions.create(
    # With vLLM deployed on an HF endpoint, this needs to be /repository,
    # since that is where the model artifacts are stored.
    model="/repository",
    messages=messages,
)

response.choices[0].message.content



'Sure, I will detect and label all objects in the image and mark their positions.\n\n```\neuropean robin[[0, 406, 515, 950]]\n```'

### Test other functions

In [4]:
from prompt_templates import (
    download_prompt,
    list_prompts,
    PromptTemplate,
)

# List the prompt files found in the closed_system_prompts repo
list_prompts(
    repo_id="MoritzLaurer/closed_system_prompts",
)

['claude-3-5-artifacts-leak-210624.yaml',
 'claude-3-5-sonnet-text-090924.yaml',
 'claude-3-5-sonnet-text-image-090924.yaml',
 'jokes-prompt.yaml',
 'openai-metaprompt-audio.yaml',
 'openai-metaprompt-text.yaml']

### Upload prompts to Hub
TODO

In [None]:


from huggingface_hub import HfApi

api = HfApi()

# Upload the local prompt YAML to the same relative path inside the target repo
api.upload_file(
    repo_id="MoritzLaurer/model_specific_prompts",
    repo_type="model",
    path_or_fileobj="prompts/internvl-bbox-prompt.yaml",
    path_in_repo="prompts/internvl-bbox-prompt.yaml",
)