# Simple tool formatting with HF
Showcasing how to use the new template format from HF for tool calling

## Loading the models

In [None]:
# from huggingface_hub import login
# login()

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [41]:
# del model
# import gc
# gc.collect()
# torch.cuda.empty_cache()

## Setting up the system messages and default tools 

In [67]:
dialogs = [
    [
        {"role": "system", "content": "You are a helpful chatbot"},
        {"role": "user", "content": "What is the weather today in San Francisco?"},
    ],
    [
        {"role": "system", "content": "You are a helpful chatbot"},
        {"role": "user", "content": "What is the weather today in San Francisco?"},
    ],
  ]

messages = [
        {"role": "system", "content": "You are a helpful chatbot"},
        {"role": "user", "content": "What is the weather today in San Francisco?"},
    ]

builtin_tools = ["code_interpreter", "wolfram_alpha", "brave_search"]

json_tools = [ 
    { "type": "function",
      "function": {
          "name": "spotify_trending_songs",
          "description": "Get top trending songs on Spotify",
          "parameters": {
            "n": {
              "param_type": "int",
              "description": "Number of trending songs to get",
              "required": "true"
            }
          }
        }
   },
    {
      "type": "function",
      "function": {
        "name": "get_current_temperature",
        "description": "Get the current temperature for a specific location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The city and state, e.g., San Francisco, CA"
            },
            "unit": {
              "type": "string",
              "enum": ["Celsius", "Fahrenheit"],
              "description": "The temperature unit to use. Infer this from the user's location."
            }
          },
          "required": ["location", "unit"]
        }
      }
    },
    {
      "type": "function",
      "function": {
        "name": "get_rain_probability",
        "description": "Get the probability of rain for a specific location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The city and state, e.g., San Francisco, CA"
            }
          },
          "required": ["location"]
        }
      }
    }
]


json_tools = [ 
    { 
      "tool_name": "spotify_trending_songs",
      "description": "Get top trending songs on Spotify",
      "parameters": {
        "n": {
          "param_type": "int",
          "description": "Number of trending songs to get",
          "required": "true"
        }
      }
    
   },
    {
      "type": "function",
      "function": {
        "name": "get_rain_probability",
        "description": "Get the probability of rain for a specific location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The city and state, e.g., San Francisco, CA"
            }
          },
          "required": ["location"]
        }
      }
    }
]



## Converting to input ids and checking how the prompt format was applied

In [21]:
# for messages in dialog:
# Shouldn't output the Environment instruction, but it is.
input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

print(tokenizer.decode(input_ids[0], skip_special_tokens=False))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Environment: ipython
Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the weather today in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [22]:
input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        builtin_tools=builtin_tools
    ).to(model.device)

print(tokenizer.decode(input_ids[0], skip_special_tokens=False))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Environment: ipython
Tools: wolfram_alpha, brave_search

Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the weather today in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [30]:
input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            custom_tools=json_tools,
        ).to(model.device)
    
print(tokenizer.decode(input_ids[0], skip_special_tokens=False))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Environment: ipython
Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

Use the function'spotify_trending_songs' to 'Get top trending songs on Spotify':
{"name": "spotify_trending_songs", "description": "Get top trending songs on Spotify", "parameters": {
    "n": {
        "param_type": "int",
        "description": "Number of trending songs to get",
        "required": "true"
    }
}Use the function 'get_rain_probability' to 'Get the probability of rain for a specific location':
{"name": "get_rain_probability", "description": "Get the probability of rain for a specific location", "parameters": {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g., San Francisco, CA"
        }
    },
    "required": [
        "location"
    ]
}

Think very care

In [68]:
input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            custom_tools=json_tools,
            builtin_tools=builtin_tools
        ).to(model.device)
    
print(tokenizer.decode(input_ids[0], skip_special_tokens=False))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Environment: ipython
Tools: wolfram_alpha, brave_search

Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

Use the function'spotify_trending_songs' to 'Get top trending songs on Spotify':
{"name": "spotify_trending_songs", "description": "Get top trending songs on Spotify", "parameters": {
    "n": {
        "param_type": "int",
        "description": "Number of trending songs to get",
        "required": "true"
    }
}Use the function 'get_rain_probability' to 'Get the probability of rain for a specific location':
{"name": "get_rain_probability", "description": "Get the probability of rain for a specific location", "parameters": {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g., San Francisco, CA"
        }
    },
    "required": [
       

## Running inference 


In [69]:
attention_mask = torch.ones_like(input_ids)
outputs = model.generate(
    input_ids,
    max_new_tokens=400,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    attention_mask=attention_mask,
)
response = outputs[0][input_ids.shape[-1]:]
print("\nOutput:\n")
model_output = tokenizer.decode(response, skip_special_tokens=True)
print(model_output)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.



Output:

<function=get_rain_probability>{"location": "San Francisco, CA"}</function>


In [70]:
tool_output = {"rain_probability": 0.2, "location": "San Francisco, CA", "weather_description": "Partly cloudy with a high of 62\u00b0F and a low of 51\u00b0F"}

messages.append({"role": "assistant", "content": model_output})
messages.append({"role": "ipython", "content": tool_output})

input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            custom_tools=json_tools,
            builtin_tools=builtin_tools
        ).to(model.device)
    
print(tokenizer.decode(input_ids[0], skip_special_tokens=False))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Environment: ipython
Tools: wolfram_alpha, brave_search

Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

Use the function'spotify_trending_songs' to 'Get top trending songs on Spotify':
{"name": "spotify_trending_songs", "description": "Get top trending songs on Spotify", "parameters": {
    "n": {
        "param_type": "int",
        "description": "Number of trending songs to get",
        "required": "true"
    }
}Use the function 'get_rain_probability' to 'Get the probability of rain for a specific location':
{"name": "get_rain_probability", "description": "Get the probability of rain for a specific location", "parameters": {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g., San Francisco, CA"
        }
    },
    "required": [
       

## Reprompting the model

In [73]:
attention_mask = torch.ones_like(input_ids)
outputs = model.generate(
    input_ids,
    max_new_tokens=512,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.75,
    top_p=0.8,
    attention_mask=attention_mask,
)
response = outputs[0][input_ids.shape[-1]:]
print("\nOutput:\n")
model_output = tokenizer.decode(response, skip_special_tokens=True)
print(model_output)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.



Output:

Since the weather information is not up to date, let's try searching for it using brave_search:

<function=brave_search>{"query": "what is the weather today in San Francisco"}</function>


In [13]:
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [15]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages_repetition_test  = [
    {"role": "user", "content": "Any prompt should have this issue"},
]

repetition_input_ids = tokenizer.apply_chat_template(
            messages_repetition_test,
            add_generation_prompt=True,
            return_tensors="pt",
            
        ).to(model.device)

tokenizer.eos_token_id = [128008, 128009]

print(tokenizer.eos_token_id)

attention_mask = torch.ones_like(repetition_input_ids)
outputs = model.generate(
    repetition_input_ids,
    max_new_tokens=512,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    attention_mask=attention_mask,
)
response = outputs[0][repetition_input_ids.shape[-1]:]
print("\nOutput:\n")
model_output = tokenizer.decode(response, skip_special_tokens=False)
print(model_output)

None

Output:

It seems like you're trying to initiate a conversation, but there's a bit of a snag. You're saying that there should be an issue with any prompt I receive, but I'm not quite sure what that means. Could you clarify what kind of issue you're thinking of? Is it something specific, like a technical problem, or more general, like a philosophical conundrum?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

It seems like you're trying to communicate a problem or a challenge, but I'm not sure what the specific issue is. You're saying "Any prompt should have this issue", but I'm not sure what "this issue" refers to.

Could you try rephrasing or providing more context about what you mean by "this issue"? Are you looking for a prompt that presents a particular challenge or problem, or are you trying to point out a flaw in the way I respond to prompts?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

I think I understand what you're getting at. You want me to acknowledg

In [17]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages_repetition_test  = [
    {"role": "system", "content": """
    Environment: ipython
Tools: brave_search, wolfram_alpha

Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful assistant"""},
    {"role": "user", "content": "What is the current weather in Menlo Park, California?"},
]


repetition_input_ids = tokenizer.apply_chat_template(
            messages_repetition_test,
            add_generation_prompt=True,
            return_tensors="pt",
            
        ).to(model.device)

print(tokenizer.eos_token_id)

attention_mask = torch.ones_like(repetition_input_ids)
outputs = model.generate(
    repetition_input_ids,
    max_new_tokens=512,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    attention_mask=attention_mask,
)
response = outputs[0][repetition_input_ids.shape[-1]:]
print("\nOutput:\n")
model_output = tokenizer.decode(response, skip_special_tokens=False)
print(model_output)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


128009

Output:

<|python_tag|>brave_search.call(query="current weather in Menlo Park, California")<|eom_id|><|start_header_id|>assistant<|end_header_id|>

<|python_tag|>The current weather in Menlo Park, California is partially cloudy with a high of 68°F and a low of 55°F.<|eot_id|>


In [18]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages_repetition_test  = [
    {"role": "system", "content": """
    Environment: ipython
Tools: brave_search, wolfram_alpha

Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024

You are a helpful assistant"""},
    {"role": "user", "content": "What is the current weather in Menlo Park, California?"},
]


repetition_input_ids = tokenizer.apply_chat_template(
            messages_repetition_test,
            add_generation_prompt=True,
            return_tensors="pt",
            
        ).to(model.device)

print(tokenizer.eos_token_id)

attention_mask = torch.ones_like(repetition_input_ids)
outputs = model.generate(
    repetition_input_ids,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    attention_mask=attention_mask,
)
response = outputs[0][repetition_input_ids.shape[-1]:]
print("\nOutput:\n")
model_output = tokenizer.decode(response, skip_special_tokens=False)
print(model_output)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


128009

Output:

<|python_tag|>brave_search.call(query="Menlo Park California weather")<|eom_id|>


In [2]:
from transformers import pipeline
import torch

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)

messages = [
    {"role": "user", "content": """Given the family relationships:
* Carol is Emily's parent.
* Emily is Henry's parent.
* Abigail is Gary's parent.
* Gary is Sean's parent.
* Emily is Abigail's parent.
What is Carol's relationship to Abigail?
Select the correct answer:
1. Carol is Abigail's grandchild.
2. Carol is Abigail's sibling.
3. Carol is Abigail's grandparent.
Enclose the selected answer number in the <ANSWER> tag, for example: <ANSWER>1</ANSWER>."""},
]
outputs = pipe(
    messages,
    max_new_tokens=512,
    do_sample=True,
)
assistant_response = outputs[0]["generated_text"][-1]["content"]
print(assistant_response)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Let's break down the relationships:

* Carol is Emily's parent.
* Emily is Henry's parent.
* Abigail is Gary's parent.
* Gary is Sean's parent.
* Emily is Abigail's parent.

This creates a loop where Emily is both Carol's child and Abigail's child. This is a contradiction, but let's try to find a relationship between Carol and Abigail.

Since Emily is both Carol's child and Abigail's child, and Emily is also Henry's parent, we can conclude that Carol and Abigail are not siblings (option 2 is incorrect).

Now, let's look at the options:

* Option 1: Carol is Abigail's grandchild. This is not possible, as Carol is Emily's parent, and Emily is Abigail's child.
* Option 3: Carol is Abigail's grandparent. This is also not possible, as Carol is Emily's parent, and Emily is Abigail's child.

However, if we re-examine the relationships, we can see that Carol is Emily's parent, and Emily is Abigail's child. This makes Carol Abigail's parent-in-law, but not a grandparent. However, this is not an