In [1]:
import torch

In [2]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-it")

  from .autonotebook import tqdm as notebook_tqdm


In [42]:
# Method 1: Using offset mappings (most efficient)
def find_token_positions_with_offsets(tokenizer, text, search_strings):
    """
    Find token positions for search strings using offset mappings
    """
    # Tokenize with return_offsets_mapping=True
    encoding = tokenizer(text, return_offsets_mapping=True, return_tensors="pt")
    
    token_positions = {}
    
    for search_string in search_strings:
        # Find character positions
        start_char = text.find(search_string)
        if start_char == -1:
            continue
        end_char = start_char + len(search_string)
        
        # Find corresponding token positions using offsets
        start_token = None
        end_token = None
        
        for i, (token_start, token_end) in enumerate(encoding['offset_mapping'][0]):
            # Find start token
            if start_token is None and token_start <= start_char < token_end:
                start_token = i
            # Find end token
            if end_token is None and token_start < end_char <= token_end:
                end_token = i
                break
            # Handle case where end_char is exactly at token boundary
            if end_token is None and token_start == end_char:
                end_token = i - 1
                break
        
        if start_token is not None and end_token is not None:
            token_positions[search_string] = (start_token, end_token)
    
    return encoding, token_positions


In [178]:
input_text_2 = "Just vibe it."
input_text_1 = (
    "A robust approach is to use trigonometry to solve the problem. Since the user has provided the angles and the length of the arm, "
    "we can leverage trigonometric principles to construct a precise forward kinematics model. This method allows us to accurately compute the position of the arm's end effector, "
    "generate the required code, and analyze the trajectory error, ensuring both correctness and efficiency in the solution."
)

prompt = (
    f"Idea 1: {input_text_1}\n"
    f"Idea 2: {input_text_2}\n"
    "You need to return your choice in json format as follows: {\"idea\": \"1\" or \"2\"}, nothing else. No additional text or comments."
)

messages = [
    [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant helping to write code for a mechanical arm. You are presented with multiple ideas to solve the problem. You need to write the code for the best idea. You need to first select the idea 1 or 2 ONLY"},]
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": prompt},]
        },
    ],
]

In [190]:
# chat_tokens = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", return_dict=True)
# offsets      = tokenizer.apply_chat_template(messages, tokenize=False, return_offsets_mapping=True)

In [189]:

# Test with our strings
test_text = prompt
search_strings = [input_text_1, input_text_2]


# encoding, positions = find_token_positions_with_offsets(tokenizer, test_text, search_strings)

print("Using offset mappings:")
idea_start_end = {}
# for i, (text, (start, end)) in enumerate(positions.items()):
#     print(f"Idea {i+1}: {text}, {start}, {end}")
#     idea_start_end[i] = (start, end)
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device).to(torch.bfloat16)
offsets = tokenizer.apply_chat_template(messages, tokenize=False, return_offsets_mapping=True)
encoding, positions = find_token_positions_with_offsets(tokenizer, test_text, search_strings)

for i, (text, (start, end)) in enumerate(positions.items()):
    print(f"Idea {i+1}: {text}, {start}, {end}")
    idea_start_end[i] = (start, end)

outputs = model.generate(**inputs, max_new_tokens=100, output_attentions=True, output_hidden_states=True, return_dict_in_generate=True)
print(tokenizer.decode(outputs.sequences[0]))
# The first level is the tokens level outputs.
# The second level is the attention layers levels.
# I need a tensor of the shape [batch_size, num_heads, seq_len + output_len, seq_len]
layers_outs = torch.concat([layer[0] for layer in outputs.attentions[1:]], dim = -2)
for idea_num, (start, end) in idea_start_end.items():
    print('\n', start, end, type(start), type(end))
    idea_attention = layers_outs[:,:,:,start:end+1]
    idea_mean = idea_attention.mean(dim=(1, 2, 3))  # (batch, 1, 1, 1)
    print(f"Idea {idea_num+1} mean: {idea_mean}")

Attempting to cast a BatchEncoding to type torch.bfloat16. This is not supported.


Using offset mappings:
Idea 1: A robust approach is to use trigonometry to solve the problem. Since the user has provided the angles and the length of the arm, we can leverage trigonometric principles to construct a precise forward kinematics model. This method allows us to accurately compute the position of the arm's end effector, generate the required code, and analyze the trajectory error, ensuring both correctness and efficiency in the solution., 5, 80
Idea 2: Just vibe it., 86, 89
<bos><start_of_turn>user
You are a helpful assistant helping to write code for a mechanical arm. You are presented with multiple ideas to solve the problem. You need to write the code for the best idea. You need to first select the idea 1 or 2 ONLY

Idea 1: A robust approach is to use trigonometry to solve the problem. Since the user has provided the angles and the length of the arm, we can leverage trigonometric principles to construct a precise forward kinematics model. This method allows us to accurat