In [1]:

from transformers import AutoProcessor, AutoModelForCausalLM, AutoTokenizer
import torch
from datasets import load_dataset, Dataset 

  from .autonotebook import tqdm as notebook_tqdm
  if not hasattr(np, "object"):
W1231 21:20:58.258000 31636 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
import torch, torchvision, torchaudio
print(torch.__version__)
print(torchvision.__version__)
print(torchaudio.__version__)
print(torch.version.cuda)

2.9.1+cu126
0.24.1+cu126
2.9.1+cu126
12.6


In [3]:
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using device: {device}")

Using device: cuda


In [4]:
max_seq_length = 4096 


In [5]:
processor = AutoProcessor.from_pretrained("google/functiongemma-270m-it", device_map="auto")
model = AutoModelForCausalLM.from_pretrained("google/functiongemma-270m-it", dtype="auto", device_map="auto")

In [6]:
tokenizer = AutoTokenizer.from_pretrained("google/functiongemma-270m-it")

In [7]:
import peft
from peft import LoraConfig

In [10]:
lora_config = LoraConfig(
    r=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=128*2,
    lora_dropout=0.05,
    bias="none",
    use_rslora=False,

)

model = peft.get_peft_model(model, lora_config)

In [11]:
messages_1 = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, who are you?"},
]

rendered_1 = tokenizer.apply_chat_template(
    messages_1,
    tools = [], # no tools
    add_generation_prompt = False,
    tokenize = False,
)

print("=== Example 1: Basic turns ===")
print(rendered_1)

=== Example 1: Basic turns ===
<bos><start_of_turn>developer
You are a helpful assistant.<end_of_turn>
<start_of_turn>user
Hello, who are you?<end_of_turn>



 <start_function_declaration>declaration:get_weather{...}<end_function_declaration> encodes the full function spec (name, description, parameters) so the model knows what tools it can call and how to format arguments.

In [12]:
tools_2 = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "City name, e.g. 'Tokyo'.",
                    }
                },
                "required": ["city"],
            },
        },
    }
]

messages_2 = [
    {"role": "system", "content": "You are a weather assistant."},
    {"role": "user", "content": "What is the weather in Tokyo?"},
]

rendered_2 = tokenizer.apply_chat_template(
    messages_2,
    tools = tools_2,
    add_generation_prompt = False,
    tokenize = False,
)

print("=== Example 2: Tool declarations ===")
print(rendered_2)

=== Example 2: Tool declarations ===
<bos><start_of_turn>developer
You are a weather assistant.<start_function_declaration>declaration:get_weather{description:<escape>Get the current weather for a given city.<escape>,parameters:{properties:{city:{description:<escape>City name, e.g. 'Tokyo'.<escape>,type:<escape>STRING<escape>}},required:[<escape>city<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>
<start_of_turn>user
What is the weather in Tokyo?<end_of_turn>



In [13]:
messages_3 = [
    {
        "role": "system",
        "content": "You are a weather assistant.",
    },
    {
        "role": "user",
        "content": "What is the weather in Tokyo?",
    },
    # Assistant issues a tool call
    {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": {"city": "Tokyo"},
                },
            }
        ],
    },
    # Tool (infrastructure) responds
    {
        "role": "tool",
        "name": "get_weather",
        "tool_call_id": "call_1",
        "content": '{"city": "Tokyo", "temp_c": 25, "condition": "sunny"}',
    },
    # Assistant gives final natural-language answer
    {
        "role": "assistant",
        "content": "It is currently 25Â°C and sunny in Tokyo.",
    },
]

rendered_3 = tokenizer.apply_chat_template(
    messages_3,
    tools = tools_2,
    add_generation_prompt = False,
    tokenize = False,
)

print("=== Example 3: User â†’ Model â†’ Tool â†’ Model ===")
print(rendered_3)

=== Example 3: User â†’ Model â†’ Tool â†’ Model ===
<bos><start_of_turn>developer
You are a weather assistant.<start_function_declaration>declaration:get_weather{description:<escape>Get the current weather for a given city.<escape>,parameters:{properties:{city:{description:<escape>City name, e.g. 'Tokyo'.<escape>,type:<escape>STRING<escape>}},required:[<escape>city<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>
<start_of_turn>user
What is the weather in Tokyo?<end_of_turn>
<start_of_turn>model
<start_function_call>call:get_weather{city:<escape>Tokyo<escape>}<end_function_call><start_function_response>response:get_weather{value:<escape>{"city": "Tokyo", "temp_c": 25, "condition": "sunny"}<escape>}<end_function_response>It is currently 25Â°C and sunny in Tokyo.<end_of_turn>



In [14]:
tools_4 = [
    {
        "type": "function",
        "function": {
            "name": "get_amazon_product_details",
            "description": (
                "Retrieves comprehensive product information from Amazon, "
                "including title, price, description, specifications, and availability."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "asin": {
                        "type": "string",
                        "description": "The Amazon ASIN of the product.",
                    }
                },
                "required": ["asin"],
            },
        },
    }
]

messages_4 = [
    {
        "role": "system",
        "content": (
            "You are a shopping assistant. Use tools when you need detailed "
            "Amazon product data such as price and specifications."
        ),
    },
    {
        "role": "user",
        "content": "Is the espresso machine with ASIN B0XYZ12345 any good for home use?",
    },
    {
        "role": "assistant",
        "content": (
            "<think>"
            "User is asking for an opinion, but I need factual product details first "
            "such as price, features, and reviews. I should call the Amazon product "
            "details tool with the provided ASIN."
            "</think>"
        ),
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {
                    "name": "get_amazon_product_details",
                    "arguments": {
                        "asin": "B0XYZ12345"
                    },
                },
            }
        ],
    },
    {
        "role": "tool",
        "name": "get_amazon_product_details",
        "tool_call_id": "call_1",
        "content": (
            '{"title": "Home Pro Espresso 3000", '
            '"price": 199.99, '
            '"pressure_bar": 15, '
            '"features": ["steam wand", "single and double shot baskets"], '
            '"pros": ["good crema", "compact"], '
            '"cons": ["a bit noisy"]}'
        ),
    },
    {
        "role": "assistant",
        "content": (
            "<think>"
            "Tool response shows a mid-range price and standard 15 bar pressure. "
            "Features and pros/cons indicate itâ€™s fine for home espresso but not "
            "a high-end machine for enthusiasts."
            "</think>\n"
            "Based on the product details, the Home Pro Espresso 3000 (ASIN B0XYZ12345) "
            "is a solid option for home use. It offers 15-bar pressure, a steam wand, "
            "and both single and double shot baskets, which are enough for everyday "
            "lattes and cappuccinos. Itâ€™s compact and produces good crema, but it can "
            "be a bit noisy. If you want a convenient, reasonably priced home machine, "
            "it should work well; if youâ€™re very picky about espresso or plan to upgrade "
            "grinders and accessories, you might eventually want something more advanced."
        ),
    },
]

rendered_prompt = tokenizer.apply_chat_template(
    messages_4,
    tools = tools_4,
    add_generation_prompt = False,  # True if you want to open a fresh model turn for generation
    tokenize = False,
)

print("=== Thinking + Tools ===")
print(rendered_prompt)

=== Thinking + Tools ===
<bos><start_of_turn>developer
You are a shopping assistant. Use tools when you need detailed Amazon product data such as price and specifications.<start_function_declaration>declaration:get_amazon_product_details{description:<escape>Retrieves comprehensive product information from Amazon, including title, price, description, specifications, and availability.<escape>,parameters:{properties:{asin:{description:<escape>The Amazon ASIN of the product.<escape>,type:<escape>STRING<escape>}},required:[<escape>asin<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>
<start_of_turn>user
Is the espresso machine with ASIN B0XYZ12345 any good for home use?<end_of_turn>
<start_of_turn>model
<think>User is asking for an opinion, but I need factual product details first such as price, features, and reviews. I should call the Amazon product details tool with the provided ASIN.</think><start_function_call>call:get_amazon_product_details{asin:<escape>B0X

In [15]:
dataset = load_dataset("LLM360/TxT360-3efforts", name = "agent", split = "medium", streaming = True)

In [19]:
dataset = Dataset.from_list(list(dataset.take(10000)))

In [20]:
dataset

Dataset({
    features: ['messages'],
    num_rows: 10000
})

In [22]:
dataset[0]['messages']

'[{"role": "system", "tools": [{"name": "qrcodepro", "description": "Generates a professional QR code with customizable properties and settings.", "parameters": {"type": "object", "properties": {"text": {"description": "The text to encode into the QR code.", "type": "string", "default": "https://www.digicatech.com"}, "validate": {"description": "Whether to validate the input text. Defaults to None.", "type": "string", "default": true}, "setlabel": {"description": "Whether to set a label on the QR code. Defaults to None.", "type": "string", "default": false}, "forecolor": {"description": "The foreground color of the QR code in hexadecimal format without the # prefix. Defaults to \'000000\'.", "type": "string", "default": "000000"}, "type": {"description": "The output file type for the QR code (\'png\', \'svg\', or \'eps\'). Defaults to \'svg\'.", "type": "string", "default": "svg"}, "labeltext": {"description": "The text to use as a label in the QR code. Defaults to None.", "type": "str

In [23]:
import json

In [24]:
THINK_TAG_OPEN = "<think>"
THINK_TAG_CLOSE = "</think>"

In [25]:
#@title Helper Function: prepare_messages_and_tools

import json

THINK_TAG_OPEN = "<think>"
THINK_TAG_CLOSE = "</think>"

def prepare_messages_and_tools(example):
    raw = json.loads(example["messages"])
    msgs = [dict(m) for m in raw]

    # 1) Extract tools (same as before)
    tools_raw = []
    if msgs and isinstance(msgs[0], dict):
        tlist = msgs[0].get("tools")
        if isinstance(tlist, list) and tlist:
            tools_raw = tlist
            msgs[0].pop("tools", None)

    # 2) Merge assistant["think"] into ["content"]
    THINK_KEYS = ["think", "think_fast", "think_faster"]

    # TRACKER: Check if we successfully added thoughts
    has_valid_thought = False

    for m in msgs:
        if m.get("role") == "assistant":
            # Find the first available thinking key
            found_key = next((k for k in THINK_KEYS if m.get(k)), None)

            if found_key:
                think_text = m[found_key]
                content = m.get("content")
                think_block = f"{THINK_TAG_OPEN}{think_text}{THINK_TAG_CLOSE}"

                if isinstance(content, str) and content:
                    m["content"] = think_block + "\n" + content
                else:
                    m["content"] = think_block

                has_valid_thought = True

                # Clean up keys
                for k in THINK_KEYS:
                    m.pop(k, None)
            else:
                # If an assistant message HAS NO THOUGHT,
                # this example is "poison" for your goal.
                # We mark it as invalid to filter it out later.
                return None, None

    # If the conversation had no assistant turns at all (rare, but possible), skip it
    if not has_valid_thought:
        return None, None
    # 3) Normalize tool_calls to HF-style {type:'function', function:{name, arguments}}
    for m in msgs:
        if "tool_calls" not in m or not m["tool_calls"]:
            continue

        new_tool_calls = []
        for tc in m["tool_calls"]:
            if not isinstance(tc, dict):
                continue

            # Already has function dict?
            if "function" in tc and isinstance(tc["function"], dict):
                new_tool_calls.append(tc)
                continue

            fn_name = tc.get("name", "")
            args = tc.get("arguments", {})

            # Try to parse JSON string arguments
            if isinstance(args, str):
                try:
                    args = json.loads(args)
                except Exception:
                    pass

            new_tool_calls.append(
                {
                    "id": tc.get("id") or tc.get("tool_call_id"),
                    "type": tc.get("type", "function"),
                    "function": {
                        "name": fn_name,
                        "arguments": args,
                    },
                }
            )

        m["tool_calls"] = new_tool_calls

    # 3b) Build map from tool_call_id -> function name for later tool responses
    id_to_name = {}
    for m in msgs:
        for tc in m.get("tool_calls", []) or []:
            if not isinstance(tc, dict):
                continue
            fn = tc.get("function") or {}
            name = fn.get("name") or tc.get("name")
            tc_id = tc.get("id") or tc.get("tool_call_id")
            if tc_id and name:
                id_to_name[tc_id] = name

    # 3c) Ensure tool response messages have a 'name'
    for m in msgs:
        if m.get("role") == "tool":
            if not m.get("name"):
                # Try to infer from tool_call_id using previous map
                tc_id = m.get("tool_call_id")
                inferred = id_to_name.get(tc_id) if tc_id else None
                m["name"] = inferred or "unknown_tool"

    # 4) Normalize tool schemas to HF-style {type:'function', function:{...}}
    adapted_tools = []
    for t in tools_raw:
        if not isinstance(t, dict):
            continue

        if "function" in t and isinstance(t["function"], dict):
            adapted_tools.append(t)
            continue

        name = t.get("name", "")
        description = t.get("description", "")
        parameters = t.get("parameters") or {
            "type": "object",
            "properties": {},
        }

        adapted_tools.append(
            {
                "type": t.get("type", "function"),
                "function": {
                    "name": name,
                    "description": description,
                    "parameters": parameters,
                },
            }
        )

    # Delete empty system message
    first_message = msgs[0]
    if first_message["role"] == "system" and "content" not in first_message:
        msgs.pop(0)

    return msgs, adapted_tools

In [26]:
def format_example(example):
    messages, tools = prepare_messages_and_tools(example)

    # FILTER: If the preparation returned None, this example was bad.
    if messages is None or len(messages) == 0:
        return {"text": None}

    chat_str = tokenizer.apply_chat_template(
        messages,
        tools = tools,
        add_generation_prompt = False,
        tokenize = False,
    ).removeprefix("<bos>")

    return {
        "text": chat_str,
    }

# Apply the map
train_dataset = dataset.map(format_example)

# Filter out the None values
train_dataset = train_dataset.filter(lambda x: x["text"] is not None)

print(f"Dataset size after filtering: {len(train_dataset)}")

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10000/10000 [00:03<00:00, 3330.53 examples/s]
Filter: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10000/10000 [00:00<00:00, 98089.66 examples/s]

Dataset size after filtering: 4699





In [28]:
from trl import SFTTrainer,SFTConfig




In [31]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,
    # tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = None, # Can set up evaluation!
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 2, # Use GA to mimic batch size!
        warmup_steps = 10,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 500,
        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.001,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use TrackIO/WandB etc
    ),
)

Adding EOS to train dataset: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 4699/4699 [00:00<00:00, 7701.13 examples/s]
Tokenizing train dataset: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 4699/4699 [00:08<00:00, 563.13 examples/s] 
Truncating train dataset: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 4699/4699 [00:00<00:00, 115960.74 examples/s]
The model is already on multiple devices. Skipping the move to device specified in `args`.


In [33]:
import unsloth

In [32]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth.chat_templates import train_on_responses_only


ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!


Map (num_proc=1): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 4699/4699 [00:38<00:00, 122.75 examples/s]


In [34]:
tokenizer.decode(trainer.train_dataset[-1]["input_ids"])

"<bos><start_of_turn>developer\nYou are K2, a helpful assistant created by Mohamed bin Zayed University of Artificial Intelligence (MBZUAI) Institute of Foundation Models (IFM).<start_function_declaration>declaration:final_velocity{description:<escape>Calculates the final velocity of an object given its initial velocity, acceleration, and time.<escape>,parameters:{properties:{acceleration:{description:<escape>The acceleration of the object.<escape>,type:<escape>NUMBER<escape>},initial_velocity:{description:<escape>The initial velocity of the object.<escape>,type:<escape>NUMBER<escape>},time:{description:<escape>The time elapsed.<escape>,type:<escape>NUMBER<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:permutation_count{description:<escape>Calculates the number of permutations of k elements from a set of n elements.<escape>,parameters:{properties:{k:{description:<escape>The number of elements to choose for the permutation.<escap

In [35]:
[tokenizer.decode([tokenizer.pad_token_id if x == -100 else x for x in trainer.train_dataset[100]["labels"]]).replace(tokenizer.pad_token, "-")]

['--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------<think>User wants DNA sequence with ID HQ664368, format FASTA, upstream 250 bases. Use function get_dna_sequence with sequence_id "HQ664368", file_format "fasta", upstream_bases "250".</think><start_function_call>call:get_dna_sequence{file_format:<escape>fasta<escape>,sequence_id:<escape>HQ664368<escape>,upstream_bases:<escape>250<escape>}<end_function_call><start_function_response><eos>']

In [None]:
import torch._dynamo
torch._dynamo.config.suppress_errors = True
torch._dynamo.reset()

# Disable torch.compile completely for now
torch.set_float32_matmul_precision('high')
torch.backends.cudnn.allow_tf32 = True

In [39]:
import os
os.environ['TORCHDYNAMO_DISABLE'] = '1'
os.environ['TRITON_CACHE_DIR'] = 'C:/Users/dhanu/triton_cache'

In [40]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 3050 Laptop GPU. Max memory = 4.0 GB.
0.668 GB of memory reserved.


In [41]:
trainer_stats = trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.32 GiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 8.82 GiB is allocated by PyTorch, and 27.70 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# @title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
messages, tools = prepare_messages_and_tools(train_dataset[0])

text = tokenizer.apply_chat_template(
    messages[:1],
    tools = tools,
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
).removeprefix('<bos>')

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    max_new_tokens = 1024,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
    top_p = 0.95, top_k = 64, temperature = 1.0,
)

In [None]:
model.save_pretrained("functiongemma")  # Local saving
tokenizer.save_pretrained("functiongemma")