In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import (
    IA3Config,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [2]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-3B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

1

In [3]:
data = pd.read_json("../data/queries_dataset.json")
data.head()

Unnamed: 0,object_context,actions_dictionary,query,explanation,bt
0,a beach chair and umbrella on the beach (28.3m...,NavigateTo: Moves the robot to a specified tar...,Can you please bring me the bowl of soup and t...,"Good, I will first locate the bowl of soup in ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,a table with chairs (21.3m)\ntwo men standing ...,NavigateTo: Moves the robot to a specified tar...,"""Can you identify the man in the black jacket ...","In this scenario, I will first attempt to loca...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,a black cow grazing in a field (54.5m)\na man ...,NavigateTo: Moves the robot to a specified tar...,"""Can you locate the person standing in front o...","In this scenario, I will attempt to locate the...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,a blur of a person walking on a street (13.5m)...,NavigateTo: Moves the robot to a specified tar...,"""Can you display a message on the laptop to re...",I will start by attempting to display a messag...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,a group of men playing frc (64.7m)\na polar be...,NavigateTo: Moves the robot to a specified tar...,"""Can you take the bowl filled with fruit and i...",I will first check if there is a bowl filled w...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [4]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt = data['bt']


# Create df with the following columns: instruction, input, output
def formatting_input(examples):
    instruction = """You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
{object_context}

Actions Dictionary:
{actions_dictionary}
"""

    instruction = instruction.format(object_context=examples['object_context'], actions_dictionary=examples['actions_dictionary'])
    return instruction

for i in range(len(data)):
    instruction = formatting_input(data.iloc[i])
    print(instruction)
    break

formatted_data = pd.DataFrame({
    'instruction': instruction,
    'input': query,
    'output': bt,
})

formatted_data.head()

You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs system diagnostics to check 

Unnamed: 0,instruction,input,output
0,You are a helpful robot assistant named Goat t...,Can you please bring me the bowl of soup and t...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,You are a helpful robot assistant named Goat t...,"""Can you identify the man in the black jacket ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,You are a helpful robot assistant named Goat t...,"""Can you locate the person standing in front o...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,You are a helpful robot assistant named Goat t...,"""Can you display a message on the laptop to re...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,You are a helpful robot assistant named Goat t...,"""Can you take the bowl filled with fruit and i...","<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [5]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments

In [6]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
#peft_config = PrefixTuningConfig(
#    num_virtual_tokens=30,  # Number of virtual tokens
#    task_type="CAUSAL_LM",  # Task type
#)

peft_config = IA3Config(task_type="CAUSAL_LM", target_modules=["k_proj", "v_proj", "down_proj"], feedforward_modules=["down_proj"])



new_model = "llama-3.2-1b-bt-generator-ia3"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=10,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)

# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)




Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [7]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments

In [8]:
instructions = training_data["instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
I will generate a behavior tree to accomplish this task.

**Behavior Tree:**

1. **NavigateTo** (target: "bowl of soup")
	* ExecuteCommand (command: "LocateObject" with argument: "bowl of soup")
2. **Wait** (duration: 2 seconds)
3. **Pick** (object: "bowl of soup")
4. **NavigateTo** (target: "table")
5. **Place** (object: "bowl of soup", location: "table")
6. **Release** (object: "bowl of soup") -> This is not explicitly stated in the original request, but it's assumed that the bowl of soup will be released after it's placed on the table.

Please note that this behavior tree assumes that the robot can see the bowl of soup and the table, and that it can move around and interact with objects in the environment. If there are any obstacles or limitations, the robot may need to be modified to accommodate those conditions.<|eot_id|>


In [9]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/450 [00:00<?, ?it/s]

{'loss': 2.2404, 'grad_norm': 0.5052285194396973, 'learning_rate': 2e-05, 'epoch': 0.02}
{'loss': 2.222, 'grad_norm': 0.49845561385154724, 'learning_rate': 4e-05, 'epoch': 0.04}
{'loss': 2.3632, 'grad_norm': 0.5483046174049377, 'learning_rate': 6e-05, 'epoch': 0.07}
{'loss': 2.3553, 'grad_norm': 0.5415289402008057, 'learning_rate': 8e-05, 'epoch': 0.09}
{'loss': 2.4223, 'grad_norm': 0.56755530834198, 'learning_rate': 0.0001, 'epoch': 0.11}
{'loss': 2.4278, 'grad_norm': 0.5752573013305664, 'learning_rate': 0.00012, 'epoch': 0.13}
{'loss': 2.3848, 'grad_norm': 0.5165063738822937, 'learning_rate': 0.00014, 'epoch': 0.16}
{'loss': 2.5268, 'grad_norm': 0.540898859500885, 'learning_rate': 0.00016, 'epoch': 0.18}
{'loss': 2.831, 'grad_norm': 0.5970773696899414, 'learning_rate': 0.00018, 'epoch': 0.2}
{'loss': 2.8624, 'grad_norm': 0.5914139151573181, 'learning_rate': 0.0002, 'epoch': 0.22}
{'loss': 2.9287, 'grad_norm': 0.6268059611320496, 'learning_rate': 0.00019954545454545455, 'epoch': 0.24}

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.6745, 'grad_norm': 0.24527615308761597, 'learning_rate': 0.00016363636363636366, 'epoch': 2.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.9440873861312866, 'eval_runtime': 2.5618, 'eval_samples_per_second': 3.903, 'eval_steps_per_second': 3.903, 'epoch': 2.0}
{'loss': 1.6373, 'grad_norm': 0.20656536519527435, 'learning_rate': 0.0001631818181818182, 'epoch': 2.02}
{'loss': 1.6417, 'grad_norm': 0.21549652516841888, 'learning_rate': 0.00016272727272727272, 'epoch': 2.04}
{'loss': 1.6405, 'grad_norm': 0.23375044763088226, 'learning_rate': 0.0001622727272727273, 'epoch': 2.07}
{'loss': 1.7135, 'grad_norm': 0.20034921169281006, 'learning_rate': 0.00016181818181818184, 'epoch': 2.09}
{'loss': 1.649, 'grad_norm': 0.1997220665216446, 'learning_rate': 0.00016136363636363635, 'epoch': 2.11}
{'loss': 1.662, 'grad_norm': 0.21280109882354736, 'learning_rate': 0.00016090909090909092, 'epoch': 2.13}
{'loss': 1.6743, 'grad_norm': 0.22023487091064453, 'learning_rate': 0.00016045454545454547, 'epoch': 2.16}
{'loss': 1.7421, 'grad_norm': 0.2242555469274521, 'learning_rate': 0.00016, 'epoch': 2.18}
{'loss': 1.8945, 'grad_norm

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.3211, 'grad_norm': 0.17616033554077148, 'learning_rate': 0.00012272727272727272, 'epoch': 4.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.5032751560211182, 'eval_runtime': 2.5824, 'eval_samples_per_second': 3.872, 'eval_steps_per_second': 3.872, 'epoch': 4.0}
{'loss': 1.2482, 'grad_norm': 0.16714024543762207, 'learning_rate': 0.00012227272727272727, 'epoch': 4.02}
{'loss': 1.2724, 'grad_norm': 0.17992423474788666, 'learning_rate': 0.00012181818181818183, 'epoch': 4.04}
{'loss': 1.3178, 'grad_norm': 0.18119332194328308, 'learning_rate': 0.00012136363636363637, 'epoch': 4.07}
{'loss': 1.2705, 'grad_norm': 0.18901216983795166, 'learning_rate': 0.0001209090909090909, 'epoch': 4.09}
{'loss': 1.2398, 'grad_norm': 0.1562797576189041, 'learning_rate': 0.00012045454545454546, 'epoch': 4.11}
{'loss': 1.3204, 'grad_norm': 0.1721307337284088, 'learning_rate': 0.00012, 'epoch': 4.13}
{'loss': 1.3593, 'grad_norm': 0.1945624053478241, 'learning_rate': 0.00011954545454545456, 'epoch': 4.16}
{'loss': 1.3995, 'grad_norm': 0.19733306765556335, 'learning_rate': 0.00011909090909090909, 'epoch': 4.18}
{'loss': 1.5809, 'grad_no

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.1305, 'grad_norm': 0.18951039016246796, 'learning_rate': 8.181818181818183e-05, 'epoch': 6.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.172912359237671, 'eval_runtime': 2.6441, 'eval_samples_per_second': 3.782, 'eval_steps_per_second': 3.782, 'epoch': 6.0}
{'loss': 0.9917, 'grad_norm': 0.1759195327758789, 'learning_rate': 8.136363636363636e-05, 'epoch': 6.02}
{'loss': 1.0117, 'grad_norm': 0.19387516379356384, 'learning_rate': 8.090909090909092e-05, 'epoch': 6.04}
{'loss': 1.0665, 'grad_norm': 0.20359046757221222, 'learning_rate': 8.045454545454546e-05, 'epoch': 6.07}
{'loss': 0.9542, 'grad_norm': 0.20481397211551666, 'learning_rate': 8e-05, 'epoch': 6.09}
{'loss': 0.9705, 'grad_norm': 0.19291844964027405, 'learning_rate': 7.954545454545455e-05, 'epoch': 6.11}
{'loss': 1.128, 'grad_norm': 0.18996214866638184, 'learning_rate': 7.90909090909091e-05, 'epoch': 6.13}
{'loss': 1.1767, 'grad_norm': 0.20097191631793976, 'learning_rate': 7.863636363636364e-05, 'epoch': 6.16}
{'loss': 1.1674, 'grad_norm': 0.22462782263755798, 'learning_rate': 7.818181818181818e-05, 'epoch': 6.18}
{'loss': 1.2833, 'grad_norm': 0.22

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.9409, 'grad_norm': 0.1778581589460373, 'learning_rate': 4.0909090909090915e-05, 'epoch': 8.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 0.9568001627922058, 'eval_runtime': 2.5515, 'eval_samples_per_second': 3.919, 'eval_steps_per_second': 3.919, 'epoch': 8.0}
{'loss': 0.8462, 'grad_norm': 0.18034426867961884, 'learning_rate': 4.045454545454546e-05, 'epoch': 8.02}
{'loss': 0.8544, 'grad_norm': 0.18705035746097565, 'learning_rate': 4e-05, 'epoch': 8.04}
{'loss': 0.8045, 'grad_norm': 0.19402901828289032, 'learning_rate': 3.954545454545455e-05, 'epoch': 8.07}
{'loss': 0.8439, 'grad_norm': 0.17238454520702362, 'learning_rate': 3.909090909090909e-05, 'epoch': 8.09}
{'loss': 0.8235, 'grad_norm': 0.18469052016735077, 'learning_rate': 3.8636363636363636e-05, 'epoch': 8.11}
{'loss': 0.8587, 'grad_norm': 0.19191093742847443, 'learning_rate': 3.818181818181819e-05, 'epoch': 8.13}
{'loss': 0.7885, 'grad_norm': 0.1863243579864502, 'learning_rate': 3.7727272727272725e-05, 'epoch': 8.16}
{'loss': 0.885, 'grad_norm': 0.1865568310022354, 'learning_rate': 3.7272727272727276e-05, 'epoch': 8.18}
{'loss': 0.9467, 'grad_norm': 

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.8434, 'grad_norm': 0.1866290420293808, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 0.8881128430366516, 'eval_runtime': 2.6099, 'eval_samples_per_second': 3.831, 'eval_steps_per_second': 3.831, 'epoch': 10.0}
{'train_runtime': 525.2502, 'train_samples_per_second': 1.713, 'train_steps_per_second': 0.857, 'train_loss': 1.3740862434440189, 'epoch': 10.0}


TrainOutput(global_step=450, training_loss=1.3740862434440189, metrics={'train_runtime': 525.2502, 'train_samples_per_second': 1.713, 'train_steps_per_second': 0.857, 'total_flos': 6518227054448640.0, 'train_loss': 1.3740862434440189, 'epoch': 10.0})

In [10]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
<root main_tree="MainTree">
  <BehaviorTree ID="MainTree">
    <Fallback>
      <Sequence>
        <Action ID="LocateObject" object="bowl of soup"/>
        <Action ID="NavigateTo" target="table"/>
        <Action ID="Place" object="bowl of soup" location="table"/>
      </Sequence>
    </Fallback>
  </BehaviorTree>
</root>
I will now execute the MainTree behavior tree.
