In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import (
    IA3Config,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [2]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-3B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [3]:
data = pd.read_json("../data/queries_dataset.json")
data.head()

Unnamed: 0,object_context,actions_dictionary,query,explanation,bt
0,a beach chair and umbrella on the beach (28.3m...,NavigateTo: Moves the robot to a specified tar...,Can you please bring me the bowl of soup and t...,"Good, I will first locate the bowl of soup in ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,a table with chairs (21.3m)\ntwo men standing ...,NavigateTo: Moves the robot to a specified tar...,"""Can you identify the man in the black jacket ...","In this scenario, I will first attempt to loca...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,a black cow grazing in a field (54.5m)\na man ...,NavigateTo: Moves the robot to a specified tar...,"""Can you locate the person standing in front o...","In this scenario, I will attempt to locate the...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,a blur of a person walking on a street (13.5m)...,NavigateTo: Moves the robot to a specified tar...,"""Can you display a message on the laptop to re...",I will start by attempting to display a messag...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,a group of men playing frc (64.7m)\na polar be...,NavigateTo: Moves the robot to a specified tar...,"""Can you take the bowl filled with fruit and i...",I will first check if there is a bowl filled w...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [4]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt = data['bt']


# Create df with the following columns: instruction, input, output
def formatting_input(examples):
    instruction = """You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
{object_context}

Actions Dictionary:
{actions_dictionary}
"""

    instruction = instruction.format(object_context=examples['object_context'], actions_dictionary=examples['actions_dictionary'])
    return instruction

for i in range(len(data)):
    instruction = formatting_input(data.iloc[i])
    print(instruction)
    break

formatted_data = pd.DataFrame({
    'instruction': instruction,
    'input': query,
    'output': bt,
})

formatted_data.head()

You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs system diagnostics to check 

Unnamed: 0,instruction,input,output
0,You are a helpful robot assistant named Goat t...,Can you please bring me the bowl of soup and t...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,You are a helpful robot assistant named Goat t...,"""Can you identify the man in the black jacket ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,You are a helpful robot assistant named Goat t...,"""Can you locate the person standing in front o...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,You are a helpful robot assistant named Goat t...,"""Can you display a message on the laptop to re...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,You are a helpful robot assistant named Goat t...,"""Can you take the bowl filled with fruit and i...","<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [5]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments

In [6]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
#peft_config = PrefixTuningConfig(
#    num_virtual_tokens=30,  # Number of virtual tokens
#    task_type="CAUSAL_LM",  # Task type
#)

peft_config = IA3Config(task_type="CAUSAL_LM", target_modules=["k_proj", "v_proj", "down_proj"], feedforward_modules=["down_proj"])



new_model = "llama-3.2-1b-bt-generator-ia3"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=10,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)

# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)




Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [7]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree in the behaviortree cpp format.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments

In [8]:
instructions = training_data["instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
I will execute the action "Pick" to pick up the bowl of soup from the beach chair.

Here is the behavior tree in Cpp format:
```cpp
NavigateTo(table)
  - Pick(bowl of soup)
    - MoveForward(5.0m)
    - Place(bowl of soup, table)
```
The bowl of soup is now at the table.<|eot_id|>


In [9]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/450 [00:00<?, ?it/s]

{'loss': 2.3277, 'grad_norm': 0.37115049362182617, 'learning_rate': 2e-05, 'epoch': 0.02}
{'loss': 2.303, 'grad_norm': 0.3913576900959015, 'learning_rate': 4e-05, 'epoch': 0.04}
{'loss': 2.4745, 'grad_norm': 0.4053996503353119, 'learning_rate': 6e-05, 'epoch': 0.07}
{'loss': 2.4688, 'grad_norm': 0.4172709882259369, 'learning_rate': 8e-05, 'epoch': 0.09}
{'loss': 2.539, 'grad_norm': 0.42384669184684753, 'learning_rate': 0.0001, 'epoch': 0.11}
{'loss': 2.5409, 'grad_norm': 0.42062467336654663, 'learning_rate': 0.00012, 'epoch': 0.13}
{'loss': 2.5101, 'grad_norm': 0.4310063123703003, 'learning_rate': 0.00014, 'epoch': 0.16}
{'loss': 2.6112, 'grad_norm': 0.42962774634361267, 'learning_rate': 0.00016, 'epoch': 0.18}
{'loss': 2.9048, 'grad_norm': 0.4854852557182312, 'learning_rate': 0.00018, 'epoch': 0.2}
{'loss': 2.9927, 'grad_norm': 0.49561816453933716, 'learning_rate': 0.0002, 'epoch': 0.22}
{'loss': 2.9919, 'grad_norm': 0.5130966305732727, 'learning_rate': 0.00019954545454545455, 'epoch'

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.8271, 'grad_norm': 0.2228698581457138, 'learning_rate': 0.00016363636363636366, 'epoch': 2.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 2.105137586593628, 'eval_runtime': 1.0733, 'eval_samples_per_second': 9.317, 'eval_steps_per_second': 9.317, 'epoch': 2.0}
{'loss': 1.7905, 'grad_norm': 0.20809856057167053, 'learning_rate': 0.0001631818181818182, 'epoch': 2.02}
{'loss': 1.8256, 'grad_norm': 0.222058966755867, 'learning_rate': 0.00016272727272727272, 'epoch': 2.04}
{'loss': 1.8, 'grad_norm': 0.21095602214336395, 'learning_rate': 0.0001622727272727273, 'epoch': 2.07}
{'loss': 1.873, 'grad_norm': 0.21737197041511536, 'learning_rate': 0.00016181818181818184, 'epoch': 2.09}
{'loss': 1.8057, 'grad_norm': 0.2232443392276764, 'learning_rate': 0.00016136363636363635, 'epoch': 2.11}
{'loss': 1.8031, 'grad_norm': 0.22281919419765472, 'learning_rate': 0.00016090909090909092, 'epoch': 2.13}
{'loss': 1.8679, 'grad_norm': 0.23071524500846863, 'learning_rate': 0.00016045454545454547, 'epoch': 2.16}
{'loss': 1.9032, 'grad_norm': 0.25060176849365234, 'learning_rate': 0.00016, 'epoch': 2.18}
{'loss': 2.0797, 'grad_norm': 0

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.4727, 'grad_norm': 0.21479246020317078, 'learning_rate': 0.00012272727272727272, 'epoch': 4.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.6826480627059937, 'eval_runtime': 1.1005, 'eval_samples_per_second': 9.087, 'eval_steps_per_second': 9.087, 'epoch': 4.0}
{'loss': 1.4282, 'grad_norm': 0.20051975548267365, 'learning_rate': 0.00012227272727272727, 'epoch': 4.02}
{'loss': 1.4486, 'grad_norm': 0.2088058441877365, 'learning_rate': 0.00012181818181818183, 'epoch': 4.04}
{'loss': 1.4804, 'grad_norm': 0.21559588611125946, 'learning_rate': 0.00012136363636363637, 'epoch': 4.07}
{'loss': 1.4851, 'grad_norm': 0.20453603565692902, 'learning_rate': 0.0001209090909090909, 'epoch': 4.09}
{'loss': 1.4251, 'grad_norm': 0.21104025840759277, 'learning_rate': 0.00012045454545454546, 'epoch': 4.11}
{'loss': 1.4937, 'grad_norm': 0.22659382224082947, 'learning_rate': 0.00012, 'epoch': 4.13}
{'loss': 1.5521, 'grad_norm': 0.22575554251670837, 'learning_rate': 0.00011954545454545456, 'epoch': 4.16}
{'loss': 1.5642, 'grad_norm': 0.2347021847963333, 'learning_rate': 0.00011909090909090909, 'epoch': 4.18}
{'loss': 1.7293, 'grad_n

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.3381, 'grad_norm': 0.2087547481060028, 'learning_rate': 8.181818181818183e-05, 'epoch': 6.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.3890645503997803, 'eval_runtime': 1.0977, 'eval_samples_per_second': 9.11, 'eval_steps_per_second': 9.11, 'epoch': 6.0}
{'loss': 1.2349, 'grad_norm': 0.21409077942371368, 'learning_rate': 8.136363636363636e-05, 'epoch': 6.02}
{'loss': 1.2512, 'grad_norm': 0.19981037080287933, 'learning_rate': 8.090909090909092e-05, 'epoch': 6.04}
{'loss': 1.2578, 'grad_norm': 0.21660538017749786, 'learning_rate': 8.045454545454546e-05, 'epoch': 6.07}
{'loss': 1.1741, 'grad_norm': 0.19755034148693085, 'learning_rate': 8e-05, 'epoch': 6.09}
{'loss': 1.2039, 'grad_norm': 0.20340712368488312, 'learning_rate': 7.954545454545455e-05, 'epoch': 6.11}
{'loss': 1.3123, 'grad_norm': 0.23910517990589142, 'learning_rate': 7.90909090909091e-05, 'epoch': 6.13}
{'loss': 1.3935, 'grad_norm': 0.22277049720287323, 'learning_rate': 7.863636363636364e-05, 'epoch': 6.16}
{'loss': 1.3878, 'grad_norm': 0.255632221698761, 'learning_rate': 7.818181818181818e-05, 'epoch': 6.18}
{'loss': 1.4708, 'grad_norm': 0.266

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.1497, 'grad_norm': 0.21001681685447693, 'learning_rate': 4.0909090909090915e-05, 'epoch': 8.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.203382134437561, 'eval_runtime': 1.1059, 'eval_samples_per_second': 9.042, 'eval_steps_per_second': 9.042, 'epoch': 8.0}
{'loss': 1.0979, 'grad_norm': 0.21353238821029663, 'learning_rate': 4.045454545454546e-05, 'epoch': 8.02}
{'loss': 1.0559, 'grad_norm': 0.2105957567691803, 'learning_rate': 4e-05, 'epoch': 8.04}
{'loss': 1.0584, 'grad_norm': 0.19047656655311584, 'learning_rate': 3.954545454545455e-05, 'epoch': 8.07}
{'loss': 1.0726, 'grad_norm': 0.20908606052398682, 'learning_rate': 3.909090909090909e-05, 'epoch': 8.09}
{'loss': 1.0692, 'grad_norm': 0.21913571655750275, 'learning_rate': 3.8636363636363636e-05, 'epoch': 8.11}
{'loss': 1.1247, 'grad_norm': 0.19585619866847992, 'learning_rate': 3.818181818181819e-05, 'epoch': 8.13}
{'loss': 1.0766, 'grad_norm': 0.2042999416589737, 'learning_rate': 3.7727272727272725e-05, 'epoch': 8.16}
{'loss': 1.1035, 'grad_norm': 0.22026589512825012, 'learning_rate': 3.7272727272727276e-05, 'epoch': 8.18}
{'loss': 1.1725, 'grad_norm': 

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.034, 'grad_norm': 0.20743925869464874, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.1415091753005981, 'eval_runtime': 1.0759, 'eval_samples_per_second': 9.295, 'eval_steps_per_second': 9.295, 'epoch': 10.0}
{'train_runtime': 228.1726, 'train_samples_per_second': 3.944, 'train_steps_per_second': 1.972, 'train_loss': 1.56362729522917, 'epoch': 10.0}


TrainOutput(global_step=450, training_loss=1.56362729522917, metrics={'train_runtime': 228.1726, 'train_samples_per_second': 3.944, 'train_steps_per_second': 1.972, 'total_flos': 2250468853800960.0, 'train_loss': 1.56362729522917, 'epoch': 10.0})

In [10]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
<Root>
    <Actions>
        <RequestAssistance>BringBowlOfSoup</Actions>
        <NavigateTo>BeachChairAndUmbrella</Actions>
        <Pick>Bowl</Actions>
        <Place>BowlOnTable</Actions>
    </Actions>
</Root>
Wait(10)
<Actions>
    <ExecuteCommand>move forward 5</Actions>
    <ReleaseBowlOnTable</Actions>
</Root>
<Wait(5)>
<Actions>
    <Communicate>send message "Thank you for bringing me soup!"</Actions>
</Root>
<Wait(1)>
