In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import (
    PrefixTuningConfig,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [2]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [3]:
data = pd.read_json("../data/queries_dataset.json")
data.head()

Unnamed: 0,object_context,actions_dictionary,query,explanation,bt
0,a beach chair and umbrella on the beach (28.3m...,NavigateTo: Moves the robot to a specified tar...,Can you please bring me the bowl of soup and t...,"Good, I will first locate the bowl of soup in ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,a table with chairs (21.3m)\ntwo men standing ...,NavigateTo: Moves the robot to a specified tar...,"""Can you identify the man in the black jacket ...","In this scenario, I will first attempt to loca...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,a black cow grazing in a field (54.5m)\na man ...,NavigateTo: Moves the robot to a specified tar...,"""Can you locate the person standing in front o...","In this scenario, I will attempt to locate the...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,a blur of a person walking on a street (13.5m)...,NavigateTo: Moves the robot to a specified tar...,"""Can you display a message on the laptop to re...",I will start by attempting to display a messag...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,a group of men playing frc (64.7m)\na polar be...,NavigateTo: Moves the robot to a specified tar...,"""Can you take the bowl filled with fruit and i...",I will first check if there is a bowl filled w...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [4]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt = data['bt']


# Create df with the following columns: instruction, input, output
def formatting_input(examples):
    instruction = """You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
{object_context}

Actions Dictionary:
{actions_dictionary}
"""

    instruction = instruction.format(object_context=examples['object_context'], actions_dictionary=examples['actions_dictionary'])
    return instruction

for i in range(len(data)):
    instruction = formatting_input(data.iloc[i])
    print(instruction)
    break

formatted_data = pd.DataFrame({
    'instruction': instruction,
    'input': query,
    'output': bt,
})

formatted_data.head()

You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs system diagnostics to check for issues. (Arguments: )
MoveF

Unnamed: 0,instruction,input,output
0,You are a helpful robot assistant named Goat t...,Can you please bring me the bowl of soup and t...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,You are a helpful robot assistant named Goat t...,"""Can you identify the man in the black jacket ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,You are a helpful robot assistant named Goat t...,"""Can you locate the person standing in front o...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,You are a helpful robot assistant named Goat t...,"""Can you display a message on the laptop to re...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,You are a helpful robot assistant named Goat t...,"""Can you take the bowl filled with fruit and i...","<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [5]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs

In [6]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
peft_config = PrefixTuningConfig(
    num_virtual_tokens=30,  # Number of virtual tokens
    task_type="CAUSAL_LM",  # Task type
)


new_model = "llama-3.2-1b-bt-generator-prefix"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=3,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)

# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)




Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [7]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs

In [8]:
instructions = training_data["instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
I will execute the action of bringing the bowl of soup and then release it on the table.

Actions Executed:
- Bring: Moves the robot to the bowl of soup (28.3m)
- Release: Places the bowl of soup on the table (0.0m)

Result:
The bowl of soup is now on the table.

Object Context remains the same: a beach chair and umbrella on the beach (28.3m), a woman walking down the street with an umbrella (96.9m), a blur of a person in a classroom (95.0m)

Next Action:
- LocateObject: Searches for the bowl of soup in the environment.<|eot_id|>


In [9]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/135 [00:00<?, ?it/s]

We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)


{'loss': 12.0998, 'grad_norm': 0.8972693681716919, 'learning_rate': 2e-05, 'epoch': 0.02}
{'loss': 12.0266, 'grad_norm': 0.8810555934906006, 'learning_rate': 4e-05, 'epoch': 0.04}
{'loss': 12.099, 'grad_norm': 0.9255397319793701, 'learning_rate': 6e-05, 'epoch': 0.07}
{'loss': 11.9771, 'grad_norm': 0.9212969541549683, 'learning_rate': 8e-05, 'epoch': 0.09}
{'loss': 11.9784, 'grad_norm': 0.9261974692344666, 'learning_rate': 0.0001, 'epoch': 0.11}
{'loss': 11.8053, 'grad_norm': 0.969542384147644, 'learning_rate': 0.00012, 'epoch': 0.13}
{'loss': 11.758, 'grad_norm': 0.9381641149520874, 'learning_rate': 0.00014, 'epoch': 0.16}
{'loss': 11.7821, 'grad_norm': 0.961219310760498, 'learning_rate': 0.00016, 'epoch': 0.18}
{'loss': 11.639, 'grad_norm': 1.0287214517593384, 'learning_rate': 0.00018, 'epoch': 0.2}
{'loss': 11.4382, 'grad_norm': 1.0965269804000854, 'learning_rate': 0.0002, 'epoch': 0.22}
{'loss': 11.2884, 'grad_norm': 1.1242878437042236, 'learning_rate': 0.0001984, 'epoch': 0.24}
{'

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 11.4016, 'grad_norm': 0.6568716168403625, 'learning_rate': 0.0001728, 'epoch': 0.6}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 11.064292907714844, 'eval_runtime': 1.0593, 'eval_samples_per_second': 9.44, 'eval_steps_per_second': 9.44, 'epoch': 0.6}
{'loss': 11.3786, 'grad_norm': 0.635890543460846, 'learning_rate': 0.00017120000000000001, 'epoch': 0.62}
{'loss': 11.2793, 'grad_norm': 0.6475136280059814, 'learning_rate': 0.0001696, 'epoch': 0.64}
{'loss': 11.0392, 'grad_norm': 0.6697637438774109, 'learning_rate': 0.000168, 'epoch': 0.67}
{'loss': 10.6463, 'grad_norm': 0.7631388306617737, 'learning_rate': 0.0001664, 'epoch': 0.69}
{'loss': 10.5948, 'grad_norm': 0.7601417303085327, 'learning_rate': 0.0001648, 'epoch': 0.71}
{'loss': 10.4571, 'grad_norm': 0.7995731234550476, 'learning_rate': 0.0001632, 'epoch': 0.73}
{'loss': 11.3247, 'grad_norm': 0.6531544327735901, 'learning_rate': 0.00016160000000000002, 'epoch': 0.76}
{'loss': 11.398, 'grad_norm': 0.6191014647483826, 'learning_rate': 0.00016, 'epoch': 0.78}
{'loss': 11.308, 'grad_norm': 0.6139654517173767, 'learning_rate': 0.00015840000000000003, 

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 10.4797, 'grad_norm': 0.6087929606437683, 'learning_rate': 0.0001296, 'epoch': 1.2}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 10.505821228027344, 'eval_runtime': 1.0498, 'eval_samples_per_second': 9.526, 'eval_steps_per_second': 9.526, 'epoch': 1.2}
{'loss': 10.3604, 'grad_norm': 0.6282442808151245, 'learning_rate': 0.00012800000000000002, 'epoch': 1.22}
{'loss': 10.0728, 'grad_norm': 0.6826703548431396, 'learning_rate': 0.0001264, 'epoch': 1.24}
{'loss': 11.0055, 'grad_norm': 0.5656613111495972, 'learning_rate': 0.0001248, 'epoch': 1.27}
{'loss': 10.9045, 'grad_norm': 0.582356333732605, 'learning_rate': 0.0001232, 'epoch': 1.29}
{'loss': 10.7734, 'grad_norm': 0.5679881572723389, 'learning_rate': 0.0001216, 'epoch': 1.31}
{'loss': 10.8503, 'grad_norm': 0.5773313641548157, 'learning_rate': 0.00012, 'epoch': 1.33}
{'loss': 10.8226, 'grad_norm': 0.5659737586975098, 'learning_rate': 0.0001184, 'epoch': 1.36}
{'loss': 10.6178, 'grad_norm': 0.5682089328765869, 'learning_rate': 0.00011679999999999999, 'epoch': 1.38}
{'loss': 10.656, 'grad_norm': 0.5735082626342773, 'learning_rate': 0.0001152, 'epoch': 

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 10.4675, 'grad_norm': 0.5401904582977295, 'learning_rate': 8.64e-05, 'epoch': 1.8}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 10.13664436340332, 'eval_runtime': 1.0598, 'eval_samples_per_second': 9.436, 'eval_steps_per_second': 9.436, 'epoch': 1.8}
{'loss': 10.5063, 'grad_norm': 0.5574674606323242, 'learning_rate': 8.48e-05, 'epoch': 1.82}
{'loss': 10.4385, 'grad_norm': 0.5474872589111328, 'learning_rate': 8.32e-05, 'epoch': 1.84}
{'loss': 10.3328, 'grad_norm': 0.5469573736190796, 'learning_rate': 8.16e-05, 'epoch': 1.87}
{'loss': 10.1753, 'grad_norm': 0.5543910264968872, 'learning_rate': 8e-05, 'epoch': 1.89}
{'loss': 10.0788, 'grad_norm': 0.5717887282371521, 'learning_rate': 7.840000000000001e-05, 'epoch': 1.91}
{'loss': 9.815, 'grad_norm': 0.6391775608062744, 'learning_rate': 7.680000000000001e-05, 'epoch': 1.93}
{'loss': 9.687, 'grad_norm': 0.661594569683075, 'learning_rate': 7.52e-05, 'epoch': 1.96}
{'loss': 9.3552, 'grad_norm': 0.7173839807510376, 'learning_rate': 7.36e-05, 'epoch': 1.98}
{'loss': 10.5777, 'grad_norm': 0.5640469193458557, 'learning_rate': 7.2e-05, 'epoch': 2.0}
{'loss': 10

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 10.0274, 'grad_norm': 0.5395910739898682, 'learning_rate': 4.32e-05, 'epoch': 2.4}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 9.914584159851074, 'eval_runtime': 1.0592, 'eval_samples_per_second': 9.441, 'eval_steps_per_second': 9.441, 'epoch': 2.4}
{'loss': 10.042, 'grad_norm': 0.5535166263580322, 'learning_rate': 4.16e-05, 'epoch': 2.42}
{'loss': 9.8886, 'grad_norm': 0.550167441368103, 'learning_rate': 4e-05, 'epoch': 2.44}
{'loss': 9.5514, 'grad_norm': 0.6326387524604797, 'learning_rate': 3.8400000000000005e-05, 'epoch': 2.47}
{'loss': 9.1814, 'grad_norm': 0.6961883306503296, 'learning_rate': 3.68e-05, 'epoch': 2.49}
{'loss': 10.3596, 'grad_norm': 0.5757396221160889, 'learning_rate': 3.52e-05, 'epoch': 2.51}
{'loss': 10.3361, 'grad_norm': 0.5550801157951355, 'learning_rate': 3.3600000000000004e-05, 'epoch': 2.53}
{'loss': 10.2752, 'grad_norm': 0.5450955033302307, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.56}
{'loss': 10.2662, 'grad_norm': 0.5442425608634949, 'learning_rate': 3.04e-05, 'epoch': 2.58}
{'loss': 10.2777, 'grad_norm': 0.5480378270149231, 'learning_rate': 2.88e-05, 'epoch'

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 10.1121, 'grad_norm': 0.5206335783004761, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 9.839621543884277, 'eval_runtime': 1.0504, 'eval_samples_per_second': 9.521, 'eval_steps_per_second': 9.521, 'epoch': 3.0}
{'train_runtime': 69.6178, 'train_samples_per_second': 3.878, 'train_steps_per_second': 1.939, 'train_loss': 10.665396005135996, 'epoch': 3.0}


TrainOutput(global_step=135, training_loss=10.665396005135996, metrics={'train_runtime': 69.6178, 'train_samples_per_second': 3.878, 'train_steps_per_second': 1.939, 'total_flos': 663109545885696.0, 'train_loss': 10.665396005135996, 'epoch': 3.0})

In [10]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
I'll bring the bowl of soup to you. I'll also place it on the table.

Object Context: 
- A bowl of soup (10.0m) 
- A table (0.0m) 

Actions Dictionary:
Bring: Moves the robot to a specified location. (Arguments: location)
Place: Places an object at a specified location. (Arguments: object, location)
Release: Releases an object from a specified location. (Arguments: object)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
Wait: Waits for a specified amount of time. (Arguments: duration)
Communicate: Sends a message to a user or another robot. (Arguments: message)
MoveForward: Moves the robot forward by a specified distance. (Arguments: distance)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)

Your current position is at 28.3m. I am now 96.9m away from the woman walking down the street with an umbrella. I am currently at 95.0m away from the blur of a person in a classroom.

What would you like