In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

from huggingface_hub import login

torch.cuda.empty_cache()

In [14]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-3B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct.
403 Client Error. (Request ID: Root=1-6747cb41-707a16823a2565b85586d0d4;919c30b1-17fe-4705-837d-8df0b848c329)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-11B-Vision-Instruct is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct to ask for access.

In [3]:
data = pd.read_json("../data/queries_dataset.json")
data.head()


Unnamed: 0,object_context,actions_dictionary,query,explanation,bt
0,a beach chair and umbrella on the beach (28.3m...,NavigateTo: Moves the robot to a specified tar...,Can you please bring me the bowl of soup and t...,"Good, I will first locate the bowl of soup in ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,a table with chairs (21.3m)\ntwo men standing ...,NavigateTo: Moves the robot to a specified tar...,"""Can you identify the man in the black jacket ...","In this scenario, I will first attempt to loca...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,a black cow grazing in a field (54.5m)\na man ...,NavigateTo: Moves the robot to a specified tar...,"""Can you locate the person standing in front o...","In this scenario, I will attempt to locate the...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,a blur of a person walking on a street (13.5m)...,NavigateTo: Moves the robot to a specified tar...,"""Can you display a message on the laptop to re...",I will start by attempting to display a messag...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,a group of men playing frc (64.7m)\na polar be...,NavigateTo: Moves the robot to a specified tar...,"""Can you take the bowl filled with fruit and i...",I will first check if there is a bowl filled w...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [4]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt = data['bt']


# Create df with the following columns: instruction, input, output
def formatting_input(examples):
    instruction = """You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
{object_context}

Actions Dictionary:
{actions_dictionary}
"""

    instruction = instruction.format(object_context=examples['object_context'], actions_dictionary=examples['actions_dictionary'])
    return instruction

for i in range(len(data)):
    instruction = formatting_input(data.iloc[i])
    print(instruction)
    break

formatted_data = pd.DataFrame({
    'instruction': instruction,
    'input': query,
    'output': bt,
})

formatted_data.head()

You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs system diagnostics to check for issues. (Arguments: )
MoveF

Unnamed: 0,instruction,input,output
0,You are a helpful robot assistant named Goat t...,Can you please bring me the bowl of soup and t...,"<root main_tree_to_execute=""MainTree"">\n <Beh..."
1,You are a helpful robot assistant named Goat t...,"""Can you identify the man in the black jacket ...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
2,You are a helpful robot assistant named Goat t...,"""Can you locate the person standing in front o...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
3,You are a helpful robot assistant named Goat t...,"""Can you display a message on the laptop to re...","<root main_tree_to_execute=""MainTree"">\n <Beh..."
4,You are a helpful robot assistant named Goat t...,"""Can you take the bowl filled with fruit and i...","<root main_tree_to_execute=""MainTree"">\n <Beh..."


In [5]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs

In [6]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

new_model = "llama-3.2-1b-bt-generator"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)

# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)




Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [7]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful robot assistant named Goat that can answer questions or execute actions by generating a behavior tree.

Object Context : 
a beach chair and umbrella on the beach (28.3m)
a woman walking down the street with an umbrella (96.9m)
a blur of a person in a classroom (95.0m)

Actions Dictionary:
NavigateTo: Moves the robot to a specified target (e.g., location or object). (Arguments: target)
Pick: Picks up a specified object. (Arguments: object)
Place: Places an object at a specified location. (Arguments: object, location)
LocateObject: Searches for a specified object in the environment. (Arguments: object)
RequestAssistance: Requests help for a specific task. (Arguments: task)
Wait: Waits for a specified amount of time. (Arguments: duration)
ExecuteCommand: Runs a specified bash or system command. (Arguments: command)
Communicate: Sends a message to a user or another robot. (Arguments: message)
RunDiagnostics: Runs

In [8]:
instructions = training_data["instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
I will first LocateObject to find the bowl of soup, then Pick it up, and finally Place it on the table.

Executing the behavior tree:

1. LocateObject: The robot searches for the bowl of soup and finds it 12.5 meters away from the current location.
2. Pick: The robot picks up the bowl of soup.
3. Place: The robot moves the bowl of soup to the table, which is 10.2 meters away from the current location.

The bowl of soup has been placed on the table.<|eot_id|>


In [9]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/45 [00:00<?, ?it/s]

{'loss': 2.1894, 'grad_norm': 1.6736924648284912, 'learning_rate': 2e-05, 'epoch': 0.02}
{'loss': 2.1813, 'grad_norm': 1.658263921737671, 'learning_rate': 4e-05, 'epoch': 0.04}
{'loss': 2.2717, 'grad_norm': 1.7856886386871338, 'learning_rate': 6e-05, 'epoch': 0.07}
{'loss': 2.1768, 'grad_norm': 1.444111943244934, 'learning_rate': 8e-05, 'epoch': 0.09}
{'loss': 2.1288, 'grad_norm': 1.4122183322906494, 'learning_rate': 0.0001, 'epoch': 0.11}
{'loss': 1.9967, 'grad_norm': 1.4140874147415161, 'learning_rate': 0.00012, 'epoch': 0.13}
{'loss': 1.7896, 'grad_norm': 1.3198517560958862, 'learning_rate': 0.00014, 'epoch': 0.16}
{'loss': 1.7409, 'grad_norm': 1.2455224990844727, 'learning_rate': 0.00016, 'epoch': 0.18}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.7841, 'grad_norm': 1.2371742725372314, 'learning_rate': 0.00018, 'epoch': 0.2}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 1.4464372396469116, 'eval_runtime': 2.6664, 'eval_samples_per_second': 3.75, 'eval_steps_per_second': 3.75, 'epoch': 0.2}
{'loss': 1.6032, 'grad_norm': 1.3993887901306152, 'learning_rate': 0.0002, 'epoch': 0.22}
{'loss': 1.4022, 'grad_norm': 1.5437101125717163, 'learning_rate': 0.0001942857142857143, 'epoch': 0.24}
{'loss': 0.92, 'grad_norm': 1.1819545030593872, 'learning_rate': 0.00018857142857142857, 'epoch': 0.27}
{'loss': 0.7297, 'grad_norm': 1.3927961587905884, 'learning_rate': 0.00018285714285714286, 'epoch': 0.29}
{'loss': 0.5167, 'grad_norm': 1.441786766052246, 'learning_rate': 0.00017714285714285713, 'epoch': 0.31}
{'loss': 0.4721, 'grad_norm': 1.334364652633667, 'learning_rate': 0.00017142857142857143, 'epoch': 0.33}
{'loss': 0.3829, 'grad_norm': 1.0687075853347778, 'learning_rate': 0.00016571428571428575, 'epoch': 0.36}
{'loss': 0.3743, 'grad_norm': 0.9090425968170166, 'learning_rate': 0.00016, 'epoch': 0.38}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.3084, 'grad_norm': 0.8688485622406006, 'learning_rate': 0.0001542857142857143, 'epoch': 0.4}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 0.3156535029411316, 'eval_runtime': 2.6662, 'eval_samples_per_second': 3.751, 'eval_steps_per_second': 3.751, 'epoch': 0.4}
{'loss': 0.3659, 'grad_norm': 1.0439064502716064, 'learning_rate': 0.00014857142857142857, 'epoch': 0.42}
{'loss': 0.3255, 'grad_norm': 0.7777609825134277, 'learning_rate': 0.00014285714285714287, 'epoch': 0.44}
{'loss': 0.4225, 'grad_norm': 0.7715872526168823, 'learning_rate': 0.00013714285714285716, 'epoch': 0.47}
{'loss': 0.3619, 'grad_norm': 0.7797138094902039, 'learning_rate': 0.00013142857142857143, 'epoch': 0.49}
{'loss': 0.348, 'grad_norm': 0.7666229605674744, 'learning_rate': 0.00012571428571428572, 'epoch': 0.51}
{'loss': 0.2896, 'grad_norm': 0.6874549388885498, 'learning_rate': 0.00012, 'epoch': 0.53}
{'loss': 0.276, 'grad_norm': 0.5873735547065735, 'learning_rate': 0.00011428571428571428, 'epoch': 0.56}
{'loss': 0.269, 'grad_norm': 0.5358514189720154, 'learning_rate': 0.00010857142857142856, 'epoch': 0.58}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.2625, 'grad_norm': 0.5606418251991272, 'learning_rate': 0.00010285714285714286, 'epoch': 0.6}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 0.2615589499473572, 'eval_runtime': 2.6703, 'eval_samples_per_second': 3.745, 'eval_steps_per_second': 3.745, 'epoch': 0.6}
{'loss': 0.2729, 'grad_norm': 0.6391561627388, 'learning_rate': 9.714285714285715e-05, 'epoch': 0.62}
{'loss': 0.309, 'grad_norm': 0.6247220039367676, 'learning_rate': 9.142857142857143e-05, 'epoch': 0.64}
{'loss': 0.3021, 'grad_norm': 0.5674965977668762, 'learning_rate': 8.571428571428571e-05, 'epoch': 0.67}
{'loss': 0.3386, 'grad_norm': 0.6242198944091797, 'learning_rate': 8e-05, 'epoch': 0.69}
{'loss': 0.2878, 'grad_norm': 0.5658569931983948, 'learning_rate': 7.428571428571429e-05, 'epoch': 0.71}
{'loss': 0.2361, 'grad_norm': 0.5284871459007263, 'learning_rate': 6.857142857142858e-05, 'epoch': 0.73}
{'loss': 0.3249, 'grad_norm': 0.6171308755874634, 'learning_rate': 6.285714285714286e-05, 'epoch': 0.76}
{'loss': 0.3039, 'grad_norm': 0.583638608455658, 'learning_rate': 5.714285714285714e-05, 'epoch': 0.78}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.2951, 'grad_norm': 0.5222903490066528, 'learning_rate': 5.142857142857143e-05, 'epoch': 0.8}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 0.24860134720802307, 'eval_runtime': 2.6694, 'eval_samples_per_second': 3.746, 'eval_steps_per_second': 3.746, 'epoch': 0.8}
{'loss': 0.2684, 'grad_norm': 0.5295025110244751, 'learning_rate': 4.5714285714285716e-05, 'epoch': 0.82}
{'loss': 0.2117, 'grad_norm': 0.544063150882721, 'learning_rate': 4e-05, 'epoch': 0.84}
{'loss': 0.2712, 'grad_norm': 0.6352582573890686, 'learning_rate': 3.428571428571429e-05, 'epoch': 0.87}
{'loss': 0.2913, 'grad_norm': 0.5485438108444214, 'learning_rate': 2.857142857142857e-05, 'epoch': 0.89}
{'loss': 0.2695, 'grad_norm': 0.5706021189689636, 'learning_rate': 2.2857142857142858e-05, 'epoch': 0.91}
{'loss': 0.3321, 'grad_norm': 0.577971339225769, 'learning_rate': 1.7142857142857145e-05, 'epoch': 0.93}
{'loss': 0.2682, 'grad_norm': 0.5459277033805847, 'learning_rate': 1.1428571428571429e-05, 'epoch': 0.96}
{'loss': 0.2507, 'grad_norm': 0.5057494640350342, 'learning_rate': 5.7142857142857145e-06, 'epoch': 0.98}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.2946, 'grad_norm': 0.5583511590957642, 'learning_rate': 0.0, 'epoch': 1.0}


  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 0.244718998670578, 'eval_runtime': 2.6649, 'eval_samples_per_second': 3.752, 'eval_steps_per_second': 3.752, 'epoch': 1.0}
{'train_runtime': 69.1006, 'train_samples_per_second': 1.302, 'train_steps_per_second': 0.651, 'train_loss': 0.7337260117133458, 'epoch': 1.0}


TrainOutput(global_step=45, training_loss=0.7337260117133458, metrics={'train_runtime': 69.1006, 'train_samples_per_second': 1.302, 'train_steps_per_second': 0.651, 'total_flos': 645761559625728.0, 'train_loss': 0.7337260117133458, 'epoch': 1.0})

In [10]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
<root main_tree_to_execute="MainTree">
  <BehaviorTree ID="MainTree">
    <Fallback>
      <Sequence>
        <Action ID="LocateObject" object="bowl_of_soup"/>
        <Action ID="Pick" object="bowl_of_soup"/>
        <Action ID="Place" object="bowl_of_soup" location="table"/>
      </Sequence>
      <Sequence>
        <Action ID="RequestAssistance" task="locate_bowl_of_soup"/>
      </Sequence>
    </Fallback>
  </BehaviorTree>
</root>

