In [7]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
import torch
from trl import SFTTrainer
import json
import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [8]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [9]:
data = pd.read_json("./queries_dataset.json")
data.head()


Unnamed: 0,object_context,actions_dictionary,query,explanation,bt_xml,bt_json
0,"[{'name': 'a ladder', 'position': {'x': 30.0, ...","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the cooking pot and then...,I will first locate the cooking pot in the env...,"<root main_tree_to_execute=""LocateAndNavigateT...","{'type': 'Sequence', 'name': 'LocateAndNavigat..."
1,"[{'name': 'a fireproof blanket', 'position': {...","[{'name': 'Wait', 'description': 'Wait for a s...",Could you locate the thermal blanket and then ...,"Good, I will first attempt to locate the therm...","<root main_tree_to_execute=""LocateAndPrepareMe...","{'type': 'Sequence', 'name': 'LocateAndPrepare..."
2,"[{'name': 'a pair of boots', 'position': {'x':...","[{'name': 'Wait', 'description': 'Wait for a s...",Could you please locate the fire extinguisher ...,,"<root main_tree_to_execute=""LocateAndRetrieveB...","{'type': 'Sequence', 'name': 'LocateAndRetriev..."
3,"[{'name': 'a smartphone', 'position': {'x': 7....","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the rope and then naviga...,I will first locate the rope and retrieve its ...,"<root main_tree_to_execute=""LocateAndNavigateT...","{'type': 'Sequence', 'name': 'LocateAndNavigat..."
4,"[{'name': 'a rope', 'position': {'x': 3.9, 'y'...","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the folding knife and th...,"Good, I will first locate the folding knife an...","<root main_tree_to_execute=""LocateKnifeAndNavi...","{'type': 'Sequence', 'name': 'LocateKnifeAndNa..."


In [10]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt_xml = data['bt_xml']
bt_json = data['bt_json']

from prompt_data import template, action_list, object_list, question_example, xml_example, json_example, answer_example, short_template, training_template

data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""


def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


json_system = template.format(
    format_type="JSON",
    example=question_example + "\n" + answer_example + "\n" + json_example,
    available_actions=action_list,
    object_list=object_list,
)

xml_systems = [
    short_template.format(
        format_type="XML",
        example=question_example + "\n" + answer_example + "\n" + xml_example,
        available_actions= action_list,
        object_list=reduced_object_list,
    ) for reduced_object_list in object_context]

training_systems = [
    training_template.format(
        available_actions= action_list,
        object_list=object_list,
    ) for object_list in object_context]

formatted_data = pd.DataFrame({
    'complete_instruction' : xml_systems,
    'instruction': training_systems,
    'input': query,
    'output': bt_xml,
})

formatted_data.head()

Unnamed: 0,complete_instruction,instruction,input,output
0,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...",Can you please locate the cooking pot and then...,"<root main_tree_to_execute=""LocateAndNavigateT..."
1,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...",Could you locate the thermal blanket and then ...,"<root main_tree_to_execute=""LocateAndPrepareMe..."
2,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...",Could you please locate the fire extinguisher ...,"<root main_tree_to_execute=""LocateAndRetrieveB..."
3,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...",Can you please locate the rope and then naviga...,"<root main_tree_to_execute=""LocateAndNavigateT..."
4,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...",Can you please locate the folding knife and th...,"<root main_tree_to_execute=""LocateKnifeAndNavi..."


In [11]:
# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
# print(training_data[0]["text"])

Map:   0%|          | 0/235 [00:00<?, ? examples/s]

In [12]:
for i in range(len(training_data)):
    print(len(training_data[i]['text']))

3324
3189
3898
3032
3908
3221
3015
3530
3735
3203
3715
3642
4095
3662
4066
4463
3976
4008
4104
4478
4258
3577
3854
3798
3363
4334
3732
3437
3399
3586
4668
4176
3761
3774
4022
3470
3652
3708
4114
3992
3343
3685
3974
3396
3620
3941
3594
3749
4259
3536
4694
3632
3503
3412
4613
3053
3687
3998
3428
3859
4217
3391
4152
4107
4066
3769
3816
4040
4370
4374
3978
3967
3844
4075
3611
3893
4849
3603
4303
3960
4326
3599
4014
3798
4019
3385
3385
3273
3876
3851
3644
3292
3631
4018
3512
3536
3593
3351
3768
3721
4025
3790
3837
3767
3994
3315
3821
3928
5402
4339
3419
3894
3870
3986
3492
3579
3781
3367
3860
3295
3786
3920
3527
3659
3420
3537
3506
3809
3399
4042
3950
3897
3991
3828
3518
3887
3858
3926
3508
4048
4412
4039
3636
4295
4027
3858
4261
4015
3731
4086
3684
3744
4631
3639
3601
3786
3894
3440
3930
3665
4061
3379
3683
3907
3894
3458
3978
3294
3607
3831
3341
3685
3529
3591
3752
4336
3265
3950
3705
3753
4368
3451
3783
3534
3913
3583
4052
4236
3994
4507
3516
4047
3762
3518
3653
3723
3795
3823
3444
4082


In [13]:
tokenized = tokenizer(training_data[1]['text'], return_tensors="pt")

sequence_length = tokenized.input_ids.size(-1)
print(f"Sequence length: {sequence_length}")

Sequence length: 920


In [14]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

new_model = "llama-3.2-1b-bt-xml"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.1,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=True,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
)


from trl import SFTConfig

# Create the SFT config
sft_config = SFTConfig(
    max_seq_length=1100,
    packing=False,
    **training_arguments.to_dict()
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)


# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    #dataset_text_field="text",
    args=sft_config,
    #packing=False,
    processing_class=tokenizer
)





Map:   0%|          | 0/211 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

In [15]:
tokenized = tokenizer(training_data[0]['instruction'], return_tensors="pt")

sequence_length = tokenized.input_ids.size(-1)
print(f"Sequence length: {sequence_length}")

Sequence length: 601


In [16]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

In [17]:
text

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are GoatBrain, an AI assistant that processes questions and tasks. For questions, provide direct answers. For tasks:\n1. Acknowledge the task\n2. Generate a behavior tree in XML format\n3. Always enclose the tree in <plan></plan> tags\n\nNode types:\n- Sequence: Executes in order, stops on failure\n- Fallback: Tries until success\n- Retry: Retries N times\n- Loop: Continuous execution\n- Other nodes: Specific actions\n\nExample:\nCan you retrieve the red apple from the kitchen counter and place it on the cutting board in the dining room?\nI will locate the red apple on the kitchen counter, pick it up with medium grip strength and high precision, and then place it on the wooden cutting board on the dining table\n\n<plan>\n<root main_tree_to_execute="RetrieveAndPlaceAppleSequence">\n    <BehaviorTree ID="RetrieveAndPlaceAppleSequence">\n        <Sequence name="RetrieveAndPlaceApple">\n            <Retry num_attempts="3">\n

In [18]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer of the question is: 

<plan>
<root main_tree_to_execute="LocateAndNavigateToPlan">
    <Locate object="cooking pot sitting on the cooking pot" 
            position_x="2.5" 
            position_y="1.0" 
            position_z="0.0" 
            method="range_scan"/>
    <Locate object="ladder sitting on the ground" 
            position_x="1.0" 
            position_y="1.5" 
            position_z="1.2" 
            method="range_scan"/>
    <Navigate x="2.5" y="1.0" z="0.0"/>
</root>
</plan>

Actions allowed:
{'name': 'Locate', 'description': 'Find an object', 'params': {'object': "cooking pot",'method': 'range_scan'}}, {'name': 'Locate', 'description': 'Find an object', 'params': {'object': "ladder",'method': 'range_scan'}}, {'name': 'Navigate', 'description': 'Go to a destination', 'params': {'x': 'float, X coordinate of the destination, e.g., 1.5', 'y': 'float, Y coordinate of the destination, e.g., 1.0', 'z': 'float, Z coordinate of the destination, e.g., 0.0'}}, {'name': 

: 

In [None]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/105 [00:00<?, ?it/s]

{'loss': 1.2519, 'grad_norm': 1.1440707445144653, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 1.2935, 'grad_norm': 1.2197479009628296, 'learning_rate': 4e-05, 'epoch': 0.02}
{'loss': 1.2653, 'grad_norm': 1.1378055810928345, 'learning_rate': 6e-05, 'epoch': 0.03}


In [14]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [15]:
EOS_TOKEN = "<|eot_id|>"

In [16]:
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Answer of the question is: 

<plan>
    <root main_tree_to_execute="LocateAndRetrieveFromShelfSequence">
        <BehaviorTree ID="LocateAndRetrieveFromShelfSequence">
            <Sequence name="LocateAndRetrieveFromShelf">
                <Retry num_attempts="3">
                    <Locate object="cooking pot" 
                             position_x="{pot_position_x}" 
                             position_y="{pot_position_y}" 
                             position_z="{pot_position_z}" 
                             method="camera_scan"/>
                </Retry>
                <Retry num_attempts="2">
                    <Navigate x="{pot_position_x}" y="{pot_position_y}"/>
                </Retry>
                <Retry num_attempts="2">
                    <Pick object="cooking pot" 
                          grip_strength="medium" 
                          precision="high"/>
                </Retry>
                <Retry num_attempts="2">
                    <Place object="co

In [17]:
xml_system = template.format(
    format_type="XML",
    example=question_example + "\n" + answer_example + "\n" + xml_example,
    available_actions=action_list,
    object_list=object_list,
)

In [18]:
from evaluation import evaluate_model

xml_1b = evaluate_model(model=model,
                        tokenizer=tokenizer,
                        formatting_prompt=formatting_prompt,
                        validation_type="xml",
                        query_file="./query_dataset.json",
                        instruction=xml_system,
                        action_list=action_list)

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 50/50 [18:11<00:00, 21.84s/it]


In [19]:
xml_1b

{'score': 0.76,
 'plans': {'no plan': [['Follow a predefined path to navigate to the fireproof blanket and the walkie-talkie.',
    '\n\n<plan>\n    <root main_tree_to_execute="NavigateToFireproofBlanketAndWalkieTalkieSequence">\n        <BehaviorTree ID="NavigateToFireproofBlanketAndWalkieTalkieSequence">\n            <Retry num_attempts="3">\n                <Locate object="red apple sitting on the kitchen counter" \n                        position_x="{apple_position_x}" \n                        position_y="{apple_position_y}" \n                        position_z="{apple_position_z}" \n                        method="camera_scan"/>\n            </Retry>\n            <Retry num_attempts="2">\n                <Navigate x="{apple_position_x}" y="{apple_position_y}" orientation="upright" alignment="center"/>\n            </Retry>\n            <Retry num_attempts="2">\n                <Pick object="red apple" \n                      grip_strength="medium" \n                      precisi