In [6]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
import torch
from trl import SFTTrainer
import json
import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [7]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [8]:
data = pd.read_json("./queries_dataset.json")
data.head()


Unnamed: 0,object_context,actions_dictionary,query,explanation,bt_xml,bt_json
0,"[{'name': 'a first aid kit', 'position': {'x':...","[{'name': 'Wait', 'description': 'Wait for a s...","Please locate the first aid kit, pick it up wi...","Good, I will first locate the first aid kit an...","<root main_tree_to_execute=""LocatePickAndDeliv...","{'type': 'Sequence', 'name': 'LocatePickAndDel..."
1,"[{'name': 'a smartphone', 'position': {'x': 7....","[{'name': 'Wait', 'description': 'Wait for a s...","Please navigate to the toolkit, pick it up wit...","Good, I will navigate to the toolkit's locatio...","<root main_tree_to_execute=""RetrieveAndPlaceTo...","{'type': 'Sequence', 'name': 'RetrieveAndPlace..."
2,"[{'name': 'a flashlight', 'position': {'x': 9....","[{'name': 'Wait', 'description': 'Wait for a s...","Please locate the crowbar, pick it up with hig...","Good, I will first locate the crowbar to deter...","<root main_tree_to_execute=""LocatePickAndPlace...","{'type': 'Sequence', 'name': 'LocatePickAndPla..."
3,[{'name': 'a silver knife laying on the counte...,"[{'name': 'Wait', 'description': 'Wait for a s...","Please locate the silver knife on the counter,...","I will locate the silver knife on the counter,...","<root main_tree_to_execute=""LocatePickAndPlace...","{'type': 'Sequence', 'name': 'LocatePickAndPla..."
4,"[{'name': 'a toolkit', 'position': {'x': 19.6,...","[{'name': 'Wait', 'description': 'Wait for a s...","Please navigate to the water bottle, pick it u...",I will navigate to the location of the water b...,"<root main_tree_to_execute=""RetrieveAndPlaceWa...","{'type': 'Sequence', 'name': 'RetrieveAndPlace..."


In [9]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt_xml = data['bt_xml']
bt_json = data['bt_json']

from prompt_data import template, action_list, object_list, question_example, xml_example, json_example, answer_example, short_template, training_template

data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""


def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


json_system = template.format(
    format_type="JSON",
    example=question_example + "\n" + answer_example + "\n" + json_example,
    available_actions=action_list,
    object_list=object_list,
)

xml_systems = [
    short_template.format(
        format_type="XML",
        example=question_example + "\n" + answer_example + "\n" + xml_example,
        available_actions= action_list,
        object_list=reduced_object_list,
    ) for reduced_object_list in object_context]

training_systems = [
    training_template.format(
        available_actions= action_list,
        object_list=object_list,
    ) for object_list in object_context]

formatted_data = pd.DataFrame({
    'complete_instruction' : xml_systems,
    'instruction': training_systems,
    'input': query,
    'output': bt_xml,
})

formatted_data.head()

Unnamed: 0,complete_instruction,instruction,input,output
0,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...","Please locate the first aid kit, pick it up wi...","<root main_tree_to_execute=""LocatePickAndDeliv..."
1,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...","Please navigate to the toolkit, pick it up wit...","<root main_tree_to_execute=""RetrieveAndPlaceTo..."
2,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...","Please locate the crowbar, pick it up with hig...","<root main_tree_to_execute=""LocatePickAndPlace..."
3,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...","Please locate the silver knife on the counter,...","<root main_tree_to_execute=""LocatePickAndPlace..."
4,"You are GoatBrain, an AI assistant that proces...","You are GoatBrain, an AI assistant that proces...","Please navigate to the water bottle, pick it u...","<root main_tree_to_execute=""RetrieveAndPlaceWa..."


In [10]:
# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
# print(training_data[0]["text"])

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

In [11]:
for i in range(len(training_data)):
    print(len(training_data[i]['text']))

3410
3261
3733
3643
3629


In [12]:
tokenized = tokenizer(training_data[1]['text'], return_tensors="pt")

sequence_length = tokenized.input_ids.size(-1)
print(f"Sequence length: {sequence_length}")

Sequence length: 873


In [13]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

new_model = "llama-3.2-1b-bt-xml"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.1,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=True,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
)


from trl import SFTConfig

# Create the SFT config
sft_config = SFTConfig(
    max_seq_length=1100,
    packing=False,
    **training_arguments.to_dict()
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)


# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    #dataset_text_field="text",
    args=sft_config,
    #packing=False,
    processing_class=tokenizer
)





Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

In [14]:
tokenized = tokenizer(training_data[0]['instruction'], return_tensors="pt")

sequence_length = tokenized.input_ids.size(-1)
print(f"Sequence length: {sequence_length}")

Sequence length: 607


In [15]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer of the question is: 

<plan>
<root main_tree_to_execute="LocateAndPickAndDeliverPlan">
    <BehaviorTree ID="LocateAndPickAndDeliverPlan">
        <Locate object="a first aid kit" 
                position_x="{first aid kit position x}" 
                position_y="{first aid kit position y}" 
                position_z="{first aid kit position z}" 
                method="camera_scan"/>
        <Locate object="a first aid kit" 
                position_x="{first aid kit position x}" 
                position_y="{first aid kit position y}" 
                position_z="{first aid kit position z}" 
                method="camera_scan"/>
        <Locate object="a first aid kit" 
                position_x="{first aid kit position x}" 
                position_y="{first aid kit position y}" 
                position_z="{first aid kit position z}" 
                method="camera_scan"/>
        <Locate object="a first aid kit" 
                position_x="{first aid kit position x}" 

In [16]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/2 [00:00<?, ?it/s]

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.3341, 'grad_norm': 1.3466427326202393, 'learning_rate': 2e-05, 'epoch': 0.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.2786965370178223, 'eval_runtime': 0.197, 'eval_samples_per_second': 5.077, 'eval_steps_per_second': 5.077, 'epoch': 0.5}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.3383, 'grad_norm': 1.374582052230835, 'learning_rate': 4e-05, 'epoch': 1.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.2541751861572266, 'eval_runtime': 0.1989, 'eval_samples_per_second': 5.028, 'eval_steps_per_second': 5.028, 'epoch': 1.0}
{'train_runtime': 4.0846, 'train_samples_per_second': 0.979, 'train_steps_per_second': 0.49, 'train_loss': 1.3362263441085815, 'epoch': 1.0}


TrainOutput(global_step=2, training_loss=1.3362263441085815, metrics={'train_runtime': 4.0846, 'train_samples_per_second': 0.979, 'train_steps_per_second': 0.49, 'total_flos': 22308887605248.0, 'train_loss': 1.3362263441085815, 'epoch': 1.0})

In [17]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [18]:
EOS_TOKEN = "<|eot_id|>"

In [19]:
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Answer of the question is: 

<plan>
<root main_tree_to_execute="LocateAndPickAndDeliverPlan">
    <BehaviorTree ID="LocateAndPickAndDeliverPlan">
        <Locate object="first aid kit" 
                position_x="25.0" 
                position_y="4.0" 
                position_z="0.0" 
                method="camera_scan"/>
        <Locate object="location" 
                position_x="20.0" 
                position_y="5.0" 
                position_z="0.0" 
                method="camera_scan"/>
        <Pick object="first aid kit" 
               grip_strength="medium" 
               precision="high" 
               object_name="first aid kit"/>
        <Navigate x="25.0" y="4.0" z="0.0"/>
        <Deliver object="first aid kit" 
                 surface="location" 
                 orientation="upright" 
                 alignment="center"/>
    </BehaviorTree>
</root>
</plan>

Actions allowed:
[{'name': 'Wait', 'description': 'Wait for a specific duration', 'params': {'duration