In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
import torch
from trl import SFTTrainer
import json
import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [2]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [3]:
data = pd.read_json("./queries_dataset.json")
data.head()


Unnamed: 0,object_context,actions_dictionary,query,explanation,bt_xml,bt_json
0,"[{'name': 'a ladder', 'position': {'x': 30.0, ...","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the cooking pot and then...,I will first locate the cooking pot in the env...,"<root main_tree_to_execute=""LocateAndNavigateT...","{'type': 'Sequence', 'name': 'LocateAndNavigat..."
1,"[{'name': 'a fireproof blanket', 'position': {...","[{'name': 'Wait', 'description': 'Wait for a s...",Could you locate the thermal blanket and then ...,"Good, I will first attempt to locate the therm...","<root main_tree_to_execute=""LocateAndPrepareMe...","{'type': 'Sequence', 'name': 'LocateAndPrepare..."
2,"[{'name': 'a pair of boots', 'position': {'x':...","[{'name': 'Wait', 'description': 'Wait for a s...",Could you please locate the fire extinguisher ...,,"<root main_tree_to_execute=""LocateAndRetrieveB...","{'type': 'Sequence', 'name': 'LocateAndRetriev..."
3,"[{'name': 'a smartphone', 'position': {'x': 7....","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the rope and then naviga...,I will first locate the rope and retrieve its ...,"<root main_tree_to_execute=""LocateAndNavigateT...","{'type': 'Sequence', 'name': 'LocateAndNavigat..."
4,"[{'name': 'a rope', 'position': {'x': 3.9, 'y'...","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the folding knife and th...,"Good, I will first locate the folding knife an...","<root main_tree_to_execute=""LocateKnifeAndNavi...","{'type': 'Sequence', 'name': 'LocateKnifeAndNa..."


In [4]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt_xml = data['bt_xml']
bt_json = data['bt_json']

from prompt_data import template, action_list, object_list, question_example, xml_example, json_example, answer_example, short_template, training_template

data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""


def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


json_system = template.format(
    format_type="JSON",
    example=question_example + "\n" + answer_example + "\n" + json_example,
    available_actions=action_list,
    object_list=object_list,
)

xml_systems = [
    short_template.format(
        format_type="XML",
        example=question_example + "\n" + answer_example + "\n" + xml_example,
        available_actions= action_list,
        object_list=reduced_object_list,
    ) for reduced_object_list in object_context]

training_systems = [
    training_template.format(
        available_actions= action_list,
        object_list=object_list,
    ) for object_list in object_context]

formatted_data = pd.DataFrame({
    'complete_instruction' : json_system,
    'instruction': training_systems,
    'input': query,
    'output': bt_json.apply(lambda x: f"<plan>{x}</plan>"),
})

formatted_data.head()

Unnamed: 0,complete_instruction,instruction,input,output
0,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Can you please locate the cooking pot and then...,"<plan>{'type': 'Sequence', 'name': 'LocateAndN..."
1,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Could you locate the thermal blanket and then ...,"<plan>{'type': 'Sequence', 'name': 'LocateAndP..."
2,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Could you please locate the fire extinguisher ...,"<plan>{'type': 'Sequence', 'name': 'LocateAndR..."
3,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Can you please locate the rope and then naviga...,"<plan>{'type': 'Sequence', 'name': 'LocateAndN..."
4,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Can you please locate the folding knife and th...,"<plan>{'type': 'Sequence', 'name': 'LocateKnif..."


In [5]:
# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
# print(training_data[0]["text"])

Map:   0%|          | 0/235 [00:00<?, ? examples/s]

In [6]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

new_model = "llama-3.2-1b-bt-json"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.1,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=True,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
)


from trl import SFTConfig

# Create the SFT config
sft_config = SFTConfig(
    max_seq_length=1100,
    packing=False,
    **training_arguments.to_dict()
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)


# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    #dataset_text_field="text",
    args=sft_config,
    #packing=False,
    processing_class=tokenizer
)





Map:   0%|          | 0/211 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

In [7]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

: 

In [12]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [None]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/22 [00:00<?, ?it/s]

{'loss': 1.2504, 'grad_norm': 1.129077434539795, 'learning_rate': 2e-05, 'epoch': 0.04}
{'loss': 1.2787, 'grad_norm': 1.26353120803833, 'learning_rate': 4e-05, 'epoch': 0.09}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.2625, 'grad_norm': 1.289148211479187, 'learning_rate': 6e-05, 'epoch': 0.13}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 1.2265100479125977, 'eval_runtime': 0.8852, 'eval_samples_per_second': 5.648, 'eval_steps_per_second': 5.648, 'epoch': 0.13}
{'loss': 1.2095, 'grad_norm': 1.2298741340637207, 'learning_rate': 8e-05, 'epoch': 0.18}
{'loss': 1.2491, 'grad_norm': 1.1414207220077515, 'learning_rate': 0.0001, 'epoch': 0.22}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.9802, 'grad_norm': 0.8362124562263489, 'learning_rate': 0.00012, 'epoch': 0.27}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.9928520321846008, 'eval_runtime': 0.8903, 'eval_samples_per_second': 5.616, 'eval_steps_per_second': 5.616, 'epoch': 0.27}
{'loss': 0.9921, 'grad_norm': 0.8174057006835938, 'learning_rate': 0.00014, 'epoch': 0.31}
{'loss': 0.9251, 'grad_norm': 0.7811418175697327, 'learning_rate': 0.00016, 'epoch': 0.36}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.7895, 'grad_norm': 0.846963107585907, 'learning_rate': 0.00018, 'epoch': 0.4}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.7134875655174255, 'eval_runtime': 0.8814, 'eval_samples_per_second': 5.673, 'eval_steps_per_second': 5.673, 'epoch': 0.4}
{'loss': 0.7098, 'grad_norm': 0.85690838098526, 'learning_rate': 0.0002, 'epoch': 0.44}
{'loss': 0.6401, 'grad_norm': 0.7762086987495422, 'learning_rate': 0.00018333333333333334, 'epoch': 0.49}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.5096, 'grad_norm': 0.9112257957458496, 'learning_rate': 0.0001666666666666667, 'epoch': 0.53}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.4622914791107178, 'eval_runtime': 0.8918, 'eval_samples_per_second': 5.606, 'eval_steps_per_second': 5.606, 'epoch': 0.53}
{'loss': 0.4644, 'grad_norm': 0.7520711421966553, 'learning_rate': 0.00015000000000000001, 'epoch': 0.58}
{'loss': 0.3671, 'grad_norm': 0.7199646830558777, 'learning_rate': 0.00013333333333333334, 'epoch': 0.62}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.3521, 'grad_norm': 0.7687719464302063, 'learning_rate': 0.00011666666666666668, 'epoch': 0.67}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.332821786403656, 'eval_runtime': 0.8796, 'eval_samples_per_second': 5.685, 'eval_steps_per_second': 5.685, 'epoch': 0.67}
{'loss': 0.3898, 'grad_norm': 0.7310489416122437, 'learning_rate': 0.0001, 'epoch': 0.71}
{'loss': 0.3522, 'grad_norm': 0.6121394634246826, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.76}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.3234, 'grad_norm': 0.6048145294189453, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.8}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.2776604890823364, 'eval_runtime': 0.8901, 'eval_samples_per_second': 5.617, 'eval_steps_per_second': 5.617, 'epoch': 0.8}
{'loss': 0.2952, 'grad_norm': 0.6212493777275085, 'learning_rate': 5e-05, 'epoch': 0.84}
{'loss': 0.2541, 'grad_norm': 0.5752468705177307, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.89}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.3384, 'grad_norm': 0.6326730251312256, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.93}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.260170042514801, 'eval_runtime': 0.8821, 'eval_samples_per_second': 5.668, 'eval_steps_per_second': 5.668, 'epoch': 0.93}
{'loss': 0.2549, 'grad_norm': 0.5146613717079163, 'learning_rate': 0.0, 'epoch': 0.98}
{'train_runtime': 37.6473, 'train_samples_per_second': 1.195, 'train_steps_per_second': 0.584, 'train_loss': 0.6903720132329247, 'epoch': 0.98}


TrainOutput(global_step=22, training_loss=0.6903720132329247, metrics={'train_runtime': 37.6473, 'train_samples_per_second': 1.195, 'train_steps_per_second': 0.584, 'total_flos': 264156838858752.0, 'train_loss': 0.6903720132329247, 'epoch': 0.9777777777777777})

In [14]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [15]:
EOS_TOKEN = "<|eot_id|>"

In [16]:
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Answer of the question is: 

<plan>
  <Sequence name="LocateAndNavigateToLadderSequence">
    <Retry nodes>
      <Locate object="cooking pot" position_x="20.4" position_y="7.7" position_z="1.2" />
      <Retry nodes>
        <Navigate x="20.4" y="7.7" z="1.2" />
        <Retry nodes>
          <Locate object="wooden cutting board on the dining table" position_x="4.5" position_y="2.2" position_z="0.75" />
          <Retry nodes>
            <Navigate x="4.5" y="2.2" z="0.75" />
            <Retry nodes>
              <Locate object="silver knife laying on the counter" position_x="1.8" position_y="2.1" position_z="0.8" />
              <Retry nodes>
                <Navigate x="1.8" y="2.1" z="0.8" />
                <Retry nodes>
                  <Locate object="ceramic plate with some bread crumbs" position_x="2.7" position_y="2.0" position_z="0.85" />
                  <Retry nodes>
                    <Navigate x="2.7" y="2.0" z="0.85" />
                  </Locate object="cooking 

In [17]:
json_system = template.format(
    format_type="JSON",
    example=question_example + "\n" + answer_example + "\n" + json_example,
    available_actions=action_list,
    object_list=object_list,
)

In [18]:
json_system

'You are GoatBrain, an advanced AI robot assistant designed to help with questions and tasks. Your default state is an idle loop where you wait for input and then process it. When processing input, you either answer questions or perform tasks. When a user asks a question, answer it to the best of your ability. When a user requests a task to be performed, follow these steps:\n\n1. Briefly acknowledge the task.\n2. Generate a behavior tree in the specified format (JSON) that represents the steps to complete the task.\n3. IMPORTANT: Always enclose the behavior tree within <plan></plan> tags.\n\nThe behavior tree should use the following node types:\n- Sequence: Executes children in order, stops if one fails\n- Fallback: Tries children in order until one succeeds\n- Retry: Retries its child node a specified number of times\n- Loop: Continuously executes its child nodes\n- anything else: Represents a specific action\n\n### Example of a behavior tree:\nHere is an example :\n\nCan you retriev

In [22]:
from evaluation import evaluate_model

json_1b = evaluate_model(model=model,
                        tokenizer=tokenizer,
                        formatting_prompt=formatting_prompt,
                        validation_type="json",
                        query_file="./query_dataset.json",
                        instruction=json_system,
                        action_list=action_list)

100%|██████████| 50/50 [19:47<00:00, 23.74s/it]


In [23]:
json_1b

{'score': 0.64,
 'plans': {'no plan': [['Pick up the roll of duct tape and place it on the crowbar.',
    '\n\n<plan>\n{\n  "type": "Sequence",\n  "name": "RetrieveAndPlaceTapeSequence",\n  "nodes": [\n    {\n      "type": "Retry",\n      "retries": 3,\n      "nodes": [\n        {\n          "type": "Locate",\n          "object": "roll of duct tape",\n          "position_x": "{tape_position_x}",\n          "position_y": "{tape_position_y}",\n          "position_z": "{tape_position_z}"\n        }\n      ]\n    },\n    {\n      "type": "Retry",\n      "retries": 2,\n      "nodes": [\n        {\n          "type": "Navigate",\n          "x": "{crowbar_position_x}",\n          "y": "{crowbar_position_y}",\n          "z": "{crowbar_position_z}"\n        }\n      ]\n    },\n    {\n      "type": "Retry",\n      "retries": 2,\n      "nodes": [\n        {\n          "type": "Pick",\n          "object": "roll of duct tape",\n          "grip_strength": "medium",\n          "precision": "high"\n   

In [26]:
json_1b['plans']['valid']

['\n  {\n    "type": "Sequence",\n    "name": "RetrieveAndPlaceAppleSequence",\n    "nodes": [\n      {\n        "type": "Retry",\n        "retries": 3,\n        "nodes": [\n          {\n            "type": "Locate",\n            "object": "red apple sitting on the kitchen counter",\n            "position_x": "{apple_position_x}",\n            "position_y": "{apple_position_y}",\n            "position_z": "{apple_position_z}"\n          },\n          {\n            "type": "Navigate",\n            "x": "{apple_position_x}",\n            "y": "{apple_position_y}",\n            "z": "{apple_position_z}"\n          },\n          {\n            "type": "Pick",\n            "object": "red apple sitting on the kitchen counter",\n            "grip_strength": "medium",\n            "precision": "high"\n          },\n          {\n            "type": "Place",\n            "object": "red apple",\n            "surface": "wooden cutting board on the dining table",\n            "orientation": "uprig