In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import IA3Config

import torch
from trl import SFTTrainer
import json
import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

torch.cuda.empty_cache()

In [2]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [3]:
data = pd.read_json("./queries_dataset.json")
data.head()


Unnamed: 0,object_context,actions_dictionary,query,explanation,bt_xml,bt_json
0,"[{'name': 'a ladder', 'position': {'x': 30.0, ...","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the cooking pot and then...,I will first locate the cooking pot in the env...,"<root main_tree_to_execute=""LocateAndNavigateT...","{'type': 'Sequence', 'name': 'LocateAndNavigat..."
1,"[{'name': 'a fireproof blanket', 'position': {...","[{'name': 'Wait', 'description': 'Wait for a s...",Could you locate the thermal blanket and then ...,"Good, I will first attempt to locate the therm...","<root main_tree_to_execute=""LocateAndPrepareMe...","{'type': 'Sequence', 'name': 'LocateAndPrepare..."
2,"[{'name': 'a pair of boots', 'position': {'x':...","[{'name': 'Wait', 'description': 'Wait for a s...",Could you please locate the fire extinguisher ...,,"<root main_tree_to_execute=""LocateAndRetrieveB...","{'type': 'Sequence', 'name': 'LocateAndRetriev..."
3,"[{'name': 'a smartphone', 'position': {'x': 7....","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the rope and then naviga...,I will first locate the rope and retrieve its ...,"<root main_tree_to_execute=""LocateAndNavigateT...","{'type': 'Sequence', 'name': 'LocateAndNavigat..."
4,"[{'name': 'a rope', 'position': {'x': 3.9, 'y'...","[{'name': 'Wait', 'description': 'Wait for a s...",Can you please locate the folding knife and th...,"Good, I will first locate the folding knife an...","<root main_tree_to_execute=""LocateKnifeAndNavi...","{'type': 'Sequence', 'name': 'LocateKnifeAndNa..."


In [4]:
object_context = data['object_context']
actions_dictionary = data['actions_dictionary']
query = data['query']
explanation = data['explanation']
bt_xml = data['bt_xml']
bt_json = data['bt_json']

from prompt_data import template, action_list, object_list, question_example, xml_example, json_example, answer_example, short_template, training_template

data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""


def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output)
        texts.append(text)
    return { "text" : texts, }


json_system = template.format(
    format_type="JSON",
    example=question_example + "\n" + answer_example + "\n" + json_example,
    available_actions=action_list,
    object_list=object_list,
)

xml_systems = [
    short_template.format(
        format_type="XML",
        example=question_example + "\n" + answer_example + "\n" + xml_example,
        available_actions= action_list,
        object_list=reduced_object_list,
    ) for reduced_object_list in object_context]

training_systems = [
    training_template.format(
        available_actions= action_list,
        object_list=object_list,
    ) for object_list in object_context]

formatted_data = pd.DataFrame({
    'complete_instruction' : json_system,
    'instruction': training_systems,
    'input': query,
    'output': bt_json.apply(lambda x: f"<plan>{x}</plan>"),
})

formatted_data.head()

Unnamed: 0,complete_instruction,instruction,input,output
0,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Can you please locate the cooking pot and then...,"<plan>{'type': 'Sequence', 'name': 'LocateAndN..."
1,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Could you locate the thermal blanket and then ...,"<plan>{'type': 'Sequence', 'name': 'LocateAndP..."
2,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Could you please locate the fire extinguisher ...,"<plan>{'type': 'Sequence', 'name': 'LocateAndR..."
3,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Can you please locate the rope and then naviga...,"<plan>{'type': 'Sequence', 'name': 'LocateAndN..."
4,"You are GoatBrain, an advanced AI robot assist...","You are GoatBrain, an AI assistant that proces...",Can you please locate the folding knife and th...,"<plan>{'type': 'Sequence', 'name': 'LocateKnif..."


In [5]:
# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(formatted_data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
# print(training_data[0]["text"])

Map:   0%|          | 0/235 [00:00<?, ? examples/s]

In [6]:
tokenized = tokenizer(training_data[1]['text'], return_tensors="pt")

sequence_length = tokenized.input_ids.size(-1)
print(f"Sequence length: {sequence_length}")

Sequence length: 916


In [7]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# IA3 config
peft_config = IA3Config(task_type="CAUSAL_LM", target_modules=["k_proj", "v_proj", "down_proj"], feedforward_modules=["down_proj"])


new_model = "llama-3.2-1b-bt-json"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=10,
    evaluation_strategy="steps",
    eval_steps=0.1,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=1e-4,
    fp16=True,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
)


from trl import SFTConfig

# Create the SFT config
sft_config = SFTConfig(
    max_seq_length=1100,
    packing=False,
    **training_arguments.to_dict()
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)


# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    #dataset_text_field="text",
    args=sft_config,
    #packing=False,
    processing_class=tokenizer
)





Map:   0%|          | 0/211 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

In [8]:
tokenized = tokenizer(training_data[0]['instruction'], return_tensors="pt")

sequence_length = tokenized.input_ids.size(-1)
print(f"Sequence length: {sequence_length}")

Sequence length: 601


In [9]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

In [10]:
text

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are GoatBrain, an advanced AI robot assistant designed to help with questions and tasks. Your default state is an idle loop where you wait for input and then process it. When processing input, you either answer questions or perform tasks. When a user asks a question, answer it to the best of your ability. When a user requests a task to be performed, follow these steps:\n\n1. Briefly acknowledge the task.\n2. Generate a behavior tree in the specified format (JSON) that represents the steps to complete the task.\n3. IMPORTANT: Always enclose the behavior tree within <plan></plan> tags.\n\nThe behavior tree should use the following node types:\n- Sequence: Executes children in order, stops if one fails\n- Fallback: Tries children in order until one succeeds\n- Retry: Retries its child node a specified number of times\n- Loop: Continuously executes its child nodes\n- anything else: Represents a specific action\n\n### Example

In [11]:
instructions = training_data["complete_instruction"][0]
inputs = training_data["input"][0]
outputs = training_data["output"][0]
text = data_prompt.format(instructions, inputs, "")

inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer of the question is: 

I will locate the cooking pot and then navigate to the ladder to reach the top shelf.

<plan>
{
  "type": "Locate",
  "object": "cooking pot sitting on the stove",
  "position_x": "12.0",
  "position_y": "4.5",
  "position_z": "0.0"
}
<plan>
{
  "type": "Navigate",
  "object": "ladder sitting on the kitchen counter",
  "position_x": "28.0",
  "position_y": "14.5",
  "position_z": "3.2"
}
<plan>
{
  "type": "Sequence",
  "name": "LocateAndNavigateToLadderSequence",
  "nodes": [
    {
      "type": "Locate",
      "object": "cooking pot sitting on the stove",
      "position_x": "12.0",
      "position_y": "4.5",
      "position_z": "0.0"
    },
    {
      "type": "Navigate",
      "object": "ladder sitting on the kitchen counter",
      "position_x": "28.0",
      "position_y": "14.5",
      "position_z": "3.2"
    }
  ]
}
</plan>

The cooking pot is located at position (12.0, 4.5, 0.0) and the ladder is located at position (28.0, 14.5, 3.2). We will naviga

In [12]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/1050 [00:00<?, ?it/s]

{'loss': 1.2054, 'grad_norm': 0.21636302769184113, 'learning_rate': 1e-05, 'epoch': 0.01}
{'loss': 1.2358, 'grad_norm': 0.23313747346401215, 'learning_rate': 2e-05, 'epoch': 0.02}
{'loss': 1.2203, 'grad_norm': 0.2310323268175125, 'learning_rate': 3e-05, 'epoch': 0.03}
{'loss': 1.2155, 'grad_norm': 0.2207668274641037, 'learning_rate': 4e-05, 'epoch': 0.04}
{'loss': 1.2292, 'grad_norm': 0.21438543498516083, 'learning_rate': 5e-05, 'epoch': 0.05}
{'loss': 1.2055, 'grad_norm': 0.21367458999156952, 'learning_rate': 6e-05, 'epoch': 0.06}
{'loss': 1.1829, 'grad_norm': 0.2160160094499588, 'learning_rate': 7e-05, 'epoch': 0.07}
{'loss': 1.2358, 'grad_norm': 0.20491787791252136, 'learning_rate': 8e-05, 'epoch': 0.08}
{'loss': 1.1783, 'grad_norm': 0.2214745134115219, 'learning_rate': 9e-05, 'epoch': 0.09}
{'loss': 1.237, 'grad_norm': 0.21505877375602722, 'learning_rate': 0.0001, 'epoch': 0.09}
{'loss': 1.2465, 'grad_norm': 0.22042295336723328, 'learning_rate': 9.990384615384616e-05, 'epoch': 0.1}

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.092, 'grad_norm': 0.13894636929035187, 'learning_rate': 9.086538461538462e-05, 'epoch': 1.0}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 1.05156409740448, 'eval_runtime': 4.0337, 'eval_samples_per_second': 5.95, 'eval_steps_per_second': 5.95, 'epoch': 1.0}
{'loss': 1.1474, 'grad_norm': 0.18053321540355682, 'learning_rate': 9.076923076923078e-05, 'epoch': 1.0}
{'loss': 1.0542, 'grad_norm': 0.11842852830886841, 'learning_rate': 9.067307692307692e-05, 'epoch': 1.01}
{'loss': 1.0182, 'grad_norm': 0.1176689863204956, 'learning_rate': 9.057692307692308e-05, 'epoch': 1.02}
{'loss': 1.0502, 'grad_norm': 0.1388980746269226, 'learning_rate': 9.048076923076924e-05, 'epoch': 1.03}
{'loss': 1.0891, 'grad_norm': 0.12197145074605942, 'learning_rate': 9.038461538461538e-05, 'epoch': 1.04}
{'loss': 1.0077, 'grad_norm': 0.1182665079832077, 'learning_rate': 9.028846153846154e-05, 'epoch': 1.05}
{'loss': 1.0517, 'grad_norm': 0.12270911782979965, 'learning_rate': 9.01923076923077e-05, 'epoch': 1.06}
{'loss': 1.0213, 'grad_norm': 0.12006647139787674, 'learning_rate': 9.009615384615385e-05, 'epoch': 1.07}
{'loss': 1.0107, 'grad_

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.974, 'grad_norm': 0.13967783749103546, 'learning_rate': 8.076923076923078e-05, 'epoch': 1.99}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.9143819808959961, 'eval_runtime': 3.968, 'eval_samples_per_second': 6.048, 'eval_steps_per_second': 6.048, 'epoch': 1.99}
{'loss': 0.9785, 'grad_norm': 0.12959308922290802, 'learning_rate': 8.067307692307694e-05, 'epoch': 2.0}
{'loss': 0.9255, 'grad_norm': 0.1423904001712799, 'learning_rate': 8.057692307692308e-05, 'epoch': 2.0}
{'loss': 0.869, 'grad_norm': 0.10696276277303696, 'learning_rate': 8.048076923076924e-05, 'epoch': 2.01}
{'loss': 0.9059, 'grad_norm': 0.1107267513871193, 'learning_rate': 8.038461538461538e-05, 'epoch': 2.02}
{'loss': 0.9219, 'grad_norm': 0.10495732724666595, 'learning_rate': 8.028846153846154e-05, 'epoch': 2.03}
{'loss': 0.8812, 'grad_norm': 0.1051066666841507, 'learning_rate': 8.01923076923077e-05, 'epoch': 2.04}
{'loss': 0.9257, 'grad_norm': 0.10463892668485641, 'learning_rate': 8.009615384615385e-05, 'epoch': 2.05}
{'loss': 0.911, 'grad_norm': 0.1043664887547493, 'learning_rate': 8e-05, 'epoch': 2.06}
{'loss': 0.898, 'grad_norm': 0.10051487

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.8221, 'grad_norm': 0.12485454231500626, 'learning_rate': 7.067307692307694e-05, 'epoch': 2.98}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.807083785533905, 'eval_runtime': 3.9847, 'eval_samples_per_second': 6.023, 'eval_steps_per_second': 6.023, 'epoch': 2.98}
{'loss': 0.8199, 'grad_norm': 0.12962892651557922, 'learning_rate': 7.057692307692308e-05, 'epoch': 2.99}
{'loss': 0.781, 'grad_norm': 0.12076839804649353, 'learning_rate': 7.048076923076924e-05, 'epoch': 3.0}
{'loss': 0.7723, 'grad_norm': 0.12069261074066162, 'learning_rate': 7.03846153846154e-05, 'epoch': 3.0}
{'loss': 0.7902, 'grad_norm': 0.10142256319522858, 'learning_rate': 7.028846153846153e-05, 'epoch': 3.01}
{'loss': 0.8056, 'grad_norm': 0.0942678228020668, 'learning_rate': 7.019230769230769e-05, 'epoch': 3.02}
{'loss': 0.785, 'grad_norm': 0.09554058313369751, 'learning_rate': 7.009615384615385e-05, 'epoch': 3.03}
{'loss': 0.8668, 'grad_norm': 0.09746527671813965, 'learning_rate': 7e-05, 'epoch': 3.04}
{'loss': 0.7586, 'grad_norm': 0.09491800516843796, 'learning_rate': 6.990384615384615e-05, 'epoch': 3.05}
{'loss': 0.7979, 'grad_norm': 0.1185

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.7656, 'grad_norm': 0.1278182417154312, 'learning_rate': 6.0576923076923076e-05, 'epoch': 3.97}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.718772828578949, 'eval_runtime': 3.8993, 'eval_samples_per_second': 6.155, 'eval_steps_per_second': 6.155, 'epoch': 3.97}
{'loss': 0.7116, 'grad_norm': 0.12466662377119064, 'learning_rate': 6.048076923076923e-05, 'epoch': 3.98}
{'loss': 0.7348, 'grad_norm': 0.1257481426000595, 'learning_rate': 6.038461538461539e-05, 'epoch': 3.99}
{'loss': 0.726, 'grad_norm': 0.0983605608344078, 'learning_rate': 6.028846153846154e-05, 'epoch': 4.0}
{'loss': 0.6741, 'grad_norm': 0.11743161827325821, 'learning_rate': 6.019230769230769e-05, 'epoch': 4.0}
{'loss': 0.7483, 'grad_norm': 0.09682491421699524, 'learning_rate': 6.009615384615385e-05, 'epoch': 4.01}
{'loss': 0.7214, 'grad_norm': 0.10192091017961502, 'learning_rate': 6e-05, 'epoch': 4.02}
{'loss': 0.6926, 'grad_norm': 0.09535983204841614, 'learning_rate': 5.9903846153846154e-05, 'epoch': 4.03}
{'loss': 0.7334, 'grad_norm': 0.09262064099311829, 'learning_rate': 5.980769230769231e-05, 'epoch': 4.04}
{'loss': 0.7194, 'grad_norm': 0.09

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.6055, 'grad_norm': 0.1146186962723732, 'learning_rate': 5.048076923076923e-05, 'epoch': 4.96}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.6464077234268188, 'eval_runtime': 3.9323, 'eval_samples_per_second': 6.103, 'eval_steps_per_second': 6.103, 'epoch': 4.96}
{'loss': 0.6614, 'grad_norm': 0.12081826478242874, 'learning_rate': 5.038461538461539e-05, 'epoch': 4.97}
{'loss': 0.6889, 'grad_norm': 0.12546011805534363, 'learning_rate': 5.028846153846154e-05, 'epoch': 4.98}
{'loss': 0.6136, 'grad_norm': 0.1290273368358612, 'learning_rate': 5.019230769230769e-05, 'epoch': 4.99}
{'loss': 0.681, 'grad_norm': 0.10154183208942413, 'learning_rate': 5.009615384615385e-05, 'epoch': 5.0}
{'loss': 0.6058, 'grad_norm': 0.1055796891450882, 'learning_rate': 5e-05, 'epoch': 5.0}
{'loss': 0.6311, 'grad_norm': 0.10015365481376648, 'learning_rate': 4.9903846153846154e-05, 'epoch': 5.01}
{'loss': 0.6532, 'grad_norm': 0.09928323328495026, 'learning_rate': 4.980769230769231e-05, 'epoch': 5.02}
{'loss': 0.6517, 'grad_norm': 0.09271823614835739, 'learning_rate': 4.9711538461538465e-05, 'epoch': 5.03}
{'loss': 0.6596, 'grad_norm': 0.

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.5915, 'grad_norm': 0.11167100071907043, 'learning_rate': 4.038461538461539e-05, 'epoch': 5.95}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.5891882181167603, 'eval_runtime': 3.9009, 'eval_samples_per_second': 6.152, 'eval_steps_per_second': 6.152, 'epoch': 5.95}
{'loss': 0.5684, 'grad_norm': 0.11286012828350067, 'learning_rate': 4.028846153846154e-05, 'epoch': 5.96}
{'loss': 0.5553, 'grad_norm': 0.11814149469137192, 'learning_rate': 4.019230769230769e-05, 'epoch': 5.97}
{'loss': 0.5671, 'grad_norm': 0.12404625862836838, 'learning_rate': 4.009615384615385e-05, 'epoch': 5.98}
{'loss': 0.5318, 'grad_norm': 0.1326814889907837, 'learning_rate': 4e-05, 'epoch': 5.99}
{'loss': 0.5684, 'grad_norm': 0.09834802895784378, 'learning_rate': 3.9903846153846155e-05, 'epoch': 6.0}
{'loss': 0.5266, 'grad_norm': 0.1390260010957718, 'learning_rate': 3.980769230769231e-05, 'epoch': 6.0}
{'loss': 0.6126, 'grad_norm': 0.09502752125263214, 'learning_rate': 3.971153846153846e-05, 'epoch': 6.01}
{'loss': 0.6415, 'grad_norm': 0.10261472314596176, 'learning_rate': 3.961538461538462e-05, 'epoch': 6.02}
{'loss': 0.621, 'grad_norm': 0.0

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.5413, 'grad_norm': 0.09940412640571594, 'learning_rate': 3.0288461538461538e-05, 'epoch': 6.94}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.5485871434211731, 'eval_runtime': 3.8879, 'eval_samples_per_second': 6.173, 'eval_steps_per_second': 6.173, 'epoch': 6.94}
{'loss': 0.5237, 'grad_norm': 0.11115333437919617, 'learning_rate': 3.0192307692307693e-05, 'epoch': 6.95}
{'loss': 0.4791, 'grad_norm': 0.11475832015275955, 'learning_rate': 3.0096153846153845e-05, 'epoch': 6.96}
{'loss': 0.5768, 'grad_norm': 0.1149929016828537, 'learning_rate': 3e-05, 'epoch': 6.97}
{'loss': 0.5349, 'grad_norm': 0.11929433792829514, 'learning_rate': 2.9903846153846156e-05, 'epoch': 6.98}
{'loss': 0.4845, 'grad_norm': 0.12160157412290573, 'learning_rate': 2.9807692307692308e-05, 'epoch': 6.99}
{'loss': 0.5344, 'grad_norm': 0.09870022535324097, 'learning_rate': 2.9711538461538464e-05, 'epoch': 7.0}
{'loss': 0.5121, 'grad_norm': 0.1517469733953476, 'learning_rate': 2.9615384615384616e-05, 'epoch': 7.0}
{'loss': 0.5643, 'grad_norm': 0.08936234563589096, 'learning_rate': 2.951923076923077e-05, 'epoch': 7.01}
{'loss': 0.5528, 'grad_norm

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.4673, 'grad_norm': 0.10779254138469696, 'learning_rate': 2.0192307692307694e-05, 'epoch': 7.93}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.5207555890083313, 'eval_runtime': 3.8916, 'eval_samples_per_second': 6.167, 'eval_steps_per_second': 6.167, 'epoch': 7.93}
{'loss': 0.4931, 'grad_norm': 0.10708397626876831, 'learning_rate': 2.0096153846153846e-05, 'epoch': 7.94}
{'loss': 0.4666, 'grad_norm': 0.11787093430757523, 'learning_rate': 2e-05, 'epoch': 7.95}
{'loss': 0.5006, 'grad_norm': 0.10871973633766174, 'learning_rate': 1.9903846153846154e-05, 'epoch': 7.96}
{'loss': 0.4649, 'grad_norm': 0.11733262240886688, 'learning_rate': 1.980769230769231e-05, 'epoch': 7.97}
{'loss': 0.5147, 'grad_norm': 0.11379452794790268, 'learning_rate': 1.971153846153846e-05, 'epoch': 7.98}
{'loss': 0.4936, 'grad_norm': 0.12262211740016937, 'learning_rate': 1.9615384615384617e-05, 'epoch': 7.99}
{'loss': 0.5366, 'grad_norm': 0.09306957572698593, 'learning_rate': 1.951923076923077e-05, 'epoch': 8.0}
{'loss': 0.465, 'grad_norm': 0.14500504732131958, 'learning_rate': 1.9423076923076924e-05, 'epoch': 8.0}
{'loss': 0.6162, 'grad_norm'

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.4931, 'grad_norm': 0.11508264392614365, 'learning_rate': 1.0096153846153847e-05, 'epoch': 8.92}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.5042760968208313, 'eval_runtime': 3.8802, 'eval_samples_per_second': 6.185, 'eval_steps_per_second': 6.185, 'epoch': 8.92}
{'loss': 0.4678, 'grad_norm': 0.09580028057098389, 'learning_rate': 1e-05, 'epoch': 8.93}
{'loss': 0.4436, 'grad_norm': 0.10834742337465286, 'learning_rate': 9.903846153846155e-06, 'epoch': 8.94}
{'loss': 0.4775, 'grad_norm': 0.1017976775765419, 'learning_rate': 9.807692307692308e-06, 'epoch': 8.95}
{'loss': 0.4656, 'grad_norm': 0.09735629707574844, 'learning_rate': 9.711538461538462e-06, 'epoch': 8.96}
{'loss': 0.4501, 'grad_norm': 0.10604124516248703, 'learning_rate': 9.615384615384616e-06, 'epoch': 8.97}
{'loss': 0.4271, 'grad_norm': 0.12680378556251526, 'learning_rate': 9.51923076923077e-06, 'epoch': 8.98}
{'loss': 0.4789, 'grad_norm': 0.11746407300233841, 'learning_rate': 9.423076923076923e-06, 'epoch': 8.99}
{'loss': 0.4937, 'grad_norm': 0.09564755111932755, 'learning_rate': 9.326923076923077e-06, 'epoch': 9.0}
{'loss': 0.4343, 'grad_norm': 0.

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.477, 'grad_norm': 0.0893506407737732, 'learning_rate': 0.0, 'epoch': 9.91}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_loss': 0.4989147186279297, 'eval_runtime': 3.8798, 'eval_samples_per_second': 6.186, 'eval_steps_per_second': 6.186, 'epoch': 9.91}
{'train_runtime': 928.2662, 'train_samples_per_second': 2.273, 'train_steps_per_second': 1.131, 'train_loss': 0.7121471986316499, 'epoch': 9.91}


TrainOutput(global_step=1050, training_loss=0.7121471986316499, metrics={'train_runtime': 928.2662, 'train_samples_per_second': 2.273, 'train_steps_per_second': 1.131, 'total_flos': 1.2461244957597696e+16, 'train_loss': 0.7121471986316499, 'epoch': 9.909952606635072})

In [13]:
inputs = tokenizer([
    text
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 500, use_cache = True)

answer=tokenizer.batch_decode(outputs)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [14]:
EOS_TOKEN = "<|eot_id|>"

In [15]:
answer=answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].split(EOS_TOKEN)[0]
print("Answer of the question is:", answer)

Answer of the question is: 

I'm happy to help, but I must point out that a cooking pot is not a physical object that can be used to reach a top shelf. However, I can process a plan that simulates this scenario.

Here is a plan that locates the cooking pot and then navigates to the ladder to retrieve it:

```
<plan>
  {
    "type": "Locate",
    "object": "cooking pot sitting on the counter",
    "position_x": "{pot_position_x}",
    "position_y": "{pot_position_y}",
    "position_z": "{pot_position_z}"
  }
  <plan>
    {
      "type": "Navigate",
      "x": "{pot_position_x}",
      "y": "{pot_position_y}",
      "z": "{pot_position_z}"
    }
    <plan>
      {
        "type": "Locate",
        "object": "ladder sitting on the floor",
        "position_x": "{ladder_position_x}",
        "position_y": "{ladder_position_y}",
        "position_z": "{ladder_position_z}"
      }
      <plan>
        {
          "type": "Navigate",
          "x": "{ladder_position_x}",
          "y": "{ladd

In [16]:
json_system = template.format(
    format_type="JSON",
    example=question_example + "\n" + answer_example + "\n" + json_example,
    available_actions=action_list,
    object_list=object_list,
)

In [17]:
from evaluation import evaluate_model

json_1b = evaluate_model(model=model,
                        tokenizer=tokenizer,
                        formatting_prompt=formatting_prompt,
                        validation_type="json",
                        query_file="./query_dataset.json",
                        instruction=json_system,
                        action_list=action_list)

100%|██████████| 50/50 [17:40<00:00, 21.21s/it]


In [18]:
json_1b

{'score': 0.44,
 'plans': {'no plan': [['Retrieve the red apple from the kitchen counter and place it on the wooden cutting board on the dining table.',
    '\n\nI will retrieve the red apple from the kitchen counter and place it on the wooden cutting board on the dining table.\n\n<plan>\n{\n  "type": "Locate",\n  "object": "red apple sitting on the kitchen counter",\n  "position_x": "{apple_position_x}",\n  "position_y": "{apple_position_y}",\n  "position_z": "{apple_position_z}"\n}\n\n<plan>\n{\n  "type": "Navigate",\n  "x": "{apple_position_x}",\n  "y": "{apple_position_y}",\n  "z": "{apple_position_z}",\n  "output": {\n    "position_x": "{apple_position_x}",\n    "position_y": "{apple_position_y}",\n    "position_z": "{apple_position_z}"\n  }\n}\n\n<plan>\n{\n  "type": "Pick",\n  "object": "red apple sitting on the kitchen counter",\n  "grip_strength": "medium",\n  "precision": "high",\n  "output": {\n    "position_x": "{apple_position_x}",\n    "position_y": "{apple_position_y}",\