In [1]:
import pandas as pd
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import (
    LoraConfig,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer

import pandas as pd
from datasets import Dataset

import bitsandbytes as bnb

from huggingface_hub import login

torch.cuda.empty_cache()

In [2]:
data = pd.read_json("./bt_dataset.json")
data.head()

Unnamed: 0,instruction,input,output
0,You will be provided a summary of a task perfo...,The behavior tree orchestrates the navigation ...,<!--\n This Behavior Tree replans the global ...
1,You will be provided a summary of a task perfo...,The behavior tree is designed to control a rob...,<!--\n This Behavior Tree replans the global ...
2,You will be provided a summary of a task perfo...,The behavior tree is a simple sequential task ...,"<root main_tree_to_execute = ""MainTree"" >\n ..."
3,You will be provided a summary of a task perfo...,The behavior tree represents a robot's task. I...,"<root main_tree_to_execute = ""MainTree"" >\n ..."
4,You will be provided a summary of a task perfo...,The behavior tree represents a robot's navigat...,<!--\n This Behavior Tree first computes a pa...


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 594 entries, 0 to 593
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   instruction  594 non-null    object
 1   input        594 non-null    object
 2   output       594 non-null    object
dtypes: object(3)
memory usage: 14.0+ KB


In [4]:
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer= AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [5]:
base_instruction = "You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format."
data_prompt = base_instruction + """

### Input:
{}

### Output:
{}"""

training_data = Dataset.from_pandas(data)
EOS_TOKEN = tokenizer.eos_token
def formatting_prompt(examples):
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for input_, output in zip(inputs, outputs):
        text = data_prompt.format(input_, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

training_data = Dataset.from_pandas(data)
training_data = training_data.map(formatting_prompt, batched=True)

Map:   0%|          | 0/594 [00:00<?, ? examples/s]

In [6]:
data_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""

EOS_TOKEN = "<|eot_id|>"

def formatting_prompt(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction,input_, output in zip(instructions, inputs, outputs):
        text = data_prompt.format(instruction,input_, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }


# Create the dataset and apply the mapping
training_data = Dataset.from_pandas(data)
training_data = training_data.map(formatting_prompt, batched=True)

# Display a sample for verification
print(training_data[0]["text"])

Map:   0%|          | 0/594 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
The behavior tree orchestrates the navigation of a robot by periodically replanning its global path at a frequency of 1 Hz. It utilizes a pipeline sequence, where it first computes a path to a specified goal using a "GridBased" planner and then follows this computed path using a designated controller. This approach ensures that the robot continuously updates its path to adapt to dynamic environments or changing conditions, enabling it to navigate effectively towards its goal while avoiding obstacles or other potential disruptions.<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
<!--
  This Behavior Tree replans the global path periodically at 1 Hz.
-->

<root main_tree_to_execute="MainTree">
  <BehaviorTree ID="MainTree">
    <Pipeli

In [7]:
EOS_TOKEN

'<|eot_id|>'

In [8]:
training_data['text'][0]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\nThe behavior tree orchestrates the navigation of a robot by periodically replanning its global path at a frequency of 1 Hz. It utilizes a pipeline sequence, where it first computes a path to a specified goal using a "GridBased" planner and then follows this computed path using a designated controller. This approach ensures that the robot continuously updates its path to adapt to dynamic environments or changing conditions, enabling it to navigate effectively towards its goal while avoiding obstacles or other potential disruptions.<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n<!--\n  This Behavior Tree replans the global path periodically at 1 Hz.\n-->\n\n<root main_tree_to_execute="MainTree">\n  <BehaviorTree ID="MainTree">\

In [9]:

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)


# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

new_model = "llama-3.2-3b-it-Ecommerce-ChatBot"

#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Create train/test split
full_dataset = training_data.train_test_split(test_size=0.1, seed=42)

# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=full_dataset["train"],
    eval_dataset=full_dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/534 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

In [10]:
text="The behavior tree orchestrates the navigation of a robot by periodically replanning its global path at a frequency of 1 Hz. It utilizes a pipeline sequence, where it first computes a path to a specified goal using a \"GridBased\" planner and then follows this computed path using a designated controller. This approach ensures that the robot continuously updates its path to adapt to dynamic environments or changing conditions, enabling it to navigate effectively towards its goal while avoiding obstacles or other potential disruptions."

inputs = tokenizer([
    data_prompt.format(base_instruction, text, "")
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020)

answer=tokenizer.batch_decode(outputs)
answer_short = answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer_short)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 
<|start_header_id|>assistant<|end_header_id|>

Here is a sample XML representation of a behavior tree for the robot navigation task:
```
<behavior tree>
  <goal>
    <name>ReachGoal</name>
    <description>Reach the goal</description>
  </goal>
  <plan>
    <sequence>
      <grid-based-planner>
        <plan-name>PlanPath</plan-name>
        <parameters>
          <parameter>
            <name>gridSize</name>
            <type>integer</type>
            <value>20</value>
          </parameter>
          <parameter>
            <name>goalDistance</name>
            <type>float</type>
            <value>1</value>
          </parameter>
          <parameter>
            <name>obstacleDistance</name>
            <type>float</type>
            <value>0.5</value>
          </parameter>
        </parameters>
        <grid-based-planner>
          <plan-name>ComputePath</plan-name>
          <parameters>
            <parameter>
              <name>obstacles</name>
 

In [12]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/267 [00:00<?, ?it/s]

{'loss': 2.1013, 'grad_norm': 1.8830718994140625, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 2.421, 'grad_norm': 1.8772141933441162, 'learning_rate': 4e-05, 'epoch': 0.01}
{'loss': 2.7165, 'grad_norm': 2.0224382877349854, 'learning_rate': 6e-05, 'epoch': 0.01}
{'loss': 2.3947, 'grad_norm': 1.8407808542251587, 'learning_rate': 8e-05, 'epoch': 0.01}
{'loss': 2.1724, 'grad_norm': 1.8222862482070923, 'learning_rate': 0.0001, 'epoch': 0.02}
{'loss': 2.1665, 'grad_norm': 1.8011784553527832, 'learning_rate': 0.00012, 'epoch': 0.02}
{'loss': 2.0663, 'grad_norm': 1.4311195611953735, 'learning_rate': 0.00014, 'epoch': 0.03}
{'loss': 1.9541, 'grad_norm': 1.2858277559280396, 'learning_rate': 0.00016, 'epoch': 0.03}
{'loss': 2.2059, 'grad_norm': 1.5256670713424683, 'learning_rate': 0.00018, 'epoch': 0.03}
{'loss': 1.9583, 'grad_norm': 1.4222426414489746, 'learning_rate': 0.0002, 'epoch': 0.04}
{'loss': 1.6651, 'grad_norm': 1.292578935623169, 'learning_rate': 0.0001992217898832685, 'epoch': 0.04

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.9368, 'grad_norm': 1.131678581237793, 'learning_rate': 0.00016575875486381326, 'epoch': 0.2}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 1.0513944625854492, 'eval_runtime': 7.1822, 'eval_samples_per_second': 8.354, 'eval_steps_per_second': 8.354, 'epoch': 0.2}
{'loss': 1.1019, 'grad_norm': 1.245175838470459, 'learning_rate': 0.0001649805447470817, 'epoch': 0.21}
{'loss': 1.6829, 'grad_norm': 1.2905633449554443, 'learning_rate': 0.0001642023346303502, 'epoch': 0.21}
{'loss': 0.8785, 'grad_norm': 1.0657894611358643, 'learning_rate': 0.0001634241245136187, 'epoch': 0.21}
{'loss': 1.6903, 'grad_norm': 1.3576346635818481, 'learning_rate': 0.00016264591439688717, 'epoch': 0.22}
{'loss': 1.1169, 'grad_norm': 1.0778228044509888, 'learning_rate': 0.00016186770428015565, 'epoch': 0.22}
{'loss': 1.1219, 'grad_norm': 1.0789772272109985, 'learning_rate': 0.00016108949416342413, 'epoch': 0.22}
{'loss': 1.3648, 'grad_norm': 1.075291395187378, 'learning_rate': 0.0001603112840466926, 'epoch': 0.23}
{'loss': 1.1717, 'grad_norm': 1.0265227556228638, 'learning_rate': 0.00015953307392996112, 'epoch': 0.23}
{'loss': 1.1388, 'gr

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.0815, 'grad_norm': 1.4700428247451782, 'learning_rate': 0.0001237354085603113, 'epoch': 0.4}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.8824385404586792, 'eval_runtime': 7.1601, 'eval_samples_per_second': 8.38, 'eval_steps_per_second': 8.38, 'epoch': 0.4}
{'loss': 1.3075, 'grad_norm': 1.4731686115264893, 'learning_rate': 0.00012295719844357977, 'epoch': 0.41}
{'loss': 1.109, 'grad_norm': 1.3077154159545898, 'learning_rate': 0.00012217898832684825, 'epoch': 0.41}
{'loss': 0.892, 'grad_norm': 1.0182552337646484, 'learning_rate': 0.00012140077821011673, 'epoch': 0.42}
{'loss': 1.2891, 'grad_norm': 1.2608592510223389, 'learning_rate': 0.00012062256809338521, 'epoch': 0.42}
{'loss': 1.1296, 'grad_norm': 1.3740692138671875, 'learning_rate': 0.0001198443579766537, 'epoch': 0.42}
{'loss': 0.8057, 'grad_norm': 1.1053519248962402, 'learning_rate': 0.00011906614785992218, 'epoch': 0.43}
{'loss': 0.9645, 'grad_norm': 1.1471889019012451, 'learning_rate': 0.00011828793774319066, 'epoch': 0.43}
{'loss': 1.2156, 'grad_norm': 1.1369529962539673, 'learning_rate': 0.00011750972762645916, 'epoch': 0.43}
{'loss': 1.106, 'gr

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.2676, 'grad_norm': 1.2928547859191895, 'learning_rate': 8.171206225680935e-05, 'epoch': 0.61}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.8092658519744873, 'eval_runtime': 7.1682, 'eval_samples_per_second': 8.37, 'eval_steps_per_second': 8.37, 'epoch': 0.61}
{'loss': 0.845, 'grad_norm': 1.2524007558822632, 'learning_rate': 8.093385214007783e-05, 'epoch': 0.61}
{'loss': 0.9633, 'grad_norm': 1.1888108253479004, 'learning_rate': 8.01556420233463e-05, 'epoch': 0.61}
{'loss': 0.8437, 'grad_norm': 1.3482218980789185, 'learning_rate': 7.937743190661478e-05, 'epoch': 0.62}
{'loss': 0.9326, 'grad_norm': 1.2056831121444702, 'learning_rate': 7.859922178988328e-05, 'epoch': 0.62}
{'loss': 1.1795, 'grad_norm': 1.1872425079345703, 'learning_rate': 7.782101167315176e-05, 'epoch': 0.63}
{'loss': 1.3429, 'grad_norm': 1.2588169574737549, 'learning_rate': 7.704280155642024e-05, 'epoch': 0.63}
{'loss': 1.0006, 'grad_norm': 1.1733083724975586, 'learning_rate': 7.626459143968871e-05, 'epoch': 0.63}
{'loss': 1.4383, 'grad_norm': 1.5050380229949951, 'learning_rate': 7.54863813229572e-05, 'epoch': 0.64}
{'loss': 1.1078, 'grad_nor

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.1202, 'grad_norm': 1.1193838119506836, 'learning_rate': 3.968871595330739e-05, 'epoch': 0.81}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.7645450830459595, 'eval_runtime': 7.1665, 'eval_samples_per_second': 8.372, 'eval_steps_per_second': 8.372, 'epoch': 0.81}
{'loss': 0.6383, 'grad_norm': 1.0341718196868896, 'learning_rate': 3.891050583657588e-05, 'epoch': 0.81}
{'loss': 1.2019, 'grad_norm': 1.2994176149368286, 'learning_rate': 3.813229571984436e-05, 'epoch': 0.82}
{'loss': 1.1588, 'grad_norm': 1.256150484085083, 'learning_rate': 3.735408560311284e-05, 'epoch': 0.82}
{'loss': 1.1317, 'grad_norm': 1.267090916633606, 'learning_rate': 3.657587548638132e-05, 'epoch': 0.82}
{'loss': 0.8991, 'grad_norm': 1.248318076133728, 'learning_rate': 3.579766536964981e-05, 'epoch': 0.83}
{'loss': 0.8528, 'grad_norm': 1.147974967956543, 'learning_rate': 3.501945525291829e-05, 'epoch': 0.83}
{'loss': 0.529, 'grad_norm': 1.159918189048767, 'learning_rate': 3.4241245136186774e-05, 'epoch': 0.84}
{'loss': 1.0075, 'grad_norm': 1.416674017906189, 'learning_rate': 3.346303501945525e-05, 'epoch': 0.84}
{'loss': 0.5635, 'grad_norm

TrainOutput(global_step=267, training_loss=1.0071701993433277, metrics={'train_runtime': 161.3499, 'train_samples_per_second': 3.31, 'train_steps_per_second': 1.655, 'total_flos': 1267678764466176.0, 'train_loss': 1.0071701993433277, 'epoch': 1.0})

In [16]:
text="The behavior tree orchestrates the navigation of a robot by periodically replanning its global path at a frequency of 1 Hz. It utilizes a pipeline sequence, where it first computes a path to a specified goal using a \"GridBased\" planner and then follows this computed path using a designated controller. This approach ensures that the robot continuously updates its path to adapt to dynamic environments or changing conditions, enabling it to navigate effectively towards its goal while avoiding obstacles or other potential disruptions."
text="The behavior tree orchestrates the navigation of a robot to find a broken leaks"
inputs = tokenizer([
    data_prompt.format(base_instruction,text, "")
], return_tensors='pt', padding=True, truncation=True).to("cuda")


outputs = model.generate(**inputs, max_new_tokens = 2020)

answer=tokenizer.batch_decode(outputs)
answer_short = answer[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1]
print("Answer of the question is:", answer_short)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer of the question is: 

<!--
  This Behavior Tree first attempts to navigate to a position. If that fails, it will
  then try to navigate to a specific goal.
-->

<root main_tree_to_execute="MainTree">
  <BehaviorTree ID="MainTree">
    <Sequence name="root_sequence">
      <Fallback name="root_fallback">
        <GoalReached/>
        <NavigateToPose goal="{goal}" name="navigate_to_pose"/>
      </Fallback>
      <NavigateToPose goal="{goal}" name="navigate_to_pose"/>
    </Sequence>
  </BehaviorTree>
</root>

<|eot_id|>


In [12]:
answer

['<|begin_of_text|>You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.\n\n### Input:\nThe behavior tree orchestrates the navigation of a robot by periodically replanning its global path at a frequency of 1 Hz. It utilizes a pipeline sequence, where it first computes a path to a specified goal using a "GridBased" planner and then follows this computed path using a designated controller. This approach ensures that the robot continuously updates its path to adapt to dynamic environments or changing conditions, enabling it to navigate effectively towards its goal while avoiding obstacles or other potential disruptions.\n\n### Output:\n<!--\n  This Behavior Tree replans the global path periodically at 1 Hz.\n-->\n\n<root main_tree_to_execute="MainTree">\n  <BehaviorTree ID="MainTree">\n    <PipelineSequence name="NavigateWithReplanning">\n      <RateController hz="1.0">\n        <ComputePathToPose goal="{g