In [1]:
# Check requirements
!pip install -q -r requirements.txt

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.9/190.9 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [19]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch, accelerate, bitsandbytes

# Models path
llamachat_model = 'meta-llama/Llama-2-7b-chat-hf'
codellama_model = 'codellama/CodeLlama-7b-Instruct-hf'

# Adapters path
llamachat_adapter = 'lora_adapters/llamachat-bt-adapter'
codellama_adapter = 'lora_adapters/codellama-bt-adapter'

To access the models on Hugging Face, you need to log in using your access token. You can do this in one of the following ways:
- Use the command huggingface-cli login in your terminal and insert your access token
- Alternatively, you can provide your access token as a parameter named token in both from_pretrained functions. Replace "hf_token" with your actual access token

In [3]:
# Select here the model and the corresponding adapter
model_id = llamachat_model
adapter_id = llamachat_adapter

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path = model_id,
    #token = hf_token
)

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path = model_id,
    load_in_8bit = True,
    torch_dtype = torch.float16,
    device_map = "auto",
    trust_remote_code = True,
    #token = hf_token,
)

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [4]:
context = "<<SYS>> You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.\n <</SYS>>"
task = """The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "ArucoStand", then follow the arucos given their IDs: 10, 1, and 7. Finally the robot reset the manipulator to the parking position and goes to the location "Parking". The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco" and "ResetManipulator"."""

# One-shot example
example_task = """The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "Station A", then follow the aruco with ID=7. The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco"."""
example_output = """
<root main_tree_to_execute = "MainTree" >
    <BehaviorTree ID="MainTree">
        <Sequence>
            <MoveTo location="Station A"/>
            <FollowAruco id="7"/>
        </Sequence>
    </BehaviorTree>
</root>
"""


### LlamaChat Prompt


#### zero-shot

In [13]:
eval_prompt = "<s>[INST]" + context + task + "[/INST]</s>"
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

#### one-shot

In [17]:
eval_prompt = "<s>[INST]" + context + example_task + "[/INST]</s>" + example_output + "[INST]" + task + "[/INST]"
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

### CodeLlama Prompt

#### zero-shot

In [10]:
eval_prompt = context + "[INST]" + task + "[/INST]"
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

#### one-shot

In [11]:
eval_prompt = context + "[INST]" + example_task + "[/INST]" + example_output + "[INST]" + task + "[/INST]"
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

### Base model inference

In [None]:
base_model.eval()
with torch.no_grad():
    print(tokenizer.decode(base_model.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True))

### Fine-tuned model inference

In [15]:
# Load fine-tuned model
finetuned_model = PeftModel.from_pretrained(base_model, adapter_id)
finetuned_model = finetuned_model.merge_and_unload()



In [18]:
finetuned_model.eval()
with torch.no_grad():
    print(tokenizer.decode(finetuned_model.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True))

[INST]<<SYS>> You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.
 <</SYS>>The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "Station A", then follow the aruco with ID=7. The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco".[/INST] 
<root main_tree_to_execute = "MainTree" >
    <BehaviorTree ID="MainTree">
        <Sequence>
            <MoveTo location="Station A"/>
            <FollowAruco id="7"/>
        </Sequence>
    </BehaviorTree>
</root>
[INST]The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "ArucoStand", then follow the arucos given their IDs: 10, 1, and 7. Finally the robot reset the manipulator to the parking position and goes to the location "Parking". The only available actions that must be used in the behavior tree are: "Move