In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, IA3Config
import torch
from trl import SFTTrainer
import json
import pandas as pd
from datasets import Dataset
import bitsandbytes as bnb
from enum import Enum
import wandb
from dataclasses import dataclass
from typing import List, Optional
from trl import SFTConfig


# Clear CUDA cache: hand unused cached GPU memory back to the driver (e.g. left over from earlier runs in this kernel)
torch.cuda.empty_cache()

In [2]:
class ModelSize(Enum):
    """Base models that can be fine-tuned; each value is the model id passed to `from_pretrained`."""
    LLAMA_1B = "meta-llama/Llama-3.2-1B-Instruct"
    LLAMA_3B = "meta-llama/Llama-3.2-3B-Instruct"

class TrainingMethod(Enum):
    """Parameter-efficient fine-tuning method; the value is also used as a suffix of the output directory name."""
    LORA = "lora"
    IA3 = "ia3"

class OutputFormat(Enum):
    """Serialization of the behavior-tree targets; the value is also part of the output directory name."""
    XML = "xml"
    JSON = "json"

In [3]:

@dataclass
class TrainingConfig:
    """Choices and hyper-parameters describing a single fine-tuning run."""
    model_size: ModelSize  # base model to fine-tune
    training_method: TrainingMethod  # LoRA or IA3 adapter
    output_format: OutputFormat  # XML or JSON behavior-tree targets
    num_epochs: int = 10  # NOTE: setup_trainer only applies this for IA3; other methods train for 1 epoch
    learning_rate: float = 2e-4  # NOTE: setup_trainer replaces this with 1e-4 when the method is IA3
    batch_size: int = 1  # per-device batch size for both training and evaluation
    gradient_accumulation_steps: int = 2
    max_seq_length: int = 900  # forwarded to SFTConfig(max_seq_length=...)

def setup_model_and_tokenizer(model_size: ModelSize):
    """Load the 4-bit quantized base model and its tokenizer.

    Args:
        model_size: Which base model to load; ``model_size.value`` is the
            identifier handed to ``from_pretrained``.

    Returns:
        tuple: ``(model, tokenizer)``. The tokenizer is guaranteed to have a
        pad token whose id already exists in the model's vocabulary.
    """
    torch_dtype = torch.float16
    attn_implementation = "eager"

    # NF4 4-bit quantization with double quantization; matmuls run in fp16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch_dtype,
        bnb_4bit_use_double_quant=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_size.value,
        quantization_config=bnb_config,
        device_map="auto",
        # Stated explicitly so the non-quantized modules use the same dtype
        # as in load_trained_model.
        torch_dtype=torch_dtype,
        attn_implementation=attn_implementation,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_size.value, trust_remote_code=True)

    # Do NOT add a brand-new '[PAD]' token here: it would get an id one past
    # the end of the model's embedding matrix (the embeddings are never
    # resized), so any batch that actually contains padding would index out
    # of range. Reuse a token the checkpoint already knows instead.
    if tokenizer.pad_token is None:
        reserved_pad = "<|finetune_right_pad_id|>"  # dedicated pad token shipped with Llama 3.x tokenizers
        if reserved_pad in tokenizer.get_vocab():
            tokenizer.pad_token = reserved_pad
        else:
            tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

def find_all_linear_names(model):
    """Return the distinct leaf names of all 4-bit linear layers in `model`.

    Only the last component of each dotted module path is kept, and
    'lm_head' is excluded. The result is a list built from a set, so its
    order is not guaranteed.
    """
    quantized_linear = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, quantized_linear)
    }
    # The output head is never used as an adapter target.
    leaf_names.discard('lm_head')
    return list(leaf_names)

def get_peft_config(method: TrainingMethod, model):
    """Build the PEFT adapter configuration for the requested training method.

    LoRA targets every 4-bit linear layer found in `model`; any other method
    is treated as IA3 with a fixed set of target modules.
    """
    if method != TrainingMethod.LORA:
        # Everything that is not LoRA falls through to IA3.
        return IA3Config(
            task_type="CAUSAL_LM",
            target_modules=["k_proj", "v_proj", "down_proj"],
            feedforward_modules=["down_proj"],
        )

    linear_targets = find_all_linear_names(model)
    return LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=linear_targets,
    )

def formatting_prompt(examples):
    """Render a batch of examples into Llama chat-formatted training strings.

    Args:
        examples: Mapping with parallel sequences under the keys
            "instruction" (system message), "input" (user message) and
            "output" (assistant reply).

    Returns:
        dict: ``{"text": [...]}`` with one rendered prompt per example.
    """
    # NOTE(review): the assistant turn is not closed with <|eot_id|>; confirm
    # this is intentional before changing it, since the same function is also
    # handed to the evaluation code.
    llama_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{}
"""
    turns = zip(examples["instruction"], examples["input"], examples["output"])
    rendered = [
        llama_prompt.format(system_msg, user_msg, reply)
        for system_msg, user_msg, reply in turns
    ]
    return {"text": rendered}

def prepare_training_data(
    output_format: OutputFormat,
    data_dir: str = "../data",
    max_examples: Optional[int] = 20,
):
    """Build the supervised fine-tuning dataset for the given output format.

    Reads ``behavior_tree_dataset.json``, ``templates.json``, ``actions.json``
    and ``objects.json`` from `data_dir`, assembles one row per example and
    renders the final prompt text with `formatting_prompt`.

    Args:
        output_format: ``OutputFormat.XML`` uses the ``bt_xml`` column and the
            XML example; any other value uses ``bt_json`` and the JSON example.
        data_dir: Directory containing the four JSON files.
        max_examples: Keep only the first `max_examples` rows; ``None`` keeps
            the whole dataset. Defaults to 20, the cap that was previously
            hard-coded.

    Returns:
        datasets.Dataset with the columns ``complete_instruction`` (full
        system prompt including the worked example, identical for every row),
        ``instruction`` (per-example prompt built from ``training_template``
        and that row's ``object_context``), ``input`` (the query), ``output``
        (the plan wrapped in ``<plan>...</plan>``) and ``text`` (rendered
        prompt).
    """
    data = pd.read_json(f"{data_dir}/behavior_tree_dataset.json")

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(f"{data_dir}/templates.json", 'r', encoding="utf-8") as f:
        templates = json.load(f)
    with open(f"{data_dir}/actions.json", 'r', encoding="utf-8") as f:
        actions = json.load(f)
    with open(f"{data_dir}/objects.json", 'r', encoding="utf-8") as f:
        objects = json.load(f)

    action_list = actions['action_list']
    object_list = objects['object_list']

    # The two formats differ only in the label, the worked example and the
    # dataset column that holds the target plan.
    if output_format == OutputFormat.XML:
        format_type = "XML"
        format_example = templates['xml_example']
        plans = data['bt_xml']
    else:
        format_type = "JSON"
        format_example = templates['json_example']
        plans = data['bt_json']

    system_template = templates['template'].format(
        format_type=format_type,
        example=templates['question_example'] + "\n" + templates['answer_example'] + "\n" + format_example,
        available_actions=action_list,
        object_list=object_list,
    )
    output_data = plans.apply(lambda plan: f"<plan>{plan}</plan>")

    # Each example gets its own system prompt built from the objects visible
    # in that example (not the global object list).
    training_template = templates['training_template']
    training_systems = [
        training_template.format(
            available_actions=action_list,
            object_list=example_objects,
        )
        for example_objects in data['object_context']
    ]

    formatted_data = pd.DataFrame({
        'complete_instruction': system_template,
        'instruction': training_systems,
        'input': data['query'],
        'output': output_data,
    })

    if max_examples is not None:
        formatted_data = formatted_data.head(max_examples)

    # Show the first target as a quick sanity check (skipped for empty data).
    if len(formatted_data) > 0:
        print(formatted_data.iloc[0]['output'])

    return Dataset.from_pandas(formatted_data).map(formatting_prompt, batched=True)

def setup_trainer(model, tokenizer, training_data, config: TrainingConfig):
    """Create the SFTTrainer for one fine-tuning run.

    Args:
        model: Quantized base model to attach the adapter to.
        tokenizer: Tokenizer matching `model`.
        training_data: Dataset with a ``text`` column; 10% of it is held out
            for evaluation (fixed seed 42).
        config: Run configuration. ``num_epochs`` is only applied for IA3
            (other methods train for one epoch) and ``learning_rate`` is
            replaced by 1e-4 for IA3.

    Returns:
        SFTTrainer ready for ``.train()``; checkpoints and the final adapter
        are written to ``llama-<size>-bt-<format>-<method>``.
    """
    # "meta-llama/Llama-3.2-1B-Instruct" -> "1b". Lower-cased so the directory
    # matches the "llama-{size}-bt-..." path (size "1b"/"3b") that
    # evaluate_all_models looks for on case-sensitive filesystems.
    size_tag = config.model_size.value.split('-')[-2].lower()
    model_name = f"llama-{size_tag}-bt-{config.output_format.value}-{config.training_method.value}"

    is_ia3 = config.training_method == TrainingMethod.IA3

    # SFTConfig is itself a TrainingArguments subclass, so it is built
    # directly. Round-tripping through TrainingArguments.to_dict() is lossy:
    # that method is meant for serialization and replaces token fields with
    # placeholder strings.
    sft_config = SFTConfig(
        output_dir=model_name,
        per_device_train_batch_size=config.batch_size,
        per_device_eval_batch_size=config.batch_size,
        gradient_accumulation_steps=config.gradient_accumulation_steps,
        optim="paged_adamw_32bit",
        num_train_epochs=config.num_epochs if is_ia3 else 1,
        eval_strategy="steps",  # current name of the deprecated `evaluation_strategy`
        eval_steps=0.1,  # float < 1: evaluate every 10% of the total training steps
        logging_steps=1,
        warmup_steps=10,
        learning_rate=1e-4 if is_ia3 else config.learning_rate,
        fp16=True,
        bf16=False,
        group_by_length=True,
        report_to="wandb",
        max_seq_length=config.max_seq_length,
        packing=False,
    )

    full_dataset = training_data.train_test_split(test_size=0.1, seed=42)

    return SFTTrainer(
        model=model,
        train_dataset=full_dataset["train"],
        eval_dataset=full_dataset["test"],
        peft_config=get_peft_config(config.training_method, model),
        args=sft_config,
        processing_class=tokenizer,
    )

In [4]:
# Initialize wandb (the trainer reports to it via report_to="wandb")
wandb.init(project="bt-training")

# Training configuration: 1B model, LoRA adapter, XML targets; all other
# fields keep the TrainingConfig defaults
config = TrainingConfig(
    model_size=ModelSize.LLAMA_1B,
    training_method=TrainingMethod.LORA,
    output_format=OutputFormat.XML,
)

# Setup: load the quantized model, build the dataset, then wire up the trainer
model, tokenizer = setup_model_and_tokenizer(config.model_size)
training_data = prepare_training_data(config.output_format)
trainer = setup_trainer(model, tokenizer, training_data, config)

# Start training
trainer.train()

# Save the model (called without a path, so it goes to the trainer's output_dir)
trainer.save_model()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msimonroy99[0m ([33msimonroy99-cole-de-technologie-sup-rieure[0m). Use [1m`wandb login --relogin`[0m to force relogin


<plan><root main_tree_to_execute="LocateAndNavigateToLadderSequence">
    <BehaviorTree ID="LocateAndNavigateToLadderSequence">
        <Sequence name="LocateAndNavigateToLadder">
            <Retry num_attempts="3">
                <Locate object="cooking pot" 
                        position_x="{cooking_pot_position_x}" 
                        position_y="{cooking_pot_position_y}" 
                        position_z="{cooking_pot_position_z}" />
            </Retry>
            <Fallback name="HandleCookingPotFallback">
                <Sequence name="NavigateAfterLocatingPot">
                    <Retry num_attempts="2">
                        <Navigate x="{ladder_position_x}" y="{ladder_position_y}" />
                    </Retry>
                </Sequence>
                <Sequence name="FailedToLocatePotHandling">
                    <Wait duration="2.0" />
                    <Navigate x="{ladder_position_x}" y="{ladder_position_y}" />
                </Sequence>
           

Map:   0%|          | 0/20 [00:00<?, ? examples/s]



Map:   0%|          | 0/18 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]



  0%|          | 0/9 [00:00<?, ?it/s]

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.4355, 'grad_norm': 1.2515957355499268, 'learning_rate': 2e-05, 'epoch': 0.11}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.4686474800109863, 'eval_runtime': 0.3487, 'eval_samples_per_second': 5.735, 'eval_steps_per_second': 5.735, 'epoch': 0.11}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.1542, 'grad_norm': 1.0203007459640503, 'learning_rate': 4e-05, 'epoch': 0.22}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.4441306591033936, 'eval_runtime': 0.3415, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 0.22}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.4437, 'grad_norm': 1.2312090396881104, 'learning_rate': 6e-05, 'epoch': 0.33}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.3912301063537598, 'eval_runtime': 0.342, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 0.33}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.3792, 'grad_norm': 1.2149678468704224, 'learning_rate': 8e-05, 'epoch': 0.44}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.3182319402694702, 'eval_runtime': 0.3395, 'eval_samples_per_second': 5.89, 'eval_steps_per_second': 5.89, 'epoch': 0.44}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.3153, 'grad_norm': 1.0672197341918945, 'learning_rate': 0.0001, 'epoch': 0.56}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2337559461593628, 'eval_runtime': 0.3443, 'eval_samples_per_second': 5.81, 'eval_steps_per_second': 5.81, 'epoch': 0.56}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.2065, 'grad_norm': 0.9875402450561523, 'learning_rate': 0.00012, 'epoch': 0.67}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1360883712768555, 'eval_runtime': 0.3431, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 0.67}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.9277, 'grad_norm': 0.7119359970092773, 'learning_rate': 0.00014, 'epoch': 0.78}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0340609550476074, 'eval_runtime': 0.3425, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 0.78}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 1.0344, 'grad_norm': 0.9036762714385986, 'learning_rate': 0.00016, 'epoch': 0.89}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9266351461410522, 'eval_runtime': 0.3627, 'eval_samples_per_second': 5.515, 'eval_steps_per_second': 5.515, 'epoch': 0.89}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'loss': 0.8899, 'grad_norm': 0.828197717666626, 'learning_rate': 0.00018, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8178315162658691, 'eval_runtime': 0.3567, 'eval_samples_per_second': 5.607, 'eval_steps_per_second': 5.607, 'epoch': 1.0}
{'train_runtime': 11.3162, 'train_samples_per_second': 1.591, 'train_steps_per_second': 0.795, 'train_loss': 1.1984891162978277, 'epoch': 1.0}


In [8]:
from peft import PeftModel, PeftConfig
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from typing import Optional, Dict, Any
import os
from evaluation import evaluate_model
import json

def load_trained_model(
    base_model_name: str,
    adapter_path: str,
    device: str = "auto"
) -> tuple[AutoModelForCausalLM, AutoTokenizer]:
    """
    Load a trained LoRA model and its tokenizer for inference.

    Args:
        base_model_name: Name of the base model
        adapter_path: Path to the trained adapter weights
        device: Passed to ``from_pretrained`` as ``device_map``

    Returns:
        tuple: (model, tokenizer). The model is the quantized base model with
        the adapter attached, switched to eval mode. The tokenizer has a pad
        token whose id already exists in the model's vocabulary.
    """
    # Initialize tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)

    # Do NOT add a brand-new '[PAD]' token: its id would lie one past the end
    # of the (never resized) embedding matrix, so padded batches would index
    # out of range. Reuse a token the checkpoint already knows instead.
    if tokenizer.pad_token is None:
        reserved_pad = "<|finetune_right_pad_id|>"  # dedicated pad token shipped with Llama 3.x tokenizers
        if reserved_pad in tokenizer.get_vocab():
            tokenizer.pad_token = reserved_pad
        else:
            tokenizer.pad_token = tokenizer.eos_token

    # Setup base model with same quantization config as training
    torch_dtype = torch.float16
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch_dtype,
        bnb_4bit_use_double_quant=True,
    )

    # Load base model
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map=device,
        torch_dtype=torch_dtype
    )

    # Load adapter weights
    model = PeftModel.from_pretrained(model, adapter_path)

    # Inference only: make sure dropout (including the adapter's dropout
    # layers) is disabled so generations are not perturbed.
    model.eval()

    return model, tokenizer

def evaluate_all_models(
    model_configs: list[Dict[str, Any]],
    template: str,
    question_example: str,
    answer_example: str,
    json_example: str,
    xml_example: str,
    action_list: list[str],
    object_list: list[str],
    query_file: str,
    evaluate_model_fn: callable
) -> Dict[str, Any]:
    """
    Evaluate multiple trained models with different configurations.

    Args:
        model_configs: List of dictionaries with the keys 'size', 'format',
            'method' and 'base_model'. The adapter is expected in
            ``./llama-{size}-bt-{format}-{method}``.
        template: Template string for system prompt
        question_example: Example question
        answer_example: Example answer
        json_example: Example JSON output (used when format is not 'xml')
        xml_example: Example XML output (used when format is 'xml')
        action_list: List of available actions
        object_list: List of available objects
        query_file: Path to query dataset
        evaluate_model_fn: Function to evaluate individual model

    Returns:
        Dict containing evaluation results for each model that was found on
        disk, keyed by model name. Configurations whose adapter directory
        does not exist are reported and skipped.
    """
    results = {}

    for config in model_configs:
        model_name = f"llama-{config['size']}-bt-{config['format']}-{config['method']}"
        adapter_path = f"./{model_name}"

        # Check if the model exists
        if not os.path.exists(adapter_path):
            print(f"Model {model_name} does not exist. Skipping evaluation.")
            continue

        validation_type = config['format'].lower()

        # The system prompt only differs in which worked example is appended.
        format_example = xml_example if validation_type == 'xml' else json_example
        system_prompt = template.format(
            format_type=config['format'].upper(),
            example=f"{question_example}\n{answer_example}\n{format_example}",
            available_actions=action_list,
            object_list=object_list,
        )

        # Load model
        model, tokenizer = load_trained_model(
            config['base_model'],
            adapter_path
        )

        try:
            # Evaluate model
            results[model_name] = evaluate_model_fn(
                model=model,
                tokenizer=tokenizer,
                formatting_prompt=formatting_prompt,
                validation_type=validation_type,
                query_file=query_file,
                instruction=system_prompt,
                action_list=action_list
            )
        finally:
            # Drop the local model reference and clear the CUDA cache even if
            # evaluation fails, so the cleanup step is never skipped.
            del model
            torch.cuda.empty_cache()

    return results

In [9]:
# Release cached GPU memory before running the evaluation cell below
torch.cuda.empty_cache()

In [None]:
# Prompt templates (system prompt, worked examples, training prompt)
with open('../data/templates.json', 'r') as f:
    templates = json.load(f)

# Action vocabulary
with open('../data/actions.json', 'r') as f:
    actions = json.load(f)

# Object vocabulary
with open('../data/objects.json', 'r') as f:
    objects = json.load(f)

action_list = actions['action_list']
object_list = objects['object_list']
template = templates['template']
training_template = templates['training_template']
question_example = templates['question_example']
answer_example = templates['answer_example']
xml_example = templates['xml_example']
json_example = templates['json_example']

# One LoRA configuration per (model size, output format) pair, in the order
# 1b/json, 1b/xml, 3b/json, 3b/xml
model_configs = [
    {
        'size': size_tag,
        'method': 'lora',
        'format': format_name,
        'base_model': base_model.value,
    }
    for size_tag, base_model in (('1b', ModelSize.LLAMA_1B), ('3b', ModelSize.LLAMA_3B))
    for format_name in ('json', 'xml')
]

# Run the evaluation for every configuration whose adapter exists on disk
results = evaluate_all_models(
    model_configs=model_configs,
    template=template,
    question_example=question_example,
    answer_example=answer_example,
    json_example=json_example,
    xml_example=xml_example,
    action_list=action_list,
    object_list=object_list,
    query_file="../data/task_dataset.json",
    evaluate_model_fn=evaluate_model,
)

# Report the per-model results
for model_name, result in results.items():
    print(f"\nResults for {model_name}:")
    print(result)

Model llama-1b-bt-json-lora does not exist. Skipping evaluation.


 12%|█▏        | 6/50 [01:14<08:54, 12.14s/it]