# Notebook 12: Train VLA Policy with Llama 3.1

This notebook demonstrates how to train a **Vision-Language-Action (VLA)** model using **Llama 3.1** as the reasoning backbone.
We fine-tune the model to map (Observation + Instruction) -> (Safe Navigation Action).

**Architecture**:
- **Base Model**: Llama-3.1-8B-Instruct (4-bit Quantized for Colab T4).
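- **Input** (target design): "<image_embedding> Context: You are a safe robot in a hospital. Goal: [18, 10]. Nearest Obstacle: Person at 1.5m." In v0.1 the image embedding is not used yet; the state (pose, goal, nearest-obstacle distance) is described in text only (see Section 1).
- **Output**: a velocity command, e.g. "Action: v_lin=0.30, v_ang=0.10" (linear and angular velocity).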

**Dataset**: Generated `dataset_v0.1` (Hospital Benchmark).

In [None]:
# Colab Setup
!pip install -q transformers accelerate bitsandbytes peft datasets pandas matplotlib

In [None]:
import os
import sys
import pandas as pd
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# Make a sibling `src/` directory importable. An explicit existence check is
# used instead of swallowing exceptions: when the relative path is absent
# (e.g. on Colab, where the layout may differ) it is simply skipped.
_src_dir = os.path.abspath('../src')
if os.path.isdir(_src_dir) and _src_dir not in sys.path:
    # The membership test keeps this cell idempotent when it is re-run.
    sys.path.append(_src_dir)

# Root folder of the generated dataset (read as <world>/episodes/*_log.csv).
DATASET_DIR = "../data/dataset_v0.1"
# Hugging Face model id of the base checkpoint.
MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct" # Requires HuggingFace Login

## 1. Prepare Data for Llama
Convert the CSV trajectory logs into an instruction-tuning format (one training string per sampled frame).
Format: `User: <state description>\nAssistant: <action>`

In [None]:
def format_instruction(row, goal, nearest_dist):
    """Render the robot state as the text prompt given to the model.

    For v0.1 the state is described purely in text; ideally the image
    (grid map) plus projection would be used instead.

    Args:
        row: Mapping-like record providing numeric 'x', 'y' and 'theta' entries.
        goal: Indexable pair; elements 0 and 1 are printed as the goal x and y.
        nearest_dist: Number printed as the nearest-obstacle distance ("…m").

    Returns:
        A five-line prompt string with every number formatted to two decimals.
    """
    # One list entry per prompt line; entries are evaluated top to bottom.
    prompt_lines = [
        "Control a robot in a hospital.",
        f"State: x={row['x']:.2f}, y={row['y']:.2f}, theta={row['theta']:.2f}.",
        f"Goal: x={goal[0]:.2f}, y={goal[1]:.2f}.",
        f"Sensors: Nearest obstacle at {nearest_dist:.2f}m.",
        "Output the linear and angular velocity safely.",
    ]
    return "\n".join(prompt_lines)

def format_output(row):
    """Render the logged velocity command as the target completion text.

    Args:
        row: Mapping-like record providing numeric 'v_lin' and 'v_ang' entries.

    Returns:
        The string "Action: v_lin=<a>, v_ang=<b>" with both values formatted
        to two decimals.
    """
    # Each field is looked up and formatted in turn, linear velocity first.
    linear_text = format(row['v_lin'], ".2f")
    angular_text = format(row['v_ang'], ".2f")
    return f"Action: v_lin={linear_text}, v_ang={angular_text}"

# Build the instruction-tuning corpus from the episode logs.
# Layout read below: DATASET_DIR/<world>/episodes/<name>_log.csv
MAX_WORLDS = 5      # load only a small subset of worlds
FRAME_STRIDE = 10   # keep every 10th logged frame (~10%) to reduce size
GOAL_XY = (18, 10)  # goal passed to every prompt
d_mock = 2.0        # Mock nearest dist (would calculate real)

train_samples = []

if os.path.isdir(DATASET_DIR):
    # os.listdir() returns entries in arbitrary order. Sort them, and drop
    # non-directories first, so the chosen subset is the same on every run
    # and stray files do not use up one of the MAX_WORLDS slots.
    worlds = sorted(
        name for name in os.listdir(DATASET_DIR)
        if os.path.isdir(os.path.join(DATASET_DIR, name))
    )
    for w in worlds[:MAX_WORLDS]: # load subset
        w_path = os.path.join(DATASET_DIR, w)

        ep_dir = os.path.join(w_path, "episodes")
        # isdir (not exists): a regular file named "episodes" cannot be listed.
        if not os.path.isdir(ep_dir): continue

        for ep in sorted(os.listdir(ep_dir)):
            if not ep.endswith("_log.csv"): continue
            df = pd.read_csv(os.path.join(ep_dir, ep))
            df_sub = df.iloc[::FRAME_STRIDE]

            for _, row in df_sub.iterrows():
                prompt = format_instruction(row, GOAL_XY, d_mock)
                completion = format_output(row)
                train_samples.append({"text": f"User: {prompt}\nAssistant: {completion}"})
else:
    # Typical on a fresh Colab runtime where the dataset has not been generated.
    print(f"Dataset directory not found: {DATASET_DIR!r}")

print(f"Prepared {len(train_samples)} training samples.")
# Guard the preview: indexing an empty list would raise IndexError.
if train_samples:
    print("Example:", train_samples[0]['text'])
else:
    print("No training samples were prepared; generate the dataset or check DATASET_DIR.")

## 2. Load Model (Quantized)

In [None]:
# Note: In Colab, you need to login to HF first using notebook_login()
# from huggingface_hub import notebook_login
# notebook_login()

# 4-bit loading options: NF4 quantization type with float16 as the compute
# dtype. Intended to be passed as `quantization_config=` when the model is loaded.
bnb_config = BitsAndBytesConfig(**{
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
})

# Placeholder for actual loading - Uncomment in Colab
# model = AutoModelForCausalLM.from_pretrained(MODEL_ID, quantization_config=bnb_config, device_map="auto")
# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# tokenizer.pad_token = tokenizer.eos_token

## 3. LoRA Training Setup

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 32,
# adapter dropout 0.05, bias mode "none".
peft_config = LoraConfig(**{
    "task_type": "CAUSAL_LM",
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
})

# model = get_peft_model(model, peft_config)
# print("Model ready for training.")