# Notebook 13: Deploy VLA Policy (Inference)

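This notebook demonstrates **Closed-Loop Control** using the trained **Llama 3.1 VLA Policy**.
It is intended to load the LoRA adapter trained in Notebook 12 and run the resulting policy in the simulator.
When no GPU is available, a mock LLM that always returns the same fixed action is used instead, so the control-loop logic can still be exercised.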

**Pipeline**:
1. **Observation**: Robot State + Semantic Map (Text Description).
2. **Reasoning**: VLA Model generates action text (e.g., "Action: v_lin=0.3, v_ang=0.1").
3. **Execution**: Parse text -> Twist Command -> Simulator Step.

In [None]:
# Colab Setup
!pip install -q transformers accelerate bitsandbytes peft pandas matplotlib

In [None]:
import sys
import os
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Add src to path
# Make the project's `src` directory importable. The path is resolved
# relative to the current working directory. The membership check keeps
# `sys.path` free of duplicates when this cell is executed more than once.
_src_dir = os.path.abspath('../src')
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from simulation.episode_runner import EpisodeRunner, RobotState
from world_gen.hospital_generator import HospitalGenerator

## 1. Load Trained Model

In [None]:
# Identifier of the base model. In the visible cells it is referenced only by
# the commented-out `from_pretrained(MODEL_ID, ...)` loading template, so it
# has no effect while that template stays disabled.
MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Location of the LoRA adapter produced by Notebook 12. Kept commented out
# together with the loading template that would consume it.
# ADAPTER_PATH = "../models/vla_llama_v1" # Result from Nb 12

# Mock Loading for CI/Demo without GPU
class MockLLM:
    """Stand-in for the language model that ignores its input.

    Every call to :meth:`generate` yields the same action string, which lets
    the prompt -> text -> parsed-command pipeline run without a real model.
    """

    # The fixed reply, in the "Action: v_lin=..., v_ang=..." text format.
    _CANNED_REPLY = "Action: v_lin=0.2, v_ang=0.0"

    def generate(self, prompt):
        """Return the canned action text; ``prompt`` is accepted but unused."""
        return self._CANNED_REPLY

# The real model needs a CUDA GPU; without one the mock is used.
use_mock = not torch.cuda.is_available()

if use_mock:
    print("GPU not found. Using Mock LLM for logic verification.")
    model = MockLLM()
else:
    # Real Loading Logic (template, still disabled)
    # bnb_config = BitsAndBytesConfig(load_in_4bit=True, ...)
    # base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, ...)
    # model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
    #
    # Until the template above is enabled, this branch would otherwise leave
    # `model` unbound and the later `VLAPolicy(model)` call would fail with a
    # NameError. Fall back to the mock explicitly, and flip `use_mock` so
    # that code branching on it takes the mock path as well.
    print("GPU found, but real model loading is not enabled. "
          "Using Mock LLM for logic verification.")
    use_mock = True
    model = MockLLM()

## 2. Define Policy Wrapper
The policy wrapper converts the robot state into a text prompt, queries the LLM, and parses the velocity command out of the reply.

In [None]:
class VLAPolicy:
    """Closed-loop policy: robot state -> text prompt -> LLM -> velocity command.

    Args:
        model: Object exposing ``generate``. When no ``tokenizer`` is given it
            is called as ``model.generate(prompt)`` and must return the reply
            text (this is how ``MockLLM`` behaves).
        tokenizer: Optional tokenizer for a Hugging Face style model. When
            provided, the prompt is tokenized, passed to
            ``model.generate(**inputs, max_new_tokens=...)`` and only the
            newly generated tokens are decoded.
        max_new_tokens: Generation budget for the tokenizer-based path.
    """

    # A signed decimal number with an optional exponent. Unlike a loose
    # character class such as ``[0-9.-]+`` it does not swallow a trailing
    # sentence period ("v_ang=0.1."), which would make float() fail.
    _NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    _V_LIN_RE = re.compile(r"v_lin\s*=\s*(" + _NUMBER + ")")
    _V_ANG_RE = re.compile(r"v_ang\s*=\s*(" + _NUMBER + ")")

    def __init__(self, model, tokenizer=None, max_new_tokens=20):
        self.model = model
        self.tokenizer = tokenizer
        self.max_new_tokens = max_new_tokens

    def get_action(self, state: "RobotState", goal: tuple, nearest_dist: float):
        """Return ``(v_lin, v_ang)`` for the current state.

        Args:
            state: Object with ``x``, ``y`` and ``theta`` attributes.
            goal: Sequence whose first two items are the goal x and y.
            nearest_dist: Distance to the nearest obstacle, as reported to
                the model in the prompt.

        Returns:
            The parsed ``(v_lin, v_ang)`` floats, or ``(0.0, 0.0)`` (a stop
            command) when the reply cannot be parsed.
        """
        prompt = self._build_prompt(state, goal, nearest_dist)
        output_text = self._query_model(prompt)
        return self._parse_action(output_text)

    def _build_prompt(self, state, goal, nearest_dist):
        """Render the prompt text (must match the training distribution)."""
        return "\n".join((
            "Control a robot in a hospital.",
            f"State: x={state.x:.2f}, y={state.y:.2f}, theta={state.theta:.2f}.",
            f"Goal: x={goal[0]:.2f}, y={goal[1]:.2f}.",
            f"Sensors: Nearest obstacle at {nearest_dist:.2f}m.",
            "Output the linear and angular velocity safely.",
        ))

    def _query_model(self, prompt):
        """Send ``prompt`` to the model and return its reply as text."""
        if self.tokenizer is None:
            # Text-in / text-out model (e.g. the mock).
            return self.model.generate(prompt)

        device = getattr(self.model, "device", "cuda")
        inputs = self.tokenizer(prompt, return_tensors="pt").to(device)
        outputs = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens)
        # Drop the echoed prompt tokens so only the completion is parsed.
        prompt_len = inputs["input_ids"].shape[1]
        return self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    def _parse_action(self, output_text):
        """Extract ``(v_lin, v_ang)`` from text like "Action: v_lin=0.2, v_ang=0.1"."""
        import math

        if isinstance(output_text, str):
            lin_match = self._V_LIN_RE.search(output_text)
            ang_match = self._V_ANG_RE.search(output_text)
            if lin_match and ang_match:
                v_lin = float(lin_match.group(1))
                v_ang = float(ang_match.group(1))
                # Huge literals such as "1e999" convert to inf; never command those.
                if math.isfinite(v_lin) and math.isfinite(v_ang):
                    return v_lin, v_ang

        print(f"Failed to parse: {output_text}")
        return 0.0, 0.0  # Fail-safe stop

## 3. Run Evaluation Episode

In [None]:
# --- World and policy construction -----------------------------------------
# Build a 20x20 hospital map with a single ward and export it as a dict.
gen = HospitalGenerator(width=20, height=20)
gen.initialize_map()
gen.generate_layout(num_wards=1)
world_config = gen.to_dict()

# NOTE(review): `runner` is constructed here but the rollout below never
# calls it; the pose is integrated by hand instead.
runner = EpisodeRunner(world_config)
policy = VLAPolicy(model)

# --- Closed-loop rollout -----------------------------------------------------
import math

start = (2.0, 10.0, 0.0)
goal = (18.0, 10.0, 0.0)

x, y, theta = start
t, dt = 0.0, 0.1
trajectory = []

# Mock sensor: a constant reading, so it is set once rather than per step.
d_nearest = 2.0

print("Starting VLA Inference Loop...")
for _ in range(50):  # Short test
    # Ask the policy for a command given the current pose.
    state = RobotState(t, x, y, theta, 0, 0)
    v, w = policy.get_action(state, goal, d_nearest)

    # Advance the pose by one time step: position first (using the heading
    # from before this step), then heading and clock.
    x, y = x + v * math.cos(theta) * dt, y + v * math.sin(theta) * dt
    theta, t = theta + w * dt, t + dt
    trajectory.append(dict(x=x, y=y))

print(f"Finished. Final Pose: ({x:.2f}, {y:.2f})")