In [None]:
# scripts/run_deliverybench.ipynb
# -*- coding: utf-8 -*-

import os
os.environ["QT_QPA_PLATFORM"] = "offscreen"
os.environ["OPENROUTER_API_KEY"] = ""

import json
import sys
import threading
import traceback
from pathlib import Path

# -----------------------------------------------------------------------------
# Auto-detect the Food-Delivery-Bench repo root and set paths
# -----------------------------------------------------------------------------
# Walk upward from the working directory. At each level, test the directory
# itself first and then a child named "Food-Delivery-Bench".
cwd = Path.cwd().resolve()
base_dir = None
for p in (cwd, *cwd.parents):
    for candidate in (p, p / "Food-Delivery-Bench"):
        # A repo root is recognised by containing both package directories.
        if all((candidate / sub).is_dir() for sub in ("vlm_delivery", "simworld")):
            base_dir = candidate
            break
    if base_dir is not None:
        break

if base_dir is None:
    raise RuntimeError("Cannot auto-detect Food-Delivery-Bench root.")

# The rest of the file uses `base_dir` as a plain string path.
base_dir = str(base_dir)
# Same resulting order as two successive insert(0, ...) calls:
# the simworld directory first, then the repo root.
sys.path[:0] = [os.path.join(base_dir, "simworld"), base_dir]

from vlm_delivery.gym_like_interface.gym_like_interface import DeliveryBenchGymEnv
from vlm_delivery.vlm.base_model import BaseModel
from vlm_delivery.utils.trajectory_recorder import save_text
from vlm_delivery.utils.vlm_runtime import (
    sanitize_filename,
    vlm_decide_action_with_retry,
    vlm_collect_images,
    export_vlm_images_debug_once,
)


# -----------------------------------------------------------------------------
# Build a VLM client from `vlm_delivery/input/models.json`
# -----------------------------------------------------------------------------

def build_vlm_client(base_dir: str, agent_id: str = "1") -> "BaseModel":
    """Build the VLM client for one agent from ``vlm_delivery/input/models.json``.

    The file's ``default`` section is overlaid with the entry stored under
    ``agents[agent_id]``; the merged mapping supplies ``provider``, ``url``
    and ``model``. Missing, ``null`` or empty values fall back to
    ``"openai"``, ``"https://api.openai.com/v1"`` and ``"gpt-4o-mini"``.

    Args:
        base_dir: Repository root that contains ``vlm_delivery/input/models.json``.
        agent_id: Key under ``agents`` whose settings override the defaults.
            Defaults to ``"1"``.

    Returns:
        A ``BaseModel`` constructed with the resolved url, API key and model.

    Raises:
        RuntimeError: If no API key for the resolved provider is set in the
            environment.
    """
    models_path = Path(base_dir) / "vlm_delivery" / "input" / "models.json"
    with models_path.open("r", encoding="utf-8") as f:
        models_cfg = json.load(f) or {}

    # `or {}` also covers sections that are present but set to JSON null.
    default = models_cfg.get("default") or {}
    agent_cfg = (models_cfg.get("agents") or {}).get(str(agent_id)) or {}
    cfg = {**default, **agent_cfg}

    provider = str(cfg.get("provider") or "openai").lower()
    openai_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_KEY") or ""
    openrouter_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENROUTER_KEY") or ""
    # Every provider other than "openai" is served with the OpenRouter key.
    api_key = openai_key if provider == "openai" else openrouter_key
    if not api_key:
        raise RuntimeError(
            f"Missing API key for provider={provider}. "
            "Set OPENAI_API_KEY or OPENROUTER_API_KEY (or *_KEY)."
        )

    # `dict.get(key, default)` only applies the default when the key is
    # absent, so an explicit null/empty value would reach BaseModel as-is.
    # Use `or` so these fall back the same way `provider` does.
    return BaseModel(
        url=cfg.get("url") or "https://api.openai.com/v1",
        api_key=api_key,
        model=cfg.get("model") or "gpt-4o-mini",
    )


# -----------------------------------------------------------------------------
# VLM call with parse + retry, reusing DeliveryBench's `vlm_runtime` utilities
# -----------------------------------------------------------------------------


def make_vlm_caller(dm, vlm: BaseModel, env=None):
    """Create the ``call_vlm(prompt) -> str`` callable used by the step loop.

    If ``env`` is given and has a non-None ``_invoker``, images are gathered
    through ``env._invoker.call(...)`` so that Qt is never touched from a
    worker thread (avoids a crash). Otherwise they are gathered directly in
    the calling thread.
    """

    def _gather_images():
        # Looked up on every call, not once at factory time.
        invoker = None if env is None else getattr(env, "_invoker", None)
        if invoker is None:
            return vlm_collect_images(dm)

        outcome = invoker.call(lambda: vlm_collect_images(dm))
        if outcome.get("ok"):
            return outcome["result"]
        # The invoked call did not report success: send three empty slots.
        return [None, None, None]

    def _call(prompt: str) -> str:
        reply = vlm.generate(user_prompt=prompt, images=_gather_images())
        return str(reply)

    return _call

# -----------------------------------------------------------------------------
# Main loop: env only executes actions; this file drives the VLM
# -----------------------------------------------------------------------------

def main(max_steps: int = 20, run_mode: str = "auto"):
    """Run one DeliveryBench episode driven by an external VLM.

    The gym env, its Qt bootstrap and ``reset()`` run on the calling thread.
    A daemon worker thread then asks the VLM for one action per step and
    executes it through ``env.step``, while the calling thread runs the Qt
    loop.

    Args:
        max_steps: Upper bound on VLM-driven steps; also passed to the env.
        run_mode: ``"jupyter"`` runs ``env.run_qt_loop_jupyter()``; any other
            value runs ``env.run_qt_loop()``. ``"auto"`` resolves to
            ``"jupyter"`` when ``ipykernel`` is loaded, else ``"script"``.

    Raises:
        RuntimeError: If the env exposes no DeliveryMan after ``reset()``.
    """
    exp_cfg_path = os.path.join(base_dir, "vlm_delivery", "input", "experiment_config.json")
    with open(exp_cfg_path, "r", encoding="utf-8") as f:
        exp_cfg = json.load(f) or {}
    gym_env_cfg = exp_cfg.get("gym_env", {}) or {}

    env = DeliveryBenchGymEnv(
        base_dir=base_dir,
        ue_ip=gym_env_cfg.get("ue_ip", "127.0.0.1"),
        ue_port=int(gym_env_cfg.get("ue_port", 9000)),
        sim_tick_ms=100,
        vlm_pump_ms=100,
        enable_viewer=True,
        map_name=gym_env_cfg.get("map_name", "medium-city-22"),
        max_steps=max_steps,
    )

    try:
        # Must run on the main thread: create QApplication + invoker.
        env.bootstrap_qt()

        # Run reset() on the MAIN thread so all Qt (viewer, timers) is created here.
        # This avoids "QObject::startTimer: Timers cannot be started from another thread".
        obs, info = env.reset(seed=0)
        print("reset info:", info)
        print("obs:", obs)

        if not env.dms:
            raise RuntimeError("No DeliveryMan instances found after reset().")
        dm = env.dms[0]
    except Exception:
        # The worker thread, which normally closes the env, has not been
        # started yet, so release the env here before propagating the error.
        try:
            env.close()
        except Exception as close_err:
            print("[main] env.close() failed during setup cleanup:", close_err)
        raise

    def rl_loop():
        """Worker-thread body: decide and execute up to ``max_steps`` actions."""
        try:
            # External VLM client (decoupled from env)
            vlm = build_vlm_client(base_dir)
            dm._vlm_client = vlm  # for export_vlm_images_debug_once (model name in filenames)
            call_vlm = make_vlm_caller(dm, vlm, env)

            for step_i in range(1, max_steps + 1):
                # Save debug images + prompt for this step (must run on Qt main thread)
                env._invoker.call(lambda: export_vlm_images_debug_once(dm))

                # Decide next action via VLM (with parse + retry handled by `vlm_runtime`)
                dm_action, raw_text, last_err = vlm_decide_action_with_retry(
                    dm,
                    call_vlm,
                    max_retries=2,
                )
                print(f"\n=== Step {step_i} ===")
                print("Chosen action:", dm_action)
                if last_err:
                    print("VLM error hint:", last_err)

                # Save raw output for debugging (similar to DeliveryBench)
                try:
                    model_safe = sanitize_filename(getattr(vlm, "model", "unknown_model"))
                    step_idx = step_i - 1  # 0-based, same as prompt/images
                    filename = f"{model_safe}_{step_idx}_output.txt"
                    save_text(dm.run_dir, filename, raw_text, encoding="utf-8")
                except Exception as save_err:
                    # Best-effort debug artifact: report the failure, but
                    # never abort the episode because of it.
                    print("[RL] Failed to save raw VLM output:", save_err)

                # Execute exactly ONE action step via the gym env
                obs, r, term, trunc, info2 = env.step(dm_action)
                print("info:", info2)
                print("reward:", r, "done:", term, "truncated:", trunc)

                # Any env-level error (including parse_action_failed) is surfaced here
                if info2.get("error"):
                    print("ENV ERROR:", info2["error"])
                    if info2.get("parse_exc"):
                        print("PARSE TRACEBACK:", info2["parse_exc"])
                    break

                if term or trunc:
                    break

        except Exception as e:
            print("[RL] Exception:", e)
            traceback.print_exc()

        finally:
            # Teardown stays best-effort (one failure must not prevent the
            # other call), but failures are reported instead of being hidden.
            try:
                env.close()
            except Exception as close_err:
                print("[RL] env.close() failed:", close_err)
            try:
                env.stop_qt_loop_jupyter()
            except Exception as stop_err:
                print("[RL] env.stop_qt_loop_jupyter() failed:", stop_err)

    # Start the RL thread; Qt loop stays on the main thread.
    threading.Thread(target=rl_loop, daemon=True).start()

    # In notebooks: pump Qt events ourselves (do NOT use %gui qt - it can crash with offscreen).
    # Main thread blocks here until rl_loop calls env.stop_qt_loop_jupyter().
    if run_mode == "auto":
        run_mode = "jupyter" if "ipykernel" in sys.modules else "script"

    if run_mode == "jupyter":
        env.run_qt_loop_jupyter()
        return

    env.run_qt_loop()


# Run the episode when this file is executed as a notebook cell or a script
# (both run with __name__ == "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()


INFO:__init__:230:Got connection confirm: b'connected to gym_citynav'


Loaded bus route: route_bus_1 with 8 stops
Created bus bus_1 on route route_bus_1
=>Info: using ip-port socket


This plugin does not support propagateSizeHints()


reset info: {'sim_time': None, 'seed': 0, 'options': {}, 'sim_tick_ms': 100, 'vlm_pump_ms': 100, 'run_dir': '/home/lingjun/DeliveryBench-official/outputs/trajectories/run_20260209_073723'}
obs: {'state': array([0., 0., 0., 0., 0.], dtype=float32)}


2026-02-09 07:37:35 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] view orders



=== Step 1 ===
Chosen action: DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 1, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 0, 'done1': 1}
reward: 0.0 done: False truncated: False

=== Step 2 ===
Chosen action: DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)
DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)


2026-02-09 07:37:48 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #6 relative score = 4.70
2026-02-09 07:37:48 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #5 relative score = 3.07
2026-02-09 07:37:48 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] accept orders: accepted #6 #5


info: {'sim_time': None, 'elapsed_steps': 2, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 1, 'done1': 2}
reward: 0.0 done: False truncated: False


2026-02-09 07:38:01 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-0.01m, 0.01m) to (-288.53m, -16.58m) [mode=e-scooter, speed=600.0 cm/s, pace=normal]



=== Step 3 ===
Chosen action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 3, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 2, 'done1': 3}
reward: 0.0 done: False truncated: False


2026-02-09 07:38:40 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] start waiting: 60.0s (~1.0 min) @virtual



=== Step 4 ===
Chosen action: DMAction(kind=<DMActionKind.WAIT: 'wait'>, data={'duration_s': 60.0}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.WAIT: 'wait'>, data={'duration_s': 60.0}, on_done=None)
DMAction(kind=<DMActionKind.WAIT: 'wait'>, data={'duration_s': 60.0}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 4, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 3, 'done1': 4}
reward: 0.0 done: False truncated: False


2026-02-09 07:39:08 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] picked (pending) order #6


DEBUG: PICKUP orders = [6]

=== Step 5 ===
Chosen action: DMAction(kind=<DMActionKind.PICKUP: 'pickup'>, data={'orders': [Order(city_map=<vlm_delivery.map.map.Map object at 0x7fd953362140>, pickup_address=Vector(x=-28852.9, y=-7909.56), delivery_address=Vector(x=-15316.35, y=-12578.8), items=[FoodItem(name='Tea', category='HOT', odor='none', motion_sensitive=False, damage_level=0, nonthermal_time_sensitive=False, prep_time_s=180, serving_temp_c=65.0, safe_min_c=50.0, safe_max_c=70.0, heat_capacity=1.1, temp_c=nan, prepared_at_sim=0.0, picked_at_sim=0.0, delivered_at_sim=0.0, odor_contamination=0.0)], special_note='You can leave it by the door, thank you', path_nodes=[Node(position=Vector(x=-28852.9, y=-1658.0885), type=door), Node(position=Vector(x=-28852.9, y=-1700.0), type=normal), Node(position=Vector(x=-26700.0, y=-1700.0), type=normal), Node(position=Vector(x=-21700.0, y=-1700.0), type=intersection), Node(position=Vector(x=-18300.0, y=-1700.0), type=intersection), Node(position=Ve

2026-02-09 07:39:17 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] placed pending food into bag for orders [6]



=== Step 6 ===
Chosen action: DMAction(kind=<DMActionKind.PLACE_FOOD_IN_BAG: 'place_food_in_bag'>, data={'bag_cmd': 'order 6: 1 -> A'}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.PLACE_FOOD_IN_BAG: 'place_food_in_bag'>, data={'bag_cmd': 'order 6: 1 -> A'}, on_done=None)
DMAction(kind=<DMActionKind.PLACE_FOOD_IN_BAG: 'place_food_in_bag'>, data={'bag_cmd': 'order 6: 1 -> A'}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 6, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 5, 'done1': 6}
reward: 0.0 done: False truncated: False


2026-02-09 07:39:26 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-287.17m, -16.70m) to (-176.97m, -125.79m) [mode=e-scooter, speed=600.0 cm/s, pace=normal]



=== Step 7 ===
Chosen action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -17697.0, 'ty': -12579.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -17697.0, 'ty': -12579.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -17697.0, 'ty': -12579.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 7, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 6, 'done1': 7}
reward: 0.0 done: False truncated: False


2026-02-09 07:39:53 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] dropped off order #6 (extra +0.00, stars=3) [time=1, food=4, method=5] [on_time=N, temp=OK, odor=OK, damage=OK]



=== Step 8 ===
Chosen action: DMAction(kind=<DMActionKind.DROP_OFF: 'drop_off'>, data={'oid': 6, 'method': 'leave_at_door'}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.DROP_OFF: 'drop_off'>, data={'oid': 6, 'method': 'leave_at_door'}, on_done=None)
DMAction(kind=<DMActionKind.DROP_OFF: 'drop_off'>, data={'oid': 6, 'method': 'leave_at_door'}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 8, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 7, 'done1': 8}
reward: 0.0 done: False truncated: False


2026-02-09 07:40:03 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-178.07m, -124.98m) to (-288.53m, -16.58m) [mode=e-scooter, speed=600.0 cm/s, pace=normal]



=== Step 9 ===
Chosen action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
Starting action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
info: {'sim_time': None, 'elapsed_steps': 9, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'direct_action', 'done0': 8, 'done1': 9}
reward: 0.0 done: False truncated: False


2026-02-09 07:40:29 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] picked (pending) order #5


DEBUG: PICKUP orders = [5]

=== Step 10 ===
Chosen action: DMAction(kind=<DMActionKind.PICKUP: 'pickup'>, data={'orders': [Order(city_map=<vlm_delivery.map.map.Map object at 0x7fd953362140>, pickup_address=Vector(x=-28852.9, y=-7909.56), delivery_address=Vector(x=-30410.07, y=-23859.68), items=[FoodItem(name='PokeBowl', category='COLD', odor='none', motion_sensitive=True, damage_level=0, nonthermal_time_sensitive=True, prep_time_s=300, serving_temp_c=5.0, safe_min_c=50.0, safe_max_c=70.0, heat_capacity=1.05, temp_c=nan, prepared_at_sim=0.0, picked_at_sim=0.0, delivered_at_sim=0.0, odor_contamination=0.0)], special_note='', path_nodes=[Node(position=Vector(x=-28852.9, y=-1658.0885), type=door), Node(position=Vector(x=-28852.9, y=-1700.0), type=normal), Node(position=Vector(x=-26700.0, y=-1700.0), type=normal), Node(position=Vector(x=-21700.0, y=-1700.0), type=intersection), Node(position=Vector(x=-21700.0, y=-3668.781), type=normal), Node(position=Vector(x=-21700.0, y=-8300.0), type=nor

Exception in thread Thread-4 (receive_loop_queue):
Traceback (most recent call last):
  File "/home/lingjun/miniconda3/envs/simworld/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/lingjun/miniconda3/envs/simworld/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/home/lingjun/miniconda3/envs/simworld/lib/python3.10/site-packages/unrealcv/__init__.py", line 328, in receive_loop_queue
    raw_message = self.receive()
  File "/home/lingjun/miniconda3/envs/simworld/lib/python3.10/site-packages/unrealcv/__init__.py", line 299, in receive
    self.disconnect()
  File "/home/lingjun/miniconda3/envs/simworld/lib/python3.10/site-packages/unrealcv/__init__.py", line 270, in disconnect
    self.sock.shutdown(socket.SHUT_RD)
OSError: [Errno 107] Transport endpoint is not connected


fail to read raw_magic, exception: [Errno 104] Connection reset by peer
BaseClient: remote disconnected, no more message
