Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions challenge/onsite_competition/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ In this phase, participants’ models will be deployed on **a real robot** to ev

---

## Robot
The robot uses a wheeled chassis. The chassis and camera control code can be found in the provided SDK. The camera’s default resolution is 640×480, and the depth and color images are already aligned.

The robot is a agilex (RANGER MINI 3.0)[https://www.agilex.ai/solutions/1] with RGB camera and a LiDAR sensor.

## ⚙️ Installation

First, install the `InternNav` package:
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 31 additions & 10 deletions challenge/onsite_competition/sdk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys

from real_world_env import RealWorldEnv
from stream import app, start_env

from internnav.agent.utils.client import AgentClient
from internnav.configs.evaluator.default_config import get_config
Expand All @@ -21,6 +22,7 @@ def parse_args():
type=str,
help='current instruction to follow',
)
parser.add_argument("--tag", type=str, help="tag for the run, saved by the tag name which is team-task-trail")
return parser.parse_args()


Expand All @@ -32,6 +34,7 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'):
return getattr(config_module, attr_name)


# TODO add logging for each step, saved by the tag name which is team-task-trail
def main():
args = parse_args()
print("--- Loading config from:", args.config, "---")
Expand All @@ -43,16 +46,34 @@ def main():
agent = AgentClient(cfg.agent)

# initialize real world env
env = RealWorldEnv(args.instruction)
env = RealWorldEnv()

while True:
# print("get observation...")
# obs contains {rgb, depth, instruction}
obs = env.get_observation()
# start stream
start_env(env)
app.run(host="0.0.0.0", port=8080, threaded=True)

# print("agent step...")
# action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent)
action = agent.step(obs)[0]['action'][0] # only take the first env's action integer
try:
while True:
# print("get observation...")
# obs contains {rgb, depth, instruction}
obs = env.get_observation()
obs["instruction"] = args.instruction

# print("env step...")
env.step(action)
# print("agent step...")
# action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent)
try:
action = agent.step(obs)[0]['action'][0]
print(f"agent step success, action is {action}")
except Exception as e:
print(f"agent step error {e}")
continue

# print("env step...")
try:
env.step(action)
print("env step success")
except Exception as e:
print(f"env step error {e}")
continue
finally:
env.close()
60 changes: 47 additions & 13 deletions challenge/onsite_competition/sdk/real_world_env.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,56 @@
import threading
import time

from cam import AlignedRealSense
from control import DiscreteRobotController

from internnav.env import Env


class RealWorldEnv(Env):
def __init__(self):
def __init__(self, fps: int = 30):
self.node = DiscreteRobotController()
self.cam = AlignedRealSense()
self.latest_obs = None
self.lock = threading.Lock()
self.stop_flag = threading.Event()
self.fps = fps

# 启动相机
self.cam.start()
# 启动采集线程
self.thread = threading.Thread(target=self._capture_loop, daemon=True)
self.thread.start()

def _capture_loop(self):
"""keep capturing frames"""
interval = 1.0 / self.fps
while not self.stop_flag.is_set():
t0 = time.time()
try:
obs = self.cam.get_observation(timeout_ms=1000)
with self.lock:
self.latest_obs = obs
except Exception as e:
print("Camera capture failed:", e)
time.sleep(0.05)
dt = time.time() - t0
if dt < interval:
time.sleep(interval - dt)

def get_observation(self):
frame = self.cam.get_observation()
return frame

def step(self, action):

'''
action (int): Discrete action to apply:
- 0: no movement (stand still)
- 1: move forward
- 2: rotate left
- 3: rotate right
'''
"""return most recent frame"""
with self.lock:
return self.latest_obs

def step(self, action: int):
"""
action:
0: stand still
1: move forward
2: turn left
3: turn right
"""
if action == 0:
self.node.stand_still()
elif action == 1:
Expand All @@ -30,3 +59,8 @@ def step(self, action):
self.node.turn_left()
elif action == 3:
self.node.turn_right()

def close(self):
self.stop_flag.set()
self.thread.join(timeout=1.0)
self.cam.stop()
38 changes: 38 additions & 0 deletions challenge/onsite_competition/sdk/stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# stream_server.py
import time

import cv2
from flask import Flask, Response

app = Flask(__name__)

# 由主程序注入
_env = None


def set_env(env):
"""set env from main to stream server"""
global _env
_env = env


def _mjpeg_generator(jpeg_quality: int = 80):
boundary = b"--frame"
while True:
if _env is None:
time.sleep(0.1)
continue
obs = _env.get_observation()
if obs is None:
time.sleep(0.01)
continue
frame_bgr = obs["rgb"]
ok, jpg = cv2.imencode(".jpg", frame_bgr, [cv2.IMWRITE_JPEG_QUALITY, jpeg_quality])
if not ok:
continue
yield (boundary + b"\r\n" b"Content-Type: image/jpeg\r\n\r\n" + jpg.tobytes() + b"\r\n")


@app.route("/stream")
def stream():
return Response(_mjpeg_generator(), mimetype="multipart/x-mixed-replace; boundary=frame")