# Example usage

In [None]:
%pip install coderbot_sim

In [None]:
from coderbot_sim import RobotSim, example_maps
from IPython.display import display
import asyncio

# Build the simulator widget on the packaged simple map and render it in
# this cell's output area.
widget = RobotSim(example_maps.gen_simple_map(), debugDraw=True, show_controls=True)
display(widget)

async def my_sim():
    """Print the sensor reading, step the robot with forward=True, print again.

    The first argument to ``widget.step`` (0.5) is presumably a duration in
    seconds -- TODO confirm against the coderbot_sim documentation.
    """
    print(widget.sensor())
    await widget.step(0.5, forward=True)
    print(widget.sensor())

# gather() wraps the coroutine in a future that is NOT awaited here, so the
# cell finishes right away (the future's repr becomes the cell's displayed
# value) and the two prints above show up once the coroutine actually runs.
# NOTE(review): presumably left un-awaited on purpose so the kernel stays
# free to process widget messages while the step is in flight -- confirm.
asyncio.gather(my_sim(),)

RobotSim(debugDraw=True, mapData={'map': [{'type': 'rectangle', 'x': 400, 'y': 0, 'width': 800, 'height': 30, …

<_GatheringFuture pending>

{}
{'lidar': {'angles': [-3.141592653589793, -2.9249310912732556, -2.708269528956718, -2.4916079666401805, -2.2749464043236434, -2.058284842007106, -1.8416232796905683, -1.624961717374031, -1.4083001550574934, -1.191638592740956, -0.9749770304244185, -0.7583154681078814, -0.5416539057913434, -0.3249923434748059, -0.10833078115826877, 0.10833078115826833, 0.3249923434748063, 0.5416539057913439, 0.758315468107881, 0.9749770304244185, 1.191638592740956, 1.4083001550574936, 1.6249617173740303, 1.8416232796905678, 2.0582848420071063, 2.274946404323644, 2.4916079666401814, 2.708269528956718, 2.9249310912732556, 3.141592653589793], 'hitPoints': [{'x': 309.6603199355011, 'y': 268.740277779854}, {'x': 326.7384713415892, 'y': 236.47674936329247}, {'x': 341.8540655138813, 'y': 207.92082486568503}, {'x': 139.52353151774497, 'y': 15}, {'x': 344.677126601384, 'y': 117.29823329543206}, {'x': 379.81643179307133, 'y': 92.38674290739414}, {'x': 402.86251729931735, 'y': 15}, {'x': 459.55576052275296, 'y'

In [2]:
# Switch the movement flags on the existing widget: forward off, backward on.
widget.move(forward=False, backward=True)

# Simple _mock_ training

In [3]:
from coderbot_sim import RobotSim, example_maps
from IPython.display import display
import asyncio
import numpy as np
import random

# Discrete moves the agent can pick from; take_action() passes the chosen
# name to the simulator as a keyword flag.
ACTIONS: list = ["forward", "left", "right"]

# Tabular Q-function, filled lazily during training:
#   state (tuple of distance bins) -> {action name: estimated value}
q_table: dict = dict()

# Q-learning hyperparameters.
alpha: float = 0.1    # learning rate: step size of every Q-value update
gamma: float = 0.9    # discount factor applied to the best next-state value
epsilon: float = 0.1  # exploration rate: probability of picking a random action

def get_state(lidar):
    """Turn a lidar reading into a small, hashable Q-table key.

    Only the first eight entries of ``lidar["hitPoints"]`` are used. Each one
    is reduced to the Euclidean norm of its coordinates and then quantized
    into one of four bins with edges at 50, 100 and 200.

    Args:
        lidar: Mapping with an optional ``"hitPoints"`` list. Entries may be
            ``{"x": ..., "y": ...}`` dicts (the shape seen in the simulator's
            sensor output) or plain coordinate sequences.

    Returns:
        Tuple of ints in ``0..3``, one per hit point used; ``()`` when the
        reading contains no hit points.
    """
    # `or []` also covers a reading whose "hitPoints" entry is None.
    points = (lidar.get("hitPoints") or [])[:8]

    # np.linalg.norm cannot digest a dict (it raises TypeError), so dict-shaped
    # points are unpacked into their x/y components first.
    # NOTE(review): the norm is taken of the raw coordinates; if these are
    # world coordinates rather than robot-relative ones, the distance should
    # be measured from the robot's position instead -- confirm the schema.
    distances = [
        np.linalg.norm([p["x"], p["y"]]) if isinstance(p, dict) else np.linalg.norm(p)
        for p in points
    ]

    # TODO: change heuristic based on hitBodies; Goal > Pushable Box > Obstacle > Wall
    bins = np.digitize(distances, [50, 100, 200])
    # Plain ints keep the key's repr readable when the Q-table is inspected.
    return tuple(int(b) for b in bins)

def choose_action(state):
    """Pick the next action with an epsilon-greedy rule.

    A random action is returned when the exploration draw falls below
    ``epsilon`` or when ``state`` has no row in ``q_table`` yet; otherwise the
    action with the highest stored value for ``state`` is returned.
    """
    # The exploration draw is always made first, before the table is consulted.
    explore = random.random() < epsilon
    if not explore and state in q_table:
        action_values = q_table[state]
        return max(action_values, key=action_values.get)
    return random.choice(ACTIONS)

async def take_action(action):
    """Run one named move on the module-level simulator ``widget``.

    ``"forward"`` calls ``widget.step`` with 0.1, ``"left"`` and ``"right"``
    with 0.2, each with the matching keyword flag set to True. Any other
    value is ignored and nothing is sent to the widget.
    """
    # (move name, first positional argument handed to widget.step)
    moves = (("forward", 0.1), ("left", 0.2), ("right", 0.2))
    for name, amount in moves:
        if action == name:
            await widget.step(amount, **{name: True})
            return

async def train_q_learning(episodes=5, steps_per_episode=50):
    """Run tabular Q-learning against the module-level simulator ``widget``.

    Every step reads the sensor, picks an action with ``choose_action``,
    executes it with ``take_action``, reads the sensor again and applies the
    standard Q-learning update to ``q_table``. The widget is reset after each
    episode and the episode's summed reward is printed.

    Args:
        episodes: Number of episodes to run.
        steps_per_episode: Number of action steps inside each episode.
    """
    for ep in range(episodes):
        total_reward = 0
        for _ in range(steps_per_episode):
            data = widget.sensor()
            state = get_state(data.get("lidar", {}))

            action = choose_action(state)
            await take_action(action)

            next_lidar = widget.sensor().get("lidar", {})
            next_state = get_state(next_lidar)

            # Reward: penalize close walls. It is scored on the reading taken
            # AFTER the action, so it reflects what the action led to; scoring
            # the pre-action reading would make it independent of the action.
            # Hit points may be {"x", "y"} dicts, which np.linalg.norm cannot
            # take directly, so they are unpacked first.
            # NOTE(review): norms are of the raw coordinates; if these are
            # world coordinates rather than robot-relative ones, measure from
            # the robot's position instead -- confirm the sensor schema.
            distances = [
                np.linalg.norm([p["x"], p["y"]]) if isinstance(p, dict) else np.linalg.norm(p)
                for p in (next_lidar.get("hitPoints") or [])
            ]
            close_hits = int(np.sum(np.asarray(distances, dtype=float) < 50))
            reward = -close_hits * 0.1 + 1.0

            # Update Q-table; unseen states start with all-zero action values.
            q_table.setdefault(state, {a: 0 for a in ACTIONS})
            q_table.setdefault(next_state, {a: 0 for a in ACTIONS})

            old_value = q_table[state][action]
            next_max = max(q_table[next_state].values())
            q_table[state][action] = old_value + alpha * (reward + gamma * next_max - old_value)

            total_reward += reward

        # NOTE(review): reset() is called without await -- confirm that it is
        # a synchronous method on RobotSim.
        widget.reset()

        print(f"Episode {ep+1}: total reward = {total_reward:.2f}")

# Create a fresh simulator on the packaged simple map and render it; this
# rebinds the module-level `widget` that take_action() and
# train_q_learning() read.
widget = RobotSim(example_maps.gen_simple_map(), debugDraw=True, show_controls=True)
display(widget)

# Top-level `await` is accepted in Jupyter/IPython cells; in a plain script
# this call would have to be driven with asyncio.run(...) instead.
# NOTE(review): the recorded run reports exactly 50.00 for every episode,
# i.e. no step was ever penalised -- confirm that sensor() actually returns
# lidar hit points while this cell is being awaited.
await train_q_learning()


RobotSim(debugDraw=True, mapData={'map': [{'type': 'rectangle', 'x': 400, 'y': 0, 'width': 800, 'height': 30, …

Episode 1: total reward = 50.00
Episode 2: total reward = 50.00
Episode 3: total reward = 50.00
Episode 4: total reward = 50.00
Episode 5: total reward = 50.00
