### Create environment

In [1]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from stable_baselines3.common.env_checker import check_env

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

import gym_kilobots

In [2]:
# Scenario layouts. Each object entry is ((x, y), orientation).

# Four objects on the corners of a square centred on the origin, listed
# clockwise from the top-left corner. Not passed to gym.make in the cells
# shown here; kept as an alternative object layout.
square_objects = [
    ((sign_x * 0.25, sign_y * 0.25), 0.0)
    for sign_x, sign_y in ((-1, 1), (1, 1), (1, -1), (-1, -1))
]

# A single object at the centre of the environment.
single_center_object = [((0.0, 0.0), 0.0)]

# Fixed light position.
light_pos = (-0.5, 0.5)

# Explicit start positions for four kilobots.
# NOTE(review): the positions are duplicated pairwise, so two kilobots start
# at exactly the same coordinates - confirm this overlap is intended.
kb_positions = [(-0.50, 0.60)] * 2 + [(-0.60, 0.50)] * 2

# Build the environment with an explicitly pinned scenario. Per the original
# note, the environment places the objects randomly when no explicit
# configuration is supplied.
env_kwargs = {
    'render_mode': 'human',
    'num_kilobots': len(kb_positions),  # one kilobot per listed position
    'object_config': single_center_object,
    'light_position': light_pos,
    'kilobot_positions': kb_positions,
}
env = gym.make('Kilobots-QuadAssembly-v0', **env_kwargs)


In [3]:
# Run the Stable-Baselines3 environment checker on `env` before training,
# with warnings enabled.
check_env(env, warn=True)



In [None]:
# Train a PPO agent on `env` and save the result to disk.
# `env` is passed in directly: as the training log shows, Stable-Baselines3
# wraps it in `Monitor` and `DummyVecEnv` on its own, so no manual
# vectorisation step is needed here.
SEED = 42                  # fixed seed so a re-run reproduces the same training
TOTAL_TIMESTEPS = 500_000  # number of environment steps to train for
MODEL_PATH = "swarm_ppo"   # same name the evaluation cell loads from

model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
model.learn(total_timesteps=TOTAL_TIMESTEPS)
model.save(MODEL_PATH)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 500      |
|    ep_rew_mean     | 0.0114   |
| time/              |          |
|    fps             | 490      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 500          |
|    ep_rew_mean          | 0.0116       |
| time/                   |              |
|    fps                  | 436          |
|    iterations           | 2            |
|    time_elapsed         | 9            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0076027685 |
|    clip_fraction        | 0.024        |
|    clip_range           | 0.2          |
|    en

In [None]:
# Reload the PPO model from "swarm_ppo", the name the training cell saved to,
# so this cell evaluates the saved model rather than the in-memory one.
model = PPO.load("swarm_ppo")

def controller(x, deterministic=False):
    """Return the action the loaded `model` picks for observation `x`.

    Args:
        x: Observation passed straight to `model.predict`.
        deterministic: Forwarded to `model.predict`. Defaults to False,
            which is also the value `predict` uses when it is omitted, so
            existing `controller(x)` calls behave as before.

    Returns:
        The action part of `model.predict`'s result; the state returned
        alongside it is discarded.
    """
    action, _ = model.predict(x, deterministic=deterministic)
    return action

# Roll out a single episode with the trained policy, rendering every step.
# The try/finally guarantees the environment is closed even if a step raises
# or the cell is interrupted.
try:
    obs, info = env.reset()
    terminated = truncated = False

    # Run until the environment reports the end of the episode.
    while not (terminated or truncated):
        env.render()

        action = controller(obs)
        obs, reward, terminated, truncated, info = env.step(action)

        # Print the status of the objects via the unwrapped environment
        # (position and orientation, per the recorded output).
        print(env.unwrapped.get_objects_status())
finally:
    env.close()  # Ensure the environment is properly closed



['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']
['object 0: position: [0. 0.], orientation: 0.0']


: 