In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import pkg_resources
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gymnasium as gym
import stable_baselines3 as sb3
import stable_baselines3.common.logger as logger
import stable_baselines3.common.callbacks as callbacks
import stable_baselines3.common.env_checker as env_checker
from dm_control import mjcf
from dm_control.rl.control import PhysicsError
import imageio
import scipy.spatial
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric as pyg
import torch.nn.functional as F
import torch_geometric.nn as gnn
import torch_geometric.loader as pyg_loader
import pytorch_lightning as pl
import torchmetrics
from torch.utils.data import Dataset
from pathlib import Path
from typing import Tuple, Callable, Optional, List, Union
from tqdm import trange
from dm_control.rl.control import PhysicsError
from PIL import Image

from flygym.arena.mujoco_arena import FlatTerrain
from flygym.envs.nmf_mujoco import NeuroMechFlyMuJoCo, MuJoCoParameters
from flygym.state import stretched_pose
import flygym.util.vision as vision
import flygym.util.config as config
from flygym.arena import BaseArena
from flygym.arena.mujoco_arena import OdorArena, FlatTerrain, GappedTerrain, BlocksTerrain
from flygym.util.data import color_cycle_rgb
from flygym.util.turning_controller import TurningController

from vision_model import VisualFeaturePreprocessor
from navigation_arena import ObstacleOdorArena

pygame 2.5.1 (SDL 2.28.2, Python 3.11.0)
Hello from the pygame community. https://www.pygame.org/contribute.html


  if not hasattr(tensorboard, "__version__") or LooseVersion(
  ) < LooseVersion("1.15"):
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  __import__("pkg_resources").declare_namespace(__name__)
  _PYTHON_LOWER_3_8 = LooseVersion(_PYTHON_VERSION) < LooseVersion("3.8")
  _PYTHON_LOWER_3_8 = LooseVersion(_PYTHON_VERSION) < LooseVersion("3.8")
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  __import__("pkg_resources").declare_namespace(__name__)


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Load retina graph

In [4]:
with open("data/ommatidia_graph.pkl", "rb") as f:
    ommatidia_graph_nx = pickle.load(f)
ommatidia_graph_pg = pyg.utils.from_networkx(ommatidia_graph_nx)

Load visual feature extraction model

In [5]:
vision_model = VisualFeaturePreprocessor.load_from_checkpoint(
    "data/models/visual_preprocessor.pt"
)

Define MDP task

In [6]:
class NMFNavigation(TurningController):
    def __init__(
        self,
        arena,
        vision_model,
        ommatidia_graph,
        device=torch.device("cpu"),
        decision_dt=0.1,
        n_stabilisation_dur=0.3,
        distance_threshold=15,
        max_time=5,
        test_mode=False,
        debug_mode=False,
        **kwargs,
    ) -> None:
        self.debug_mode = debug_mode
        sim_params = MuJoCoParameters(
            render_playspeed=0.1,
            render_camera="birdeye_cam",
            enable_vision=True,
            render_raw_vision=test_mode,
            enable_olfaction=True,
            render_mode="saved" if test_mode else "headless",
            vision_refresh_rate=int(1 / decision_dt),
        )
        super().__init__(
            sim_params=sim_params,
            arena=arena,
            stabilisation_dur=n_stabilisation_dur,
            detect_flip=True,
            **kwargs,
        )

        self.device = device
        self.ommatidia_graphs = [ommatidia_graph.clone(), ommatidia_graph.clone()]
        self.vision_model = vision_model.to(self.device)
        self.max_time = max_time
        self.arena = arena
        self.num_substeps = int(decision_dt / self.timestep)
        self.distance_threshold = distance_threshold

        # Override spaces
        # action space: 2D vector of amplitude and phase for oscillators on each side
        # observation space:
        #  - 2D vector of x-y position of object relative to the fly, norm. to [0, 1]
        #  - scalar probability that there is an object in view, [0, 1]
        #  - 2D vector of mean odor intensity on each side, norm. to [0, 1]
        #  - 2D vector of current oscillator amp. on each side, norm. to [0, 1]
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,))
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(7,))

        self._last_fly_tgt_dist = np.linalg.norm(
            np.zeros(2) - self.arena.odor_source[0, :2]
        )
        self._last_action = np.zeros((2,))

    def update_retina_graphs(self, intensities):
        intensities = torch.tensor(intensities, dtype=torch.float32).sum(axis=-1) / 255
        intensities = intensities.to(self.device)
        for i in range(2):
            self.ommatidia_graphs[i].x = intensities[i, :]

    def step(self, amplitude):
        try:
            obstacle_contact_counter = 0
            for i in range(self.num_substeps):
                raw_obs, _, raw_term, raw_trunc, info = super().step(amplitude)
                collision_forces = [
                    np.abs(self.physics.named.data.cfrc_ext[f"obstacle_{j}"]).sum()
                    for j in range(len(self.arena.obstacle_positions))
                ]
                if np.sum(collision_forces) > 1:
                    obstacle_contact_counter += 1
                super().render()
            collision = obstacle_contact_counter > 0.5 * self.num_substeps
        except PhysicsError:
            print("Physics error, resetting environment")
            return np.zeros((7,), dtype="float32"), 0, False, True, {}

        # Check if visual inputs are rendered recently
        assert abs(self.curr_time - self._last_vision_update_time) < 0.5 * self.timestep

        # Parse observations
        self.update_retina_graphs(self.curr_visual_input)
        pos_pred, obj_prob = self.vision_model(*self.ommatidia_graphs)
        pos_pred = pos_pred.detach().numpy().squeeze() / self.distance_threshold
        pos_pred = np.clip(pos_pred, 0, 1)
        obj_prob = obj_prob.detach().numpy()
        odor_intensity = raw_obs["odor_intensity"][0, :].reshape(2, 2).mean(axis=0)
        odor_intensity /= self.arena.peak_odor_intensity[0, 0]
        last_action = self._last_action / 2 + 0.5
        obs = np.concatenate(
            [pos_pred, obj_prob, odor_intensity, last_action], dtype=np.float32
        )

        # Calculate reward
        # calculate distance reward
        fly_pos = super().get_observation()["fly"][0, :2]
        tgt_pos = self.arena.odor_source[0, :2]
        distance = np.linalg.norm(fly_pos - tgt_pos)
        distance_reward = self._last_fly_tgt_dist - distance
        self._last_fly_tgt_dist = distance

        # check if fly is too close to any obstacle
        has_collision = False
        for obst_pos in self.arena.obstacle_positions:
            if np.linalg.norm(fly_pos - obst_pos) < self.arena.obstacle_radius + 1:
                has_collision = True
                break

        # calculate final reward
        if distance < 2:
            reward = 30
            terminated = True
            info["state_desc"] = "success"
        elif collision:
            reward = -5
            terminated = True
            info["state_desc"] = "collision"
        elif info["flip"]:
            reward = -5
            terminated = True
            info["state_desc"] = "flipped"
        else:
            reward = distance_reward
            terminated = False
            info["state_desc"] = "seeking"
        
        # apply penalty for rapid turning
        action_diff = np.abs(amplitude - self._last_action).sum() / 2

        info["distance_reward"] = distance_reward
        info["has_collision"] = has_collision
        info["distance"] = distance
        truncated = (
            self.curr_time > self.max_time and not terminated
        )  # start a new episode

        if self.debug_mode:
            print(f"fly_pos: {fly_pos}, reward={reward}, state={info['state_desc']}")
            if terminated:
                print("terminated")
            if truncated:
                print("truncated")
        
        self._last_action = amplitude
        return obs, reward, terminated, truncated, info

    def reset(self, seed=0):
        super().reset()
        obs = np.array([0, 0, 0, 0, 0, 0.5, 0.5], dtype="float32")
        self._last_fly_tgt_dist = np.linalg.norm(
            np.zeros(2) - self.arena.odor_source[0, :2]
        )
        self._last_action = np.zeros((2,))
        if self.debug_mode:
            print("resetting environment")
        return obs, {"state_desc": "reset"}

In [7]:
terrain_arena = FlatTerrain(ground_alpha=1)
arena = ObstacleOdorArena(
    terrain=terrain_arena,
    obstacle_positions=np.array([(7.5, -2.5), (12.5, 2.5)]),
    odor_source=np.array([[20, 0, 2]]),
    marker_size=0.5,
    obstacle_colors=(0.14, 0.14, 0.2, 1),
)
sim = NMFNavigation(
    spawn_pos=np.array([5, 0, 0]),
    arena=arena,
    vision_model=vision_model,
    ommatidia_graph=ommatidia_graph_pg,
    test_mode=True,
    debug_mode=True,
)
env_checker.check_env(sim)



resetting environment
resetting environment
resetting environment
fly_pos: [ 3.9329195  -0.10393089], reward=3.932663140450657, state=seeking
resetting environment
fly_pos: [4.38542067 0.00805808], reward=4.385419093901756, state=seeking
fly_pos: [5.00995423 0.1175348 ], reward=0.6241896037907111, state=seeking
fly_pos: [ 5.30737244 -0.22641765], reward=0.29646216193157926, state=seeking
fly_pos: [5.5954696  0.01594871], reward=-10, state=collision
terminated
resetting environment
fly_pos: [ 4.28742342 -0.14806542], reward=4.2868941967232175, state=seeking
fly_pos: [ 5.02696505 -0.0588084 ], reward=0.7399842772924003, state=seeking
fly_pos: [5.37436605 0.1219977 ], reward=0.34710840095400286, state=seeking
fly_pos: [ 5.54695655 -0.06689521], reward=-10, state=collision
terminated
resetting environment
fly_pos: [ 4.21534329 -0.26079222], reward=4.213707230717596, state=seeking
fly_pos: [ 4.2685994  -0.44876242], reward=0.05003566780369795, state=seeking


In [8]:
sim.reset()
for i in trange(10):
    obs, reward, terminated, truncated, info = sim.step(np.ones((2,)) * 0.8)
    if terminated:
        break

resetting environment


 10%|█         | 1/10 [00:02<00:24,  2.70s/it]

fly_pos: [ 4.22271406 -0.03561077], reward=4.222683541946729, state=seeking


 20%|██        | 2/10 [00:05<00:21,  2.63s/it]

fly_pos: [4.80596661 0.32148532], reward=0.5807242334746832, state=seeking


 30%|███       | 3/10 [00:07<00:17,  2.54s/it]

fly_pos: [4.79460167 0.90645849], reward=-0.029128747016116563, state=seeking


 30%|███       | 3/10 [00:10<00:24,  3.43s/it]

fly_pos: [5.92755868 0.92293877], reward=-10, state=collision
terminated





In [9]:
sim.save_video("test.mp4", stabilization_time=0)

In [10]:
terrain_arena = FlatTerrain(ground_alpha=1)
arena = ObstacleOdorArena(
    terrain=terrain_arena,
    obstacle_positions=np.array([(7.5, 0), (12.5, 5), (17.5, -5)]),
    odor_source=np.array([[25, 0, 2]]),
    marker_size=0.5,
    obstacle_colors=(0.14, 0.14, 0.2, 1),
)
sim = NMFNavigation(
    arena=arena,
    vision_model=vision_model,
    ommatidia_graph=ommatidia_graph_pg,
    max_time=5,
    test_mode=False,
)
env_checker.check_env(sim)

np.random.seed(0)
sb3.common.utils.set_random_seed(0, using_cuda=True)

start_from = None
train = True

log_dir = "logs/trial_3"
checkpoint_callback = callbacks.CheckpointCallback(
    save_freq=1000,
    save_path=log_dir,
    name_prefix="trial_3",
    save_replay_buffer=True,
    save_vecnormalize=True,
)
my_logger = logger.configure(log_dir, ["tensorboard", "stdout", "csv"])
model = sb3.SAC(
    "MlpPolicy",
    env=sim,
    policy_kwargs={"net_arch": [32, 32]},
    verbose=2,
    learning_rate=0.01,
)
if start_from is not None:
    model = sb3.SAC.load(start_from)
model.set_logger(my_logger)

if train:
    model.learn(total_timesteps=50_000, progress_bar=True, callback=checkpoint_callback)
    model.save("models/trial_3")

Logging to logs/trial_2


Output()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 13       |
|    ep_rew_mean     | -5.5     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 0        |
|    time_elapsed    | 69       |
|    total_timesteps | 52       |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | -6.08    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 0        |
|    time_elapsed    | 127      |
|    total_timesteps | 96       |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 13.2     |
|    ep_rew_mean     | -5.75    |
| time/              |          |
|    episodes        | 12       |
|    fps             | 0        |
|    time_elapsed    | 211      |
|    total_timesteps | 159      |
| train/             |          |
|    actor_loss      | -0.989   |
|    critic_loss     | 6.66     |
|    ent_coef        | 0.57     |
|    ent_coef_loss   | -1.79    |
|    learning_rate   | 0.01     |
|    n_updates       | 58       |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 15.5     |
|    ep_rew_mean     | -5.39    |
| time/              |          |
|    episodes        | 16       |
|    fps             | 0        |
|    time_elapsed    | 334      |
|    total_timesteps | 248      |
| train/             |          |
|    actor_loss      | -0.299   |
|    critic_loss     | 6.87     |
|    ent_coef        | 0.263    |
|    ent_coef_loss   | -3.53    |
|    learning_rate   | 0.01     |
|    n_updates       | 147      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 18.6     |
|    ep_rew_mean     | -5.4     |
| time/              |          |
|    episodes        | 20       |
|    fps             | 0        |
|    time_elapsed    | 505      |
|    total_timesteps | 371      |
| train/             |          |
|    actor_loss      | 0.0787   |
|    critic_loss     | 4.18     |
|    ent_coef        | 0.115    |
|    ent_coef_loss   | -2.82    |
|    learning_rate   | 0.01     |
|    n_updates       | 270      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 17.7     |
|    ep_rew_mean     | -5.22    |
| time/              |          |
|    episodes        | 24       |
|    fps             | 0        |
|    time_elapsed    | 583      |
|    total_timesteps | 425      |
| train/             |          |
|    actor_loss      | 0.501    |
|    critic_loss     | 2.96     |
|    ent_coef        | 0.0963   |
|    ent_coef_loss   | -1.1     |
|    learning_rate   | 0.01     |
|    n_updates       | 324      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 19.6     |
|    ep_rew_mean     | -5.36    |
| time/              |          |
|    episodes        | 28       |
|    fps             | 0        |
|    time_elapsed    | 758      |
|    total_timesteps | 550      |
| train/             |          |
|    actor_loss      | 0.0679   |
|    critic_loss     | 2.4      |
|    ent_coef        | 0.0622   |
|    ent_coef_loss   | -1.38    |
|    learning_rate   | 0.01     |
|    n_updates       | 449      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.4     |
|    ep_rew_mean     | -5.6     |
| time/              |          |
|    episodes        | 32       |
|    fps             | 0        |
|    time_elapsed    | 1028     |
|    total_timesteps | 750      |
| train/             |          |
|    actor_loss      | 0.232    |
|    critic_loss     | 2.62     |
|    ent_coef        | 0.0327   |
|    ent_coef_loss   | 0.899    |
|    learning_rate   | 0.01     |
|    n_updates       | 649      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.3     |
|    ep_rew_mean     | -6.04    |
| time/              |          |
|    episodes        | 36       |
|    fps             | 0        |
|    time_elapsed    | 1229     |
|    total_timesteps | 910      |
| train/             |          |
|    actor_loss      | 0.569    |
|    critic_loss     | 1.02     |
|    ent_coef        | 0.0209   |
|    ent_coef_loss   | 0.804    |
|    learning_rate   | 0.01     |
|    n_updates       | 809      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26       |
|    ep_rew_mean     | -5.69    |
| time/              |          |
|    episodes        | 40       |
|    fps             | 0        |
|    time_elapsed    | 1392     |
|    total_timesteps | 1041     |
| train/             |          |
|    actor_loss      | 0.723    |
|    critic_loss     | 2.08     |
|    ent_coef        | 0.025    |
|    ent_coef_loss   | 1.28     |
|    learning_rate   | 0.01     |
|    n_updates       | 940      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.9     |
|    ep_rew_mean     | -5.61    |
| time/              |          |
|    episodes        | 44       |
|    fps             | 0        |
|    time_elapsed    | 1462     |
|    total_timesteps | 1097     |
| train/             |          |
|    actor_loss      | 0.984    |
|    critic_loss     | 2.65     |
|    ent_coef        | 0.0263   |
|    ent_coef_loss   | 2.75     |
|    learning_rate   | 0.01     |
|    n_updates       | 996      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25       |
|    ep_rew_mean     | -5.17    |
| time/              |          |
|    episodes        | 48       |
|    fps             | 0        |
|    time_elapsed    | 1586     |
|    total_timesteps | 1199     |
| train/             |          |
|    actor_loss      | 0.984    |
|    critic_loss     | 2.96     |
|    ent_coef        | 0.0458   |
|    ent_coef_loss   | 1.07     |
|    learning_rate   | 0.01     |
|    n_updates       | 1098     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.8     |
|    ep_rew_mean     | -5.31    |
| time/              |          |
|    episodes        | 52       |
|    fps             | 0        |
|    time_elapsed    | 1700     |
|    total_timesteps | 1291     |
| train/             |          |
|    actor_loss      | 1.09     |
|    critic_loss     | 2.1      |
|    ent_coef        | 0.0549   |
|    ent_coef_loss   | 0.0462   |
|    learning_rate   | 0.01     |
|    n_updates       | 1190     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.9     |
|    ep_rew_mean     | -5.58    |
| time/              |          |
|    episodes        | 56       |
|    fps             | 0        |
|    time_elapsed    | 1898     |
|    total_timesteps | 1453     |
| train/             |          |
|    actor_loss      | 1.12     |
|    critic_loss     | 1.59     |
|    ent_coef        | 0.0435   |
|    ent_coef_loss   | -0.369   |
|    learning_rate   | 0.01     |
|    n_updates       | 1352     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.6     |
|    ep_rew_mean     | -5.68    |
| time/              |          |
|    episodes        | 60       |
|    fps             | 0        |
|    time_elapsed    | 2006     |
|    total_timesteps | 1539     |
| train/             |          |
|    actor_loss      | 1.51     |
|    critic_loss     | 1.93     |
|    ent_coef        | 0.0354   |
|    ent_coef_loss   | -1.1     |
|    learning_rate   | 0.01     |
|    n_updates       | 1438     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.8     |
|    ep_rew_mean     | -5.62    |
| time/              |          |
|    episodes        | 64       |
|    fps             | 0        |
|    time_elapsed    | 2068     |
|    total_timesteps | 1584     |
| train/             |          |
|    actor_loss      | 1.39     |
|    critic_loss     | 2.68     |
|    ent_coef        | 0.0394   |
|    ent_coef_loss   | 0.335    |
|    learning_rate   | 0.01     |
|    n_updates       | 1483     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.1     |
|    ep_rew_mean     | -5.56    |
| time/              |          |
|    episodes        | 68       |
|    fps             | 0        |
|    time_elapsed    | 2143     |
|    total_timesteps | 1642     |
| train/             |          |
|    actor_loss      | 1.53     |
|    critic_loss     | 2.41     |
|    ent_coef        | 0.0459   |
|    ent_coef_loss   | -0.795   |
|    learning_rate   | 0.01     |
|    n_updates       | 1541     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.5     |
|    ep_rew_mean     | -5.51    |
| time/              |          |
|    episodes        | 72       |
|    fps             | 0        |
|    time_elapsed    | 2207     |
|    total_timesteps | 1689     |
| train/             |          |
|    actor_loss      | 1.36     |
|    critic_loss     | 2.07     |
|    ent_coef        | 0.0396   |
|    ent_coef_loss   | -0.405   |
|    learning_rate   | 0.01     |
|    n_updates       | 1588     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.2     |
|    ep_rew_mean     | -4.69    |
| time/              |          |
|    episodes        | 76       |
|    fps             | 0        |
|    time_elapsed    | 2305     |
|    total_timesteps | 1765     |
| train/             |          |
|    actor_loss      | 1.72     |
|    critic_loss     | 3.11     |
|    ent_coef        | 0.0304   |
|    ent_coef_loss   | -0.205   |
|    learning_rate   | 0.01     |
|    n_updates       | 1664     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.1     |
|    ep_rew_mean     | -4.75    |
| time/              |          |
|    episodes        | 80       |
|    fps             | 0        |
|    time_elapsed    | 2506     |
|    total_timesteps | 1927     |
| train/             |          |
|    actor_loss      | 1.81     |
|    critic_loss     | 3.17     |
|    ent_coef        | 0.0474   |
|    ent_coef_loss   | -0.113   |
|    learning_rate   | 0.01     |
|    n_updates       | 1826     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.1     |
|    ep_rew_mean     | -4.94    |
| time/              |          |
|    episodes        | 84       |
|    fps             | 0        |
|    time_elapsed    | 2627     |
|    total_timesteps | 2024     |
| train/             |          |
|    actor_loss      | 1.94     |
|    critic_loss     | 2.96     |
|    ent_coef        | 0.0499   |
|    ent_coef_loss   | -0.265   |
|    learning_rate   | 0.01     |
|    n_updates       | 1923     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.8     |
|    ep_rew_mean     | -5.06    |
| time/              |          |
|    episodes        | 88       |
|    fps             | 0        |
|    time_elapsed    | 2714     |
|    total_timesteps | 2095     |
| train/             |          |
|    actor_loss      | 1.64     |
|    critic_loss     | 2.59     |
|    ent_coef        | 0.0368   |
|    ent_coef_loss   | 1.06     |
|    learning_rate   | 0.01     |
|    n_updates       | 1994     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.1     |
|    ep_rew_mean     | -5.12    |
| time/              |          |
|    episodes        | 92       |
|    fps             | 0        |
|    time_elapsed    | 2865     |
|    total_timesteps | 2219     |
| train/             |          |
|    actor_loss      | 2.1      |
|    critic_loss     | 2.62     |
|    ent_coef        | 0.0326   |
|    ent_coef_loss   | -1.47    |
|    learning_rate   | 0.01     |
|    n_updates       | 2118     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.2     |
|    ep_rew_mean     | -5.58    |
| time/              |          |
|    episodes        | 96       |
|    fps             | 0        |
|    time_elapsed    | 3106     |
|    total_timesteps | 2419     |
| train/             |          |
|    actor_loss      | 2.2      |
|    critic_loss     | 2.79     |
|    ent_coef        | 0.0397   |
|    ent_coef_loss   | 0.227    |
|    learning_rate   | 0.01     |
|    n_updates       | 2318     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.5     |
|    ep_rew_mean     | -5.61    |
| time/              |          |
|    episodes        | 100      |
|    fps             | 0        |
|    time_elapsed    | 3271     |
|    total_timesteps | 2554     |
| train/             |          |
|    actor_loss      | 2.31     |
|    critic_loss     | 1.82     |
|    ent_coef        | 0.03     |
|    ent_coef_loss   | 0.418    |
|    learning_rate   | 0.01     |
|    n_updates       | 2453     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27       |
|    ep_rew_mean     | -6       |
| time/              |          |
|    episodes        | 104      |
|    fps             | 0        |
|    time_elapsed    | 3512     |
|    total_timesteps | 2754     |
| train/             |          |
|    actor_loss      | 2.78     |
|    critic_loss     | 2.04     |
|    ent_coef        | 0.034    |
|    ent_coef_loss   | 0.0509   |
|    learning_rate   | 0.01     |
|    n_updates       | 2653     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.6     |
|    ep_rew_mean     | -6.24    |
| time/              |          |
|    episodes        | 108      |
|    fps             | 0        |
|    time_elapsed    | 3754     |
|    total_timesteps | 2954     |
| train/             |          |
|    actor_loss      | 3.35     |
|    critic_loss     | 1.63     |
|    ent_coef        | 0.0213   |
|    ent_coef_loss   | -0.28    |
|    learning_rate   | 0.01     |
|    n_updates       | 2853     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.6     |
|    ep_rew_mean     | -6.08    |
| time/              |          |
|    episodes        | 112      |
|    fps             | 0        |
|    time_elapsed    | 3962     |
|    total_timesteps | 3122     |
| train/             |          |
|    actor_loss      | 3.38     |
|    critic_loss     | 1.46     |
|    ent_coef        | 0.0196   |
|    ent_coef_loss   | 0.646    |
|    learning_rate   | 0.01     |
|    n_updates       | 3021     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.4     |
|    ep_rew_mean     | -6.44    |
| time/              |          |
|    episodes        | 116      |
|    fps             | 0        |
|    time_elapsed    | 4172     |
|    total_timesteps | 3284     |
| train/             |          |
|    actor_loss      | 3.55     |
|    critic_loss     | 1.69     |
|    ent_coef        | 0.023    |
|    ent_coef_loss   | 0.48     |
|    learning_rate   | 0.01     |
|    n_updates       | 3183     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.8     |
|    ep_rew_mean     | -6.73    |
| time/              |          |
|    episodes        | 120      |
|    fps             | 0        |
|    time_elapsed    | 4383     |
|    total_timesteps | 3448     |
| train/             |          |
|    actor_loss      | 3.42     |
|    critic_loss     | 2.36     |
|    ent_coef        | 0.0237   |
|    ent_coef_loss   | -0.322   |
|    learning_rate   | 0.01     |
|    n_updates       | 3347     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.6     |
|    ep_rew_mean     | -6.57    |
| time/              |          |
|    episodes        | 124      |
|    fps             | 0        |
|    time_elapsed    | 4554     |
|    total_timesteps | 3581     |
| train/             |          |
|    actor_loss      | 3.93     |
|    critic_loss     | 2        |
|    ent_coef        | 0.0344   |
|    ent_coef_loss   | 0.814    |
|    learning_rate   | 0.01     |
|    n_updates       | 3480     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.2     |
|    ep_rew_mean     | -6.74    |
| time/              |          |
|    episodes        | 128      |
|    fps             | 0        |
|    time_elapsed    | 4670     |
|    total_timesteps | 3672     |
| train/             |          |
|    actor_loss      | 4.15     |
|    critic_loss     | 1.94     |
|    ent_coef        | 0.0291   |
|    ent_coef_loss   | 0.405    |
|    learning_rate   | 0.01     |
|    n_updates       | 3571     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.8     |
|    ep_rew_mean     | -6.76    |
| time/              |          |
|    episodes        | 132      |
|    fps             | 0        |
|    time_elapsed    | 4876     |
|    total_timesteps | 3834     |
| train/             |          |
|    actor_loss      | 4.47     |
|    critic_loss     | 1.54     |
|    ent_coef        | 0.0398   |
|    ent_coef_loss   | -0.103   |
|    learning_rate   | 0.01     |
|    n_updates       | 3733     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.9     |
|    ep_rew_mean     | -6.73    |
| time/              |          |
|    episodes        | 136      |
|    fps             | 0        |
|    time_elapsed    | 5082     |
|    total_timesteps | 3997     |
| train/             |          |
|    actor_loss      | 4.46     |
|    critic_loss     | 1.33     |
|    ent_coef        | 0.0219   |
|    ent_coef_loss   | 0.324    |
|    learning_rate   | 0.01     |
|    n_updates       | 3896     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.5     |
|    ep_rew_mean     | -6.7     |
| time/              |          |
|    episodes        | 140      |
|    fps             | 0        |
|    time_elapsed    | 5206     |
|    total_timesteps | 4092     |
| train/             |          |
|    actor_loss      | 4.65     |
|    critic_loss     | 1.57     |
|    ent_coef        | 0.0312   |
|    ent_coef_loss   | -0.251   |
|    learning_rate   | 0.01     |
|    n_updates       | 3991     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.6     |
|    ep_rew_mean     | -6.8     |
| time/              |          |
|    episodes        | 144      |
|    fps             | 0        |
|    time_elapsed    | 5409     |
|    total_timesteps | 4253     |
| train/             |          |
|    actor_loss      | 4.67     |
|    critic_loss     | 1.71     |
|    ent_coef        | 0.0271   |
|    ent_coef_loss   | 1.37     |
|    learning_rate   | 0.01     |
|    n_updates       | 4152     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.9     |
|    ep_rew_mean     | -7.04    |
| time/              |          |
|    episodes        | 148      |
|    fps             | 0        |
|    time_elapsed    | 5581     |
|    total_timesteps | 4391     |
| train/             |          |
|    actor_loss      | 5.08     |
|    critic_loss     | 1.55     |
|    ent_coef        | 0.0287   |
|    ent_coef_loss   | -0.0671  |
|    learning_rate   | 0.01     |
|    n_updates       | 4290     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32.7     |
|    ep_rew_mean     | -7.11    |
| time/              |          |
|    episodes        | 152      |
|    fps             | 0        |
|    time_elapsed    | 5810     |
|    total_timesteps | 4561     |
| train/             |          |
|    actor_loss      | 5.12     |
|    critic_loss     | 1.82     |
|    ent_coef        | 0.0444   |
|    ent_coef_loss   | 0.439    |
|    learning_rate   | 0.01     |
|    n_updates       | 4460     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32.2     |
|    ep_rew_mean     | -7.19    |
| time/              |          |
|    episodes        | 156      |
|    fps             | 0        |
|    time_elapsed    | 5960     |
|    total_timesteps | 4678     |
| train/             |          |
|    actor_loss      | 4.95     |
|    critic_loss     | 1.53     |
|    ent_coef        | 0.0324   |
|    ent_coef_loss   | 0.263    |
|    learning_rate   | 0.01     |
|    n_updates       | 4577     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32.9     |
|    ep_rew_mean     | -7.35    |
| time/              |          |
|    episodes        | 160      |
|    fps             | 0        |
|    time_elapsed    | 6146     |
|    total_timesteps | 4825     |
| train/             |          |
|    actor_loss      | 5.74     |
|    critic_loss     | 1.44     |
|    ent_coef        | 0.029    |
|    ent_coef_loss   | 1.04     |
|    learning_rate   | 0.01     |
|    n_updates       | 4724     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 33.4     |
|    ep_rew_mean     | -7.2     |
| time/              |          |
|    episodes        | 164      |
|    fps             | 0        |
|    time_elapsed    | 6275     |
|    total_timesteps | 4927     |
| train/             |          |
|    actor_loss      | 5.68     |
|    critic_loss     | 1.89     |
|    ent_coef        | 0.0388   |
|    ent_coef_loss   | -0.824   |
|    learning_rate   | 0.01     |
|    n_updates       | 4826     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.2     |
|    ep_rew_mean     | -7.46    |
| time/              |          |
|    episodes        | 168      |
|    fps             | 0        |
|    time_elapsed    | 6445     |
|    total_timesteps | 5064     |
| train/             |          |
|    actor_loss      | 6.13     |
|    critic_loss     | 1.7      |
|    ent_coef        | 0.0188   |
|    ent_coef_loss   | -2.36    |
|    learning_rate   | 0.01     |
|    n_updates       | 4963     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.4     |
|    ep_rew_mean     | -7.8     |
| time/              |          |
|    episodes        | 172      |
|    fps             | 0        |
|    time_elapsed    | 6664     |
|    total_timesteps | 5231     |
| train/             |          |
|    actor_loss      | 6.23     |
|    critic_loss     | 1.72     |
|    ent_coef        | 0.0238   |
|    ent_coef_loss   | -0.0745  |
|    learning_rate   | 0.01     |
|    n_updates       | 5130     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.6     |
|    ep_rew_mean     | -8.35    |
| time/              |          |
|    episodes        | 176      |
|    fps             | 0        |
|    time_elapsed    | 6796     |
|    total_timesteps | 5323     |
| train/             |          |
|    actor_loss      | 6.41     |
|    critic_loss     | 1.8      |
|    ent_coef        | 0.0147   |
|    ent_coef_loss   | -0.484   |
|    learning_rate   | 0.01     |
|    n_updates       | 5222     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.5     |
|    ep_rew_mean     | -8.28    |
| time/              |          |
|    episodes        | 180      |
|    fps             | 0        |
|    time_elapsed    | 6861     |
|    total_timesteps | 5373     |
| train/             |          |
|    actor_loss      | 6.01     |
|    critic_loss     | 2.36     |
|    ent_coef        | 0.0129   |
|    ent_coef_loss   | 1.03     |
|    learning_rate   | 0.01     |
|    n_updates       | 5272     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.4     |
|    ep_rew_mean     | -8.18    |
| time/              |          |
|    episodes        | 184      |
|    fps             | 0        |
|    time_elapsed    | 6982     |
|    total_timesteps | 5464     |
| train/             |          |
|    actor_loss      | 6.71     |
|    critic_loss     | 1.18     |
|    ent_coef        | 0.0373   |
|    ent_coef_loss   | 1.03     |
|    learning_rate   | 0.01     |
|    n_updates       | 5363     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.1     |
|    ep_rew_mean     | -8.06    |
| time/              |          |
|    episodes        | 188      |
|    fps             | 0        |
|    time_elapsed    | 7037     |
|    total_timesteps | 5504     |
| train/             |          |
|    actor_loss      | 6.78     |
|    critic_loss     | 1.15     |
|    ent_coef        | 0.0462   |
|    ent_coef_loss   | -0.945   |
|    learning_rate   | 0.01     |
|    n_updates       | 5403     |
---------------------------------


In [None]:
np.abs(sim.physics.named.data.cfrc_ext["obstacle_0"]).sum()

23.330371908742478