In [1]:
import gymnasium as gym
import numpy as np
import torch
import open3d as o3d
from gymnasium import spaces
import copy
import matplotlib.pyplot as plt

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [9]:
class CoverageEnv(gym.Env):
    def __init__(self, obj_file_path, radius=20.0, coverage_threshold=0.98):
        super(CoverageEnv, self).__init__()

        self.mesh = o3d.io.read_triangle_mesh(obj_file_path)
        self.mesh.compute_vertex_normals()
        
        self.vertices = torch.tensor(np.asarray(self.mesh.vertices), dtype=torch.float32).cuda()
        self.faces = torch.tensor(np.asarray(self.mesh.triangles), dtype=torch.int64).cuda()
        self.normals = torch.tensor(np.asarray(self.mesh.vertex_normals), dtype=torch.float32).cuda()
        
        self.num_faces = self.faces.shape[0]
        self.radius = radius
        self.coverage_threshold = coverage_threshold

        self.observation_space = spaces.Box(low=0, high=1, shape=(self.num_faces,), dtype=np.int8)
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

        self.covered_faces = torch.zeros(self.num_faces, dtype=torch.bool).cuda()
        self.total_covered = 0
        self.steps = 0

        # self.vis = o3d.visualization.Visualizer()
        # self.vis.create_window(visible=False)
        # self.vis.add_geometry(self.mesh)
        # self.ctr = self.vis.get_view_control()
        # self.camera = self.vis.get_view_control().convert_to_pinhole_camera_parameters()
        self.actions = []

        self.reward_weights = np.array([1,     # w_newly_covered, 
                                       100.0,    # w_threshold_bonus, 
                                       -10.0,    # w_step_penalty, 
                                    ])

    def reset(self, seed=None):
        super().reset(seed=seed)
        self.covered_faces.fill_(False)
        self.total_covered = 0
        self.steps = 0
        return self._get_obs(), {}

    def step(self, action):
        self.last_action = action
        self.actions.append(action)
        self.steps += 1
        
        action = torch.tensor(action, dtype=torch.float32).cuda()
        theta = (action[0] + 1) * np.pi
        phi = action[1] * np.pi / 2

        x = self.radius * torch.sin(theta) * torch.cos(phi)
        y = self.radius * torch.sin(theta) * torch.sin(phi)
        z = self.radius * torch.cos(theta)
        viewpoint = torch.tensor([x, y, z], dtype=torch.float32).cuda()

        face_centers = torch.mean(self.vertices[self.np.dot(self.reward_weights, r)faces], dim=1)
        face_normals = torch.cross(self.vertices[self.faces[:, 1]] - self.vertices[self.faces[:, 0]],
                                   self.vertices[self.faces[:, 2]] - self.vertices[self.faces[:, 0]])
        face_normals = face_normals / torch.norm(face_normals, dim=1, keepdim=True)

        view_directions = face_centers - viewpoint
        view_directions = view_directions / torch.norm(view_directions, dim=1, keepdim=True)

        visible_faces = torch.sum(face_normals * view_directions, dim=1) < 0

        newly_covered = visible_faces & ~self.covered_faces
        self.covered_faces |= visible_faces
        self.total_covered = torch.sum(self.covered_faces).item()

        coverage = self.total_covered / self.num_faces
        reward = self._calculate_reward(newly_covered, coverage)
        done = coverage >= self.coverage_threshold or self.steps >= 1000

        return self._get_obs(), reward, done, False, {}

    def _calculate_reward(self, newly_covered, coverage):
        # Calculate individual reward components
        r = np.array([
            torch.sum(newly_covered).item(),
            1 if coverage >= self.coverage_threshold else 0,
            len(self.actions)
            ])
        print(r)
        print(np.dot(self.reward_weights, r))
        # Combine reward components
        return np.dot(self.reward_weights, r)


    def _get_obs(self):
        return self.covered_faces.cpu().numpy()

    def render(self):
        # Create a new visualizer for each render call
        vis = o3d.visualization.Visualizer()
        vis.create_window()

        # Create a copy of the mesh for visualization
        vis_mesh = o3d.geometry.TriangleMesh()
        vis_mesh.vertices = o3d.utility.Vector3dVector(self.vertices.cpu().numpy())
        vis_mesh.triangles = o3d.utility.Vector3iVector(self.faces.cpu().numpy())

        # Color the mesh based on coverage
        colors = np.array([[0, 1, 0] if covered else [1, 0, 0] for covered in self.covered_faces.cpu().numpy()])
        vis_mesh.vertex_colors = o3d.utility.Vector3dVector(colors)

        # Add the mesh to the visualizer
        vis.add_geometry(vis_mesh)

        # Create a sphere to represent the viewpoint
        viewpoint = o3d.geometry.TriangleMesh.create_sphere(radius=1)
        viewpoint.compute_vertex_normals()
        viewpoint.paint_uniform_color([1, 0, 0])  # Blue color for viewpoint

        # Set the position of the viewpoint
        # Move last_action to CPU before using with numpy
        last_action_cpu = self.last_action.cpu().numpy()
        theta = (last_action_cpu[0] + 1) * np.pi
        phi = last_action_cpu[1] * np.pi / 2
        x = self.radius * np.sin(theta) * np.cos(phi)
        y = self.radius * np.sin(theta) * np.sin(phi)
        z = self.radius * np.cos(theta)
        viewpoint.translate([x, y, z])

        # # Add the viewpoint to the visualizer
        vis.add_geometry(viewpoint)
        # o3d.visualization.draw_geometries([viewpoint, vis_mesh])

        # # Set up the camera view
        ctr = vis.get_view_control()
        ctr.set_front([0, 0, -1])
        ctr.set_lookat([0, 0, 0])
        ctr.set_up([0, 1, 0])
        ctr.set_zoom(0.8)

        # # Update the geometry and render
        vis.update_geometry(vis_mesh)
        vis.update_geometry(viewpoint)
        vis.poll_events()
        vis.update_renderer()

        # Capture and display the image
        img = vis.capture_screen_float_buffer(do_render=True)
        vis.destroy_window()

        # Convert the image to numpy array and display using matplotlib
        plt.imshow(np.asarray(img))
        plt.title(f"Coverage: {self.total_covered / self.num_faces:.2%}")
        plt.axis('off')
        plt.show()


    def close(self):
        if hasattr(self, 'vis'):
            self.vis.destroy_window()

In [10]:
env = CoverageEnv("/home/dir/RL_CoveragePlanning/test_models/modified/test_7.obj")
obs = env.reset()

for i in range(20):
    action = env.action_space.sample()
    action_tensor = torch.tensor(action, dtype=torch.float32).cuda()
    
    obs, reward, done, _, info = env.step(action_tensor)
    print(f"Idx: {i:3d} | reward: {reward:7.2f} | Coverage Progress: {env.total_covered / env.num_faces :5.4f} |Done: {done}")
    # env.render()
    
    if done:
        break

[4551    0    1]
4541.0
Idx:   0 | reward: 4541.00 | Coverage Progress: 0.4551 |Done: False
[2406    0    2]
2386.0
Idx:   1 | reward: 2386.00 | Coverage Progress: 0.6957 |Done: False
[2045    0    3]
2015.0
Idx:   2 | reward: 2015.00 | Coverage Progress: 0.9002 |Done: False
[224   0   4]
184.0
Idx:   3 | reward:  184.00 | Coverage Progress: 0.9226 |Done: False
[49  0  5]
-1.0
Idx:   4 | reward:   -1.00 | Coverage Progress: 0.9275 |Done: False
[320   0   6]
260.0
Idx:   5 | reward:  260.00 | Coverage Progress: 0.9595 |Done: False
[5 0 7]
-65.0
Idx:   6 | reward:  -65.00 | Coverage Progress: 0.9600 |Done: False
[19  0  8]
-61.0
Idx:   7 | reward:  -61.00 | Coverage Progress: 0.9619 |Done: False
[278   1   9]
288.0
Idx:   8 | reward:  288.00 | Coverage Progress: 0.9897 |Done: True


  action = torch.tensor(action, dtype=torch.float32).cuda()


In [4]:
actions = env.actions

vis_mesh = o3d.geometry.TriangleMesh()
vis_mesh.vertices = o3d.utility.Vector3dVector(env.vertices.cpu().numpy())
vis_mesh.triangles = o3d.utility.Vector3iVector(env.faces.cpu().numpy())

objects = [vis_mesh]

radius = env.radius

for action in actions:
    # Create a sphere to represent the viewpoint
    viewpoint = o3d.geometry.TriangleMesh.create_sphere(radius=0.5)
    viewpoint.compute_vertex_normals()
    viewpoint.paint_uniform_color([1, 0, 0])  # Blue color for viewpoint

    # Set the position of the viewpoint
    # Move last_action to CPU before using with numpy
    action_cpu = action.cpu().numpy()
    theta = (action_cpu[0] + 1) * np.pi
    phi = action_cpu[1] * np.pi / 2
    x = radius * np.sin(theta) * np.cos(phi)
    y = radius * np.sin(theta) * np.sin(phi)
    z = radius * np.cos(theta)
    viewpoint.translate([x, y, z])

    objects.append(viewpoint)

o3d.visualization.draw_geometries(objects, mesh_show_wireframe = False)

## Training a Sample Agent

In [4]:
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

2024-09-30 02:15:08.429597: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-30 02:15:08.431276: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-30 02:15:08.459192: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
env = ObjCoverageEnv("/home/dir/RL_CoveragePlanning/test_models/modified/test_7.obj")

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=int(1e5), progress_bar=True)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 19.8     |
|    ep_rew_mean     | 1e+03    |
| time/              |          |
|    fps             | 1301     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 18.6         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 1057         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0066525214 |
|    clip_fraction        | 0.035        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.83        |
|    explained_variance   | -0.000432    |
|    learning_rate        | 0.0003       |
|    loss                 | 4.36e+04     |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00249     |
|    std                  | 0.99         |
|    value_loss           | 6.07e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 20           |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 1017         |
|    iterations           | 3            |
|    time_elapsed         | 6            |
|    total_timesteps      | 6144         |
| train/                  |              |
|    approx_kl            | 0.0035286718 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.81        |
|    explained_variance   | -0.0108      |
|    learning_rate        | 0.0003       |
|    loss                 | 4.13e+04     |
|    n_updates            | 20           |
|    policy_gradient_loss | -0.00164     |
|    std                  | 0.984        |
|    value_loss           | 6.34e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 20.3         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 1000         |
|    iterations           | 4            |
|    time_elapsed         | 8            |
|    total_timesteps      | 8192         |
| train/                  |              |
|    approx_kl            | 0.0038105575 |
|    clip_fraction        | 0.053        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.8         |
|    explained_variance   | -0.003       |
|    learning_rate        | 0.0003       |
|    loss                 | 4.54e+04     |
|    n_updates            | 30           |
|    policy_gradient_loss | -0.00424     |
|    std                  | 0.976        |
|    value_loss           | 5.71e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 20.1         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 987          |
|    iterations           | 5            |
|    time_elapsed         | 10           |
|    total_timesteps      | 10240        |
| train/                  |              |
|    approx_kl            | 0.0031803688 |
|    clip_fraction        | 0.0291       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.78        |
|    explained_variance   | -0.00102     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.5e+04      |
|    n_updates            | 40           |
|    policy_gradient_loss | -0.00262     |
|    std                  | 0.967        |
|    value_loss           | 5.59e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 19.3         |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 978          |
|    iterations           | 6            |
|    time_elapsed         | 12           |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 0.0060643163 |
|    clip_fraction        | 0.0418       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.77        |
|    explained_variance   | -0.00048     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.18e+04     |
|    n_updates            | 50           |
|    policy_gradient_loss | -0.00327     |
|    std                  | 0.964        |
|    value_loss           | 5.62e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 18.6         |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 971          |
|    iterations           | 7            |
|    time_elapsed         | 14           |
|    total_timesteps      | 14336        |
| train/                  |              |
|    approx_kl            | 0.0029902146 |
|    clip_fraction        | 0.0409       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.76        |
|    explained_variance   | -0.000263    |
|    learning_rate        | 0.0003       |
|    loss                 | 3.6e+04      |
|    n_updates            | 60           |
|    policy_gradient_loss | -0.00433     |
|    std                  | 0.956        |
|    value_loss           | 5.74e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.2        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 969         |
|    iterations           | 8           |
|    time_elapsed         | 16          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.007702332 |
|    clip_fraction        | 0.0649      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.75       |
|    explained_variance   | -0.000162   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.99e+04    |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.00442    |
|    std                  | 0.954       |
|    value_loss           | 5.91e+04    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 20.4         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 968          |
|    iterations           | 9            |
|    time_elapsed         | 19           |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.0057820613 |
|    clip_fraction        | 0.063        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.74        |
|    explained_variance   | -0.000106    |
|    learning_rate        | 0.0003       |
|    loss                 | 3.16e+04     |
|    n_updates            | 80           |
|    policy_gradient_loss | -0.00595     |
|    std                  | 0.949        |
|    value_loss           | 5.69e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18.3        |
|    ep_rew_mean          | 998         |
| time/                   |             |
|    fps                  | 965         |
|    iterations           | 10          |
|    time_elapsed         | 21          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.004911892 |
|    clip_fraction        | 0.0399      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.73       |
|    explained_variance   | -7.06e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.09e+04    |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.00567    |
|    std                  | 0.943       |
|    value_loss           | 5.26e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.8        |
|    ep_rew_mean          | 999         |
| time/                   |             |
|    fps                  | 963         |
|    iterations           | 11          |
|    time_elapsed         | 23          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.007419013 |
|    clip_fraction        | 0.0812      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.72       |
|    explained_variance   | -4.99e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.47e+04    |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00458    |
|    std                  | 0.949       |
|    value_loss           | 5.79e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 22.1        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 962         |
|    iterations           | 12          |
|    time_elapsed         | 25          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.007657777 |
|    clip_fraction        | 0.0656      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.72       |
|    explained_variance   | -3.64e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.63e+04    |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00649    |
|    std                  | 0.937       |
|    value_loss           | 5.39e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.5        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 961         |
|    iterations           | 13          |
|    time_elapsed         | 27          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.008127503 |
|    clip_fraction        | 0.0749      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.69       |
|    explained_variance   | -2.69e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.42e+04    |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00307    |
|    std                  | 0.927       |
|    value_loss           | 4.6e+04     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 19.4         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 960          |
|    iterations           | 14           |
|    time_elapsed         | 29           |
|    total_timesteps      | 28672        |
| train/                  |              |
|    approx_kl            | 0.0035107613 |
|    clip_fraction        | 0.0729       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.69        |
|    explained_variance   | -1.97e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.66e+04     |
|    n_updates            | 130          |
|    policy_gradient_loss | -0.00336     |
|    std                  | 0.927        |
|    value_loss           | 5.04e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 19.3         |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 958          |
|    iterations           | 15           |
|    time_elapsed         | 32           |
|    total_timesteps      | 30720        |
| train/                  |              |
|    approx_kl            | 0.0042164912 |
|    clip_fraction        | 0.0697       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.69        |
|    explained_variance   | -1.53e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 4.08e+04     |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.00346     |
|    std                  | 0.933        |
|    value_loss           | 5.18e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 18.4         |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 958          |
|    iterations           | 16           |
|    time_elapsed         | 34           |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0044580474 |
|    clip_fraction        | 0.0591       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.7         |
|    explained_variance   | -1.17e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 4.08e+04     |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.00291     |
|    std                  | 0.935        |
|    value_loss           | 5.21e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 19.6         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 957          |
|    iterations           | 17           |
|    time_elapsed         | 36           |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 0.0044990685 |
|    clip_fraction        | 0.0471       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.7         |
|    explained_variance   | -8.82e-06    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.83e+04     |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.00382     |
|    std                  | 0.935        |
|    value_loss           | 5.35e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.1        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 956         |
|    iterations           | 18          |
|    time_elapsed         | 38          |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.008903649 |
|    clip_fraction        | 0.0563      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.71       |
|    explained_variance   | -6.79e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.3e+04     |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.00469    |
|    std                  | 0.942       |
|    value_loss           | 5.06e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18          |
|    ep_rew_mean          | 999         |
| time/                   |             |
|    fps                  | 956         |
|    iterations           | 19          |
|    time_elapsed         | 40          |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.018859822 |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.73       |
|    explained_variance   | -5.25e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.42e+04    |
|    n_updates            | 180         |
|    policy_gradient_loss | 0.00715     |
|    std                  | 0.952       |
|    value_loss           | 4.95e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 21          |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 956         |
|    iterations           | 20          |
|    time_elapsed         | 42          |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.007646161 |
|    clip_fraction        | 0.0884      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.74       |
|    explained_variance   | -4.29e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.73e+04    |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0033     |
|    std                  | 0.951       |
|    value_loss           | 5.28e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 21.1        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 956         |
|    iterations           | 21          |
|    time_elapsed         | 44          |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.003402582 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.74       |
|    explained_variance   | -3.1e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 2.79e+04    |
|    n_updates            | 200         |
|    policy_gradient_loss | 0.00312     |
|    std                  | 0.953       |
|    value_loss           | 4.63e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18.7        |
|    ep_rew_mean          | 999         |
| time/                   |             |
|    fps                  | 956         |
|    iterations           | 22          |
|    time_elapsed         | 47          |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.014461054 |
|    clip_fraction        | 0.186       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.73       |
|    explained_variance   | -2.26e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.79e+04    |
|    n_updates            | 210         |
|    policy_gradient_loss | 0.00824     |
|    std                  | 0.95        |
|    value_loss           | 4.71e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 23.4        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 958         |
|    iterations           | 23          |
|    time_elapsed         | 49          |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.005051315 |
|    clip_fraction        | 0.042       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.73       |
|    explained_variance   | -1.43e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.72e+04    |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00247    |
|    std                  | 0.946       |
|    value_loss           | 5.02e+04    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 21.7         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 958          |
|    iterations           | 24           |
|    time_elapsed         | 51           |
|    total_timesteps      | 49152        |
| train/                  |              |
|    approx_kl            | 0.0013722598 |
|    clip_fraction        | 0.114        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.73        |
|    explained_variance   | -1.19e-06    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.36e+04     |
|    n_updates            | 230          |
|    policy_gradient_loss | 0.00167      |
|    std                  | 0.948        |
|    value_loss           | 4.03e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 22.7        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 957         |
|    iterations           | 25          |
|    time_elapsed         | 53          |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.010978737 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.73       |
|    explained_variance   | -9.54e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.5e+04     |
|    n_updates            | 240         |
|    policy_gradient_loss | 0.00916     |
|    std                  | 0.944       |
|    value_loss           | 4.55e+04    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 22.9         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 956          |
|    iterations           | 26           |
|    time_elapsed         | 55           |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0016085573 |
|    clip_fraction        | 0.0759       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.72        |
|    explained_variance   | -3.58e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.6e+04      |
|    n_updates            | 250          |
|    policy_gradient_loss | 0.00159      |
|    std                  | 0.943        |
|    value_loss           | 4.16e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 23           |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 955          |
|    iterations           | 27           |
|    time_elapsed         | 57           |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 0.0075868373 |
|    clip_fraction        | 0.14         |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.72        |
|    explained_variance   | -3.58e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.12e+04     |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.000614    |
|    std                  | 0.943        |
|    value_loss           | 4.3e+04      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 23.3        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 954         |
|    iterations           | 28          |
|    time_elapsed         | 60          |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.026271436 |
|    clip_fraction        | 0.0932      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.71       |
|    explained_variance   | -2.38e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.65e+04    |
|    n_updates            | 270         |
|    policy_gradient_loss | 0.000261    |
|    std                  | 0.94        |
|    value_loss           | 4.35e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 23.3        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 953         |
|    iterations           | 29          |
|    time_elapsed         | 62          |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.003167157 |
|    clip_fraction        | 0.0507      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.71       |
|    explained_variance   | -2.38e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 3.11e+04    |
|    n_updates            | 280         |
|    policy_gradient_loss | -6.76e-05   |
|    std                  | 0.933       |
|    value_loss           | 4.13e+04    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 27           |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 952          |
|    iterations           | 30           |
|    time_elapsed         | 64           |
|    total_timesteps      | 61440        |
| train/                  |              |
|    approx_kl            | 0.0029853792 |
|    clip_fraction        | 0.104        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.7         |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.1e+04      |
|    n_updates            | 290          |
|    policy_gradient_loss | 0.00261      |
|    std                  | 0.931        |
|    value_loss           | 4.11e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 28.2         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 951          |
|    iterations           | 31           |
|    time_elapsed         | 66           |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0020689203 |
|    clip_fraction        | 0.0621       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.7         |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 4.55e+03     |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.000572    |
|    std                  | 0.935        |
|    value_loss           | 3.8e+04      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 25.4        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 952         |
|    iterations           | 32          |
|    time_elapsed         | 68          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.008724133 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.7        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 3.3e+04     |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.00285    |
|    std                  | 0.933       |
|    value_loss           | 3.74e+04    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 26.6         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 951          |
|    iterations           | 33           |
|    time_elapsed         | 71           |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 0.0032018581 |
|    clip_fraction        | 0.0983       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.7         |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.6e+04      |
|    n_updates            | 320          |
|    policy_gradient_loss | 0.00285      |
|    std                  | 0.932        |
|    value_loss           | 4.1e+04      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 27.5         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 951          |
|    iterations           | 34           |
|    time_elapsed         | 73           |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 0.0109917745 |
|    clip_fraction        | 0.115        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.69        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.66e+04     |
|    n_updates            | 330          |
|    policy_gradient_loss | 0.000676     |
|    std                  | 0.926        |
|    value_loss           | 3.79e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 29.6        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 951         |
|    iterations           | 35          |
|    time_elapsed         | 75          |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.003671292 |
|    clip_fraction        | 0.0947      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.68       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.88e+04    |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.000579   |
|    std                  | 0.923       |
|    value_loss           | 3.71e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 31.7        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 951         |
|    iterations           | 36          |
|    time_elapsed         | 77          |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.011321245 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.68       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 9.18e+03    |
|    n_updates            | 350         |
|    policy_gradient_loss | 0.0021      |
|    std                  | 0.931       |
|    value_loss           | 3.28e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32.5        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 950         |
|    iterations           | 37          |
|    time_elapsed         | 79          |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.007188419 |
|    clip_fraction        | 0.0867      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.7        |
|    explained_variance   | 2.38e-07    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.11e+04    |
|    n_updates            | 360         |
|    policy_gradient_loss | 0.00153     |
|    std                  | 0.936       |
|    value_loss           | 3.47e+04    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 35.8         |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 950          |
|    iterations           | 38           |
|    time_elapsed         | 81           |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0026288582 |
|    clip_fraction        | 0.153        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.71        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.39e+04     |
|    n_updates            | 370          |
|    policy_gradient_loss | 0.00511      |
|    std                  | 0.939        |
|    value_loss           | 3.01e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 37.6         |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 950          |
|    iterations           | 39           |
|    time_elapsed         | 84           |
|    total_timesteps      | 79872        |
| train/                  |              |
|    approx_kl            | 0.0007875981 |
|    clip_fraction        | 0.0651       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.71        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.05e+04     |
|    n_updates            | 380          |
|    policy_gradient_loss | 0.00148      |
|    std                  | 0.94         |
|    value_loss           | 2.82e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 35           |
|    ep_rew_mean          | 999          |
| time/                   |              |
|    fps                  | 949          |
|    iterations           | 40           |
|    time_elapsed         | 86           |
|    total_timesteps      | 81920        |
| train/                  |              |
|    approx_kl            | 0.0052996557 |
|    clip_fraction        | 0.0242       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.71        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.74e+04     |
|    n_updates            | 390          |
|    policy_gradient_loss | -0.000804    |
|    std                  | 0.939        |
|    value_loss           | 2.88e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 36.5         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 949          |
|    iterations           | 41           |
|    time_elapsed         | 88           |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0039480035 |
|    clip_fraction        | 0.02         |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.72        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.95e+04     |
|    n_updates            | 400          |
|    policy_gradient_loss | -0.00124     |
|    std                  | 0.942        |
|    value_loss           | 2.94e+04     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 40.3         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 949          |
|    iterations           | 42           |
|    time_elapsed         | 90           |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0026578172 |
|    clip_fraction        | 0.0773       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.72        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.2e+04      |
|    n_updates            | 410          |
|    policy_gradient_loss | 0.000178     |
|    std                  | 0.946        |
|    value_loss           | 2.9e+04      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 45.2         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 949          |
|    iterations           | 43           |
|    time_elapsed         | 92           |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0032288088 |
|    clip_fraction        | 0.0512       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.73        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.99e+04     |
|    n_updates            | 420          |
|    policy_gradient_loss | -0.00348     |
|    std                  | 0.95         |
|    value_loss           | 2.44e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 46.2        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 949         |
|    iterations           | 44          |
|    time_elapsed         | 94          |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.006509505 |
|    clip_fraction        | 0.097       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.74       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 1.88e+04    |
|    n_updates            | 430         |
|    policy_gradient_loss | 0.000335    |
|    std                  | 0.951       |
|    value_loss           | 2.18e+04    |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 48.6       |
|    ep_rew_mean          | 1e+03      |
| time/                   |            |
|    fps                  | 949        |
|    iterations           | 45         |
|    time_elapsed         | 97         |
|    total_timesteps      | 92160      |
| train/                  |            |
|    approx_kl            | 0.00410219 |
|    clip_fraction        | 0.0699     |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.73      |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0003     |
|    loss                 | 2.41e+04   |
|    n_updates            | 440        |
|    policy_gradient_loss | -0.000667  |
|    std                  | 0.946      |
|    value_loss           | 2.3e+04    |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 54.7         |
|    ep_rew_mean          | 1e+03        |
| time/                   |              |
|    fps                  | 949          |
|    iterations           | 46           |
|    time_elapsed         | 99           |
|    total_timesteps      | 94208        |
| train/                  |              |
|    approx_kl            | 0.0041794716 |
|    clip_fraction        | 0.0495       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.72        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.1e+04      |
|    n_updates            | 450          |
|    policy_gradient_loss | -0.00111     |
|    std                  | 0.94         |
|    value_loss           | 2.12e+04     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 61.3        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 949         |
|    iterations           | 47          |
|    time_elapsed         | 101         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.004353442 |
|    clip_fraction        | 0.0712      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.71       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.87e+03    |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.00182    |
|    std                  | 0.935       |
|    value_loss           | 1.71e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 71.5        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 949         |
|    iterations           | 48          |
|    time_elapsed         | 103         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.006320783 |
|    clip_fraction        | 0.0947      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.71       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.79e+03    |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.00203    |
|    std                  | 0.941       |
|    value_loss           | 1.48e+04    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 84.2        |
|    ep_rew_mean          | 1e+03       |
| time/                   |             |
|    fps                  | 949         |
|    iterations           | 49          |
|    time_elapsed         | 105         |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.005269033 |
|    clip_fraction        | 0.0388      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.71       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 6.52e+03    |
|    n_updates            | 480         |
|    policy_gradient_loss | -0.00318    |
|    std                  | 0.938       |
|    value_loss           | 9.61e+03    |
-----------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7f0101a2b3d0>

In [6]:
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=100)

In [7]:
print(f"{mean_reward=} | {std_reward=}")

mean_reward=774.6999999999998 | std_reward=2.2737367544323206e-13
