In [6]:
import os

import gymnasium as gym
import gymnasium_robotics
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
from stable_baselines3.common.vec_env import VecNormalize

In [7]:
def setup_video_recorder(env, video_folder='videos/', record_freq=10000, video_length=200,
                         name_prefix='rl-agent'):
    """
    Wrap the environment with a video recorder to capture agent performances.

    Args:
        env: Environment to wrap; passed to ``VecVideoRecorder`` as its first argument.
        video_folder: Folder handed to the recorder for the video files.
        record_freq: A recording is triggered whenever the step counter given to
            the trigger is a multiple of this value (step 0 included). Must be
            positive.
        video_length: Forwarded to the recorder as ``video_length``.
        name_prefix: Forwarded to the recorder as ``name_prefix``; defaults to
            ``'rl-agent'``, the value that used to be hard-coded.

    Returns:
        The ``VecVideoRecorder`` instance wrapping ``env``.

    Raises:
        ValueError: If ``record_freq`` is not positive.
    """
    # Fail fast: a zero frequency would otherwise only surface later as a
    # ZeroDivisionError raised from inside the trigger.
    if record_freq <= 0:
        raise ValueError(f"record_freq must be a positive integer, got {record_freq!r}")

    def _should_record(step):
        # True on every step that is a multiple of record_freq.
        return step % record_freq == 0

    return VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=_should_record,
        video_length=video_length,
        name_prefix=name_prefix,
    )

def setup_checkpoint_callback(checkpoint_dir='checkpoints/', save_freq=1000):
    """
    Build the periodic model-saving callback used during training.

    Args:
        checkpoint_dir: Passed to ``CheckpointCallback`` as ``save_path``.
        save_freq: Passed to ``CheckpointCallback`` as ``save_freq``.

    Returns:
        A ``CheckpointCallback`` configured with the ``'rl_model'`` name prefix,
        replay-buffer and VecNormalize saving switched on, and verbosity 1.
    """
    callback_options = {
        'save_freq': save_freq,
        'save_path': checkpoint_dir,
        'name_prefix': 'rl_model',
        # Also request the replay buffer and VecNormalize statistics.
        'save_replay_buffer': True,
        'save_vecnormalize': True,
        'verbose': 1,
    }
    return CheckpointCallback(**callback_options)


In [None]:
# Parameters
env_id = 'HandManipulateEgg-v1'
total_timesteps = 5000000
checkpoint_dir = './checkpoints/'
video_folder = './videos/'
seed = 0  # fixed seed so a Restart & Run All repeats the same training run

def make_env():
    """Create one dense-reward instance of ``env_id`` that renders RGB frames."""
    # Use the shared env_id parameter instead of repeating the literal, so
    # changing the parameter above actually changes the environment.
    return gym.make(env_id, reward_type='dense', render_mode='rgb_array')

# Build the wrapper stack step by step. A separate handle on the VecNormalize
# layer is kept so its running statistics can be saved after training.
vec_env = DummyVecEnv([make_env])
norm_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True)
env = setup_video_recorder(norm_env, video_folder=video_folder)

# PPO policy network config
policy_kwargs = dict(
    net_arch=dict(
        pi=[256, 256],
        vf=[256, 256]
    )
)

# Instantiate PPO model
model = PPO(
    'MultiInputPolicy',
    env,
    learning_rate=2e-4,
    clip_range=0.1,
    n_steps=2048,
    batch_size=64,
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log="./ppo_tensorboard/",
    seed=seed
)

checkpoint_callback = setup_checkpoint_callback(checkpoint_dir=checkpoint_dir)

# Train and save
try:
    model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)

    # Make sure the target folder exists before writing the final artifacts.
    os.makedirs(checkpoint_dir, exist_ok=True)
    # os.path.join avoids the doubled separator that
    # f"{checkpoint_dir}/final_model" produced when checkpoint_dir ends in '/'.
    model.save(os.path.join(checkpoint_dir, "final_model"))
    # The policy was trained on normalized observations; store the
    # normalization statistics next to the final model so it can be
    # evaluated or resumed with the same scaling.
    norm_env.save(os.path.join(checkpoint_dir, "final_vecnormalize.pkl"))
finally:
    # Close the environment even if training fails or is interrupted.
    env.close()


Using cpu device
Logging to ./ppo_tensorboard/PPO_27
Saving video to C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4
MoviePy - Building video C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4.
MoviePy - Writing video C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4



                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    fps             | 280      |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 264         |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.027855957 |
|    clip_fraction        | 0.59        |
|    clip_range           | 0.1         |
|    entropy_loss         | -28.4       |
|    

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-10000-to-step-10200.mp4
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 249         |
|    iterations           | 5           |
|    time_elapsed         | 41          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.032566074 |
|    clip_fraction        | 0.611       |
|    clip_range           | 0.1         |
|    entropy_loss         | -28.3       |
|    explained_variance   | 0.399       |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0433     |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.0483     |
|    std                  | 0.997       |
|    value_loss           | 0.0551      |
-----------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-20000-to-step-20200.mp4
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 10          |
|    time_elapsed         | 83          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.040870685 |
|    clip_fraction        | 0.64        |
|    clip_range           | 0.1         |
|    entropy_loss         | -28.3       |
|    explained_variance   | 0.636       |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0568     |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0485     |
|    std                  | 0.996       |
|    value_loss           | 0.033       |
-----------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-30000-to-step-30200.mp4
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 15          |
|    time_elapsed         | 126         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.050246704 |
|    clip_fraction        | 0.671       |
|    clip_range           | 0.1         |
|    entropy_loss         | -28.2       |
|    explained_variance   | 0.813       |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0703     |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0505     |
|    std                  | 0.99        |
|    value_loss           | 0.0244      |
-----------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-40000-to-step-40200.mp4
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 20          |
|    time_elapsed         | 169         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.054029323 |
|    clip_fraction        | 0.681       |
|    clip_range           | 0.1         |
|    entropy_loss         | -28.1       |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0002      |
|    loss                 | -0.06       |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0508     |
|    std                  | 0.987       |
|    value_loss           | 0.016       |
-----------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-50000-to-step-50200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 242        |
|    iterations           | 25         |
|    time_elapsed         | 211        |
|    total_timesteps      | 51200      |
| train/                  |            |
|    approx_kl            | 0.05555003 |
|    clip_fraction        | 0.686      |
|    clip_range           | 0.1        |
|    entropy_loss         | -28.1      |
|    explained_variance   | 0.941      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0646    |
|    n_updates            | 240        |
|    policy_gradient_loss | -0.0499    |
|    std                  | 0.987      |
|    value_loss           | 0.0117     |
-------------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-60000-to-step-60200.mp4
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 30          |
|    time_elapsed         | 252         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.059850365 |
|    clip_fraction        | 0.69        |
|    clip_range           | 0.1         |
|    entropy_loss         | -28.1       |
|    explained_variance   | 0.926       |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0619     |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.0492     |
|    std                  | 0.983       |
|    value_loss           | 0.00948     |
-----------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-70000-to-step-70200.mp4
-----------------------------------------
| rollout/                |             |
|    success_rate         | 0           |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 35          |
|    time_elapsed         | 294         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.067142986 |
|    clip_fraction        | 0.712       |
|    clip_range           | 0.1         |
|    entropy_loss         | -27.9       |
|    explained_variance   | 0.963       |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0764     |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0508     |
|    std                  | 0.975       |
|    value_loss           | 0.00849     |
-----------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-80000-to-step-80200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 241        |
|    iterations           | 40         |
|    time_elapsed         | 338        |
|    total_timesteps      | 81920      |
| train/                  |            |
|    approx_kl            | 0.06572673 |
|    clip_fraction        | 0.714      |
|    clip_range           | 0.1        |
|    entropy_loss         | -27.7      |
|    explained_variance   | 0.947      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0651    |
|    n_updates            | 390        |
|    policy_gradient_loss | -0.0499    |
|    std                  | 0.966      |
|    value_loss           | 0.00826    |
-------------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-90000-to-step-90200.mp4
--------------------------------------
| rollout/                |          |
|    success_rate         | 0        |
| time/                   |          |
|    fps                  | 240      |
|    iterations           | 45       |
|    time_elapsed         | 383      |
|    total_timesteps      | 92160    |
| train/                  |          |
|    approx_kl            | 0.070062 |
|    clip_fraction        | 0.712    |
|    clip_range           | 0.1      |
|    entropy_loss         | -27.6    |
|    explained_variance   | 0.937    |
|    learning_rate        | 0.0002   |
|    loss                 | -0.0634  |
|    n_updates            | 440      |
|    policy_gradient_loss | -0.0481  |
|    std                  | 0.96     |
|    value_loss           | 0.00688  |
--------------------------------------
--------------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-100000-to-step-100200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 241        |
|    iterations           | 49         |
|    time_elapsed         | 415        |
|    total_timesteps      | 100352     |
| train/                  |            |
|    approx_kl            | 0.08424823 |
|    clip_fraction        | 0.722      |
|    clip_range           | 0.1        |
|    entropy_loss         | -27.5      |
|    explained_variance   | 0.955      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0579    |
|    n_updates            | 480        |
|    policy_gradient_loss | -0.0505    |
|    std                  | 0.958      |
|    value_loss           | 0.00593    |
-----------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-110000-to-step-110200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 242        |
|    iterations           | 54         |
|    time_elapsed         | 455        |
|    total_timesteps      | 110592     |
| train/                  |            |
|    approx_kl            | 0.08423341 |
|    clip_fraction        | 0.73       |
|    clip_range           | 0.1        |
|    entropy_loss         | -27.4      |
|    explained_variance   | 0.956      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0595    |
|    n_updates            | 530        |
|    policy_gradient_loss | -0.0493    |
|    std                  | 0.951      |
|    value_loss           | 0.00685    |
-----------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-120000-to-step-120200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 244        |
|    iterations           | 59         |
|    time_elapsed         | 493        |
|    total_timesteps      | 120832     |
| train/                  |            |
|    approx_kl            | 0.08642259 |
|    clip_fraction        | 0.736      |
|    clip_range           | 0.1        |
|    entropy_loss         | -27.3      |
|    explained_variance   | 0.921      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0719    |
|    n_updates            | 580        |
|    policy_gradient_loss | -0.0488    |
|    std                  | 0.946      |
|    value_loss           | 0.00621    |
-----------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-130000-to-step-130200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 246        |
|    iterations           | 64         |
|    time_elapsed         | 531        |
|    total_timesteps      | 131072     |
| train/                  |            |
|    approx_kl            | 0.09264924 |
|    clip_fraction        | 0.733      |
|    clip_range           | 0.1        |
|    entropy_loss         | -27.2      |
|    explained_variance   | 0.966      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0712    |
|    n_updates            | 630        |
|    policy_gradient_loss | -0.0488    |
|    std                  | 0.943      |
|    value_loss           | 0.00601    |
-----------------------------

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-140000-to-step-140200.mp4
----------------------------------------
| rollout/                |            |
|    success_rate         | 0          |
| time/                   |            |
|    fps                  | 248        |
|    iterations           | 69         |
|    time_elapsed         | 569        |
|    total_timesteps      | 141312     |
| train/                  |            |
|    approx_kl            | 0.09599351 |
|    clip_fraction        | 0.746      |
|    clip_range           | 0.1        |
|    entropy_loss         | -27.1      |
|    explained_variance   | 0.971      |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0585    |
|    n_updates            | 680        |
|    policy_gradient_loss | -0.0479    |
|    std                  | 0.936      |
|    value_loss           | 0.00449    |
-----------------------------