In [1]:
import gymnasium as gym
import gymnasium_robotics
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.vec_env import SubprocVecEnv, VecVideoRecorder, DummyVecEnv
from stable_baselines3.common.vec_env import VecNormalize

In [2]:
def setup_video_recorder(env, video_folder='videos/', record_freq=10000, video_length=200):
    """
    Wrap the environment with a video recorder to capture agent performances.
    """
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda step: step % record_freq == 0,
        video_length=video_length,
        name_prefix='rl-agent'
    )
    return env

def setup_checkpoint_callback(checkpoint_dir='checkpoints/', save_freq=1000):
    """
    Create a callback that saves the model at regular intervals.
    """
    checkpoint_callback = CheckpointCallback(
        save_freq=save_freq,
        save_path=checkpoint_dir,
        name_prefix='rl_model',
        save_replay_buffer=True,
        save_vecnormalize=True,
        verbose=1
    )
    return checkpoint_callback


In [5]:
# Parameters
env_id = 'HandManipulateEgg-v1'
total_timesteps = 5000000
checkpoint_dir = './checkpoints/'
video_folder = './videos/'

def make_env():
    return gym.make("HandManipulateEgg-v1", reward_type='dense', render_mode='rgb_array')

env = DummyVecEnv([make_env])
env = VecNormalize(env, norm_obs=True, norm_reward=True)
env = setup_video_recorder(env, video_folder=video_folder)

# PPO policy network config
policy_kwargs = dict(
    net_arch=[256, 256]
)

# Instantiate PPO model
model = SAC(
    'MultiInputPolicy',
    env,
    learning_rate=1e-4,
    batch_size=128,
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log="./sac_tensorboard/",
    device='cuda'
)

checkpoint_callback = setup_checkpoint_callback(checkpoint_dir=checkpoint_dir)

# Train and save
model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)
model.save(f"{checkpoint_dir}/final_model")

env.close()


Using cpu device
Logging to ./sac_tensorboard/SAC_2
Saving video to C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4
MoviePy - Building video C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4.
MoviePy - Writing video C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4



                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-0-to-step-200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 78       |
|    time_elapsed    | 5        |
|    total_timesteps | 400      |
| train/             |          |
|    actor_loss      | -30.3    |
|    critic_loss     | 3.45     |
|    ent_coef        | 0.971    |
|    ent_coef_loss   | -1       |
|    learning_rate   | 0.0001   |
|    n_updates       | 299      |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 8        |
|    fps             | 79       |
|    time_elapsed    | 10       |
|    total_timesteps | 800      |
| train/             |          |
|    ac

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 64       |
|    fps             | 70       |
|    time_elapsed    | 90       |
|    total_timesteps | 6400     |
| train/             |          |
|    actor_loss      | -231     |
|    critic_loss     | 17.1     |
|    ent_coef        | 0.541    |
|    ent_coef_loss   | -17      |
|    learning_rate   | 0.0001   |
|    n_updates       | 6299     |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 68       |
|    fps             | 71       |
|    time_elapsed    | 95       |
|    total_timesteps | 6800     |
| train/             |          |
|    actor_loss      | -236     |
|    critic_loss     | 16.6     |
|    ent_coef        | 0.521    |
|    ent_coef_loss   | -17.6    |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-10000-to-step-10200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 104      |
|    fps             | 71       |
|    time_elapsed    | 146      |
|    total_timesteps | 10400    |
| train/             |          |
|    actor_loss      | -285     |
|    critic_loss     | 16       |
|    ent_coef        | 0.369    |
|    ent_coef_loss   | -22.3    |
|    learning_rate   | 0.0001   |
|    n_updates       | 10299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 108      |
|    fps             | 71       |
|    time_elapsed    | 151      |
|    total_timesteps | 10800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 164      |
|    fps             | 71       |
|    time_elapsed    | 230      |
|    total_timesteps | 16400    |
| train/             |          |
|    actor_loss      | -308     |
|    critic_loss     | 13.2     |
|    ent_coef        | 0.208    |
|    ent_coef_loss   | -28.7    |
|    learning_rate   | 0.0001   |
|    n_updates       | 16299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 168      |
|    fps             | 71       |
|    time_elapsed    | 235      |
|    total_timesteps | 16800    |
| train/             |          |
|    actor_loss      | -310     |
|    critic_loss     | 9.62     |
|    ent_coef        | 0.2      |
|    ent_coef_loss   | -25.4    |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-20000-to-step-20200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 204      |
|    fps             | 71       |
|    time_elapsed    | 285      |
|    total_timesteps | 20400    |
| train/             |          |
|    actor_loss      | -307     |
|    critic_loss     | 10.7     |
|    ent_coef        | 0.143    |
|    ent_coef_loss   | -24.7    |
|    learning_rate   | 0.0001   |
|    n_updates       | 20299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 208      |
|    fps             | 71       |
|    time_elapsed    | 290      |
|    total_timesteps | 20800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 264      |
|    fps             | 71       |
|    time_elapsed    | 369      |
|    total_timesteps | 26400    |
| train/             |          |
|    actor_loss      | -290     |
|    critic_loss     | 8.78     |
|    ent_coef        | 0.0843   |
|    ent_coef_loss   | -16.5    |
|    learning_rate   | 0.0001   |
|    n_updates       | 26299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 268      |
|    fps             | 71       |
|    time_elapsed    | 374      |
|    total_timesteps | 26800    |
| train/             |          |
|    actor_loss      | -284     |
|    critic_loss     | 6.41     |
|    ent_coef        | 0.0814   |
|    ent_coef_loss   | -16      |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-30000-to-step-30200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 304      |
|    fps             | 71       |
|    time_elapsed    | 426      |
|    total_timesteps | 30400    |
| train/             |          |
|    actor_loss      | -278     |
|    critic_loss     | 7.21     |
|    ent_coef        | 0.0624   |
|    ent_coef_loss   | -5.83    |
|    learning_rate   | 0.0001   |
|    n_updates       | 30299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 308      |
|    fps             | 71       |
|    time_elapsed    | 431      |
|    total_timesteps | 30800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 364      |
|    fps             | 71       |
|    time_elapsed    | 509      |
|    total_timesteps | 36400    |
| train/             |          |
|    actor_loss      | -251     |
|    critic_loss     | 4.53     |
|    ent_coef        | 0.0468   |
|    ent_coef_loss   | 8.02     |
|    learning_rate   | 0.0001   |
|    n_updates       | 36299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 368      |
|    fps             | 71       |
|    time_elapsed    | 514      |
|    total_timesteps | 36800    |
| train/             |          |
|    actor_loss      | -252     |
|    critic_loss     | 3.25     |
|    ent_coef        | 0.0464   |
|    ent_coef_loss   | -0.0125  |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-40000-to-step-40200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 404      |
|    fps             | 71       |
|    time_elapsed    | 564      |
|    total_timesteps | 40400    |
| train/             |          |
|    actor_loss      | -232     |
|    critic_loss     | 2.87     |
|    ent_coef        | 0.0401   |
|    ent_coef_loss   | -0.867   |
|    learning_rate   | 0.0001   |
|    n_updates       | 40299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 408      |
|    fps             | 71       |
|    time_elapsed    | 569      |
|    total_timesteps | 40800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 464      |
|    fps             | 71       |
|    time_elapsed    | 646      |
|    total_timesteps | 46400    |
| train/             |          |
|    actor_loss      | -208     |
|    critic_loss     | 1.6      |
|    ent_coef        | 0.0316   |
|    ent_coef_loss   | -0.822   |
|    learning_rate   | 0.0001   |
|    n_updates       | 46299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 468      |
|    fps             | 71       |
|    time_elapsed    | 651      |
|    total_timesteps | 46800    |
| train/             |          |
|    actor_loss      | -203     |
|    critic_loss     | 1.65     |
|    ent_coef        | 0.0311   |
|    ent_coef_loss   | -2.14    |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-50000-to-step-50200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 504      |
|    fps             | 71       |
|    time_elapsed    | 702      |
|    total_timesteps | 50400    |
| train/             |          |
|    actor_loss      | -188     |
|    critic_loss     | 1.63     |
|    ent_coef        | 0.0265   |
|    ent_coef_loss   | 1.16     |
|    learning_rate   | 0.0001   |
|    n_updates       | 50299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 508      |
|    fps             | 71       |
|    time_elapsed    | 707      |
|    total_timesteps | 50800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 564      |
|    fps             | 71       |
|    time_elapsed    | 785      |
|    total_timesteps | 56400    |
| train/             |          |
|    actor_loss      | -162     |
|    critic_loss     | 1.27     |
|    ent_coef        | 0.0211   |
|    ent_coef_loss   | -1.04    |
|    learning_rate   | 0.0001   |
|    n_updates       | 56299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 568      |
|    fps             | 71       |
|    time_elapsed    | 790      |
|    total_timesteps | 56800    |
| train/             |          |
|    actor_loss      | -159     |
|    critic_loss     | 1.1      |
|    ent_coef        | 0.0208   |
|    ent_coef_loss   | -7.85    |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-60000-to-step-60200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 604      |
|    fps             | 71       |
|    time_elapsed    | 845      |
|    total_timesteps | 60400    |
| train/             |          |
|    actor_loss      | -147     |
|    critic_loss     | 0.879    |
|    ent_coef        | 0.0176   |
|    ent_coef_loss   | -4.32    |
|    learning_rate   | 0.0001   |
|    n_updates       | 60299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 608      |
|    fps             | 71       |
|    time_elapsed    | 851      |
|    total_timesteps | 60800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 664      |
|    fps             | 71       |
|    time_elapsed    | 934      |
|    total_timesteps | 66400    |
| train/             |          |
|    actor_loss      | -125     |
|    critic_loss     | 0.608    |
|    ent_coef        | 0.0132   |
|    ent_coef_loss   | -0.888   |
|    learning_rate   | 0.0001   |
|    n_updates       | 66299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 668      |
|    fps             | 71       |
|    time_elapsed    | 940      |
|    total_timesteps | 66800    |
| train/             |          |
|    actor_loss      | -123     |
|    critic_loss     | 0.701    |
|    ent_coef        | 0.0132   |
|    ent_coef_loss   | 7.6      |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-70000-to-step-70200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 704      |
|    fps             | 70       |
|    time_elapsed    | 997      |
|    total_timesteps | 70400    |
| train/             |          |
|    actor_loss      | -111     |
|    critic_loss     | 0.44     |
|    ent_coef        | 0.0109   |
|    ent_coef_loss   | -0.403   |
|    learning_rate   | 0.0001   |
|    n_updates       | 70299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 708      |
|    fps             | 70       |
|    time_elapsed    | 1002     |
|    total_timesteps | 70800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 764      |
|    fps             | 70       |
|    time_elapsed    | 1078     |
|    total_timesteps | 76400    |
| train/             |          |
|    actor_loss      | -90.8    |
|    critic_loss     | 0.329    |
|    ent_coef        | 0.00858  |
|    ent_coef_loss   | -6.66    |
|    learning_rate   | 0.0001   |
|    n_updates       | 76299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 768      |
|    fps             | 70       |
|    time_elapsed    | 1084     |
|    total_timesteps | 76800    |
| train/             |          |
|    actor_loss      | -91.7    |
|    critic_loss     | 0.284    |
|    ent_coef        | 0.00855  |
|    ent_coef_loss   | -5.27    |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-80000-to-step-80200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 804      |
|    fps             | 70       |
|    time_elapsed    | 1137     |
|    total_timesteps | 80400    |
| train/             |          |
|    actor_loss      | -80.5    |
|    critic_loss     | 0.21     |
|    ent_coef        | 0.00702  |
|    ent_coef_loss   | 5.88     |
|    learning_rate   | 0.0001   |
|    n_updates       | 80299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 808      |
|    fps             | 70       |
|    time_elapsed    | 1142     |
|    total_timesteps | 80800    |
| train/             |          |
|

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 864      |
|    fps             | 70       |
|    time_elapsed    | 1222     |
|    total_timesteps | 86400    |
| train/             |          |
|    actor_loss      | -67      |
|    critic_loss     | 0.14     |
|    ent_coef        | 0.00524  |
|    ent_coef_loss   | -7.55    |
|    learning_rate   | 0.0001   |
|    n_updates       | 86299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 868      |
|    fps             | 70       |
|    time_elapsed    | 1227     |
|    total_timesteps | 86800    |
| train/             |          |
|    actor_loss      | -65.6    |
|    critic_loss     | 0.17     |
|    ent_coef        | 0.00508  |
|    ent_coef_loss   | -4.1     |
|    learning_

                                                                                                                       

MoviePy - Done !
MoviePy - video ready C:\Users\13233\Documents\Robotics Project\Dexterous-Manipulation\videos\rl-agent-step-90000-to-step-90200.mp4
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 904      |
|    fps             | 70       |
|    time_elapsed    | 1279     |
|    total_timesteps | 90400    |
| train/             |          |
|    actor_loss      | -58.9    |
|    critic_loss     | 0.109    |
|    ent_coef        | 0.00426  |
|    ent_coef_loss   | 0.494    |
|    learning_rate   | 0.0001   |
|    n_updates       | 90299    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 908      |
|    fps             | 70       |
|    time_elapsed    | 1284     |
|    total_timesteps | 90800    |
| train/             |          |
|

OSError: [Errno 28] No space left on device