In [1]:
import os

import gymnasium as gym
import panda_gym
import torch as th
from huggingface_sb3 import load_from_hub, package_to_hub

from stable_baselines3 import A2C, PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, FlattenExtractor
from huggingface_hub import notebook_login

In [2]:
# Environment id shared by the cells below: it selects the environment to build
# and is reused in the TensorBoard log path and in the Hub model/repo names.
# NOTE(review): presumably registered with Gymnasium by the `panda_gym` import — confirm.
env_id = "PandaSlide-v3"

In [3]:
# Build 16 copies of the environment and step them together as one vectorized env.
env = make_vec_env(env_id, n_envs=16)

# Normalize observations and rewards with running statistics.
# `gamma` is the discount VecNormalize uses for the running return that scales
# rewards; it defaults to 0.99, so it is set explicitly to the 0.98 discount the
# PPO agent in this notebook is trained with, keeping the two consistent.
env = VecNormalize(env, norm_obs=True, norm_reward=True, gamma=0.98)

pybullet build time: Nov 28 2023 23:45:17


argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color

In [4]:
# Extra keyword arguments forwarded to the PPO policy constructor.
# `net_arch` is given directly as a dict with separate actor (`pi`) and critic
# (`vf`) layer sizes. The older list-wrapped form `[dict(pi=..., vf=...)]` is
# deprecated since Stable-Baselines3 v1.8.0 and only triggers a warning before
# being unwrapped to this same dict.
policy_ppo = {
    "net_arch": dict(pi=[512, 512, 512], vf=[512, 512, 512]),
    "activation_fn": th.nn.ReLU,
}

In [5]:
# PPO hyperparameters, collected in one mapping so they can be read at a glance.
# With the 16 parallel environments created above, n_steps=2048 gives
# 2048 * 16 = 32768 transitions per rollout (matching the `total_timesteps`
# logged after the first iteration), split into minibatches of 2048.
ppo_settings = dict(
    learning_rate=1.5e-4,
    n_steps=2048,
    batch_size=2048,
    n_epochs=20,
    gamma=0.98,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,
    vf_coef=0.5,
    max_grad_norm=0.5,
)

# "MultiInputPolicy" is the policy class name passed to PPO; the custom network
# settings come from `policy_ppo`, and TensorBoard logs go to a per-environment folder.
model_ppo = PPO(
    "MultiInputPolicy",
    env,
    policy_kwargs=policy_ppo,
    verbose=1,
    tensorboard_log=f"./ppo{env_id}/",
    **ppo_settings,
)

Using cuda device




In [6]:
# Train for ten million environment steps. `learn` returns the model itself,
# which is why the cell output shows the PPO object's repr.
model_ppo.learn(total_timesteps=10_000_000)

Logging to ./ppoPandaSlide-v3/PPO_2


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 49.7     |
|    ep_rew_mean     | -49.7    |
|    success_rate    | 0.02     |
| time/              |          |
|    fps             | 526      |
|    iterations      | 1        |
|    time_elapsed    | 62       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 49.7        |
|    ep_rew_mean          | -49.7       |
|    success_rate         | 0.01        |
| time/                   |             |
|    fps                  | 523         |
|    iterations           | 2           |
|    time_elapsed         | 125         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.005682204 |
|    clip_fraction        | 0.0313      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.27     

<stable_baselines3.ppo.ppo.PPO at 0x7fc9895a0370>

In [7]:
# Persist the VecNormalize wrapper (with its running statistics); the
# evaluation cell reloads this file so the agent sees identically scaled inputs.
env.save("PandaSlide_vec_normalize_ppo.pkl")

# Persist the trained agent itself.
model_ppo.save("ppo-PandaSlide")

In [8]:
# Build a single evaluation environment. `make_vec_env` wraps it in a `Monitor`
# before vectorizing, which `evaluate_policy` recommends so that episode rewards
# and lengths are recorded independently of any other wrapper.
eval_env = make_vec_env(env_id, n_envs=1)

# Restore the normalization statistics that were saved after training.
eval_env = VecNormalize.load("PandaSlide_vec_normalize_ppo.pkl", eval_env)

# We need to override the render_mode
eval_env.render_mode = "rgb_array"

# Freeze the running normalization statistics: they must not be updated at test time.
eval_env.training = False
# Reward normalization is not needed at test time; report raw environment rewards.
eval_env.norm_reward = False

# Load the agent
model = PPO.load("ppo-PandaSlide")

# Evaluation settings are spelled out (they equal the library defaults) so the
# reported number is unambiguous.
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=10,
    deterministic=True,
)

print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")

argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886




Mean reward = -42.60 +/- 12.33


In [9]:
# Authenticate with the Hugging Face Hub; in a notebook this renders a login widget.
notebook_login()
# Configure git's `store` credential helper globally so git remembers the credentials.
# NOTE(review): `store` keeps credentials unencrypted on disk and `--global` applies to
# every repository of this user — confirm that is acceptable on a shared machine.
!git config --global credential.helper store

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [10]:
# `package_to_hub` is already imported in the notebook's import cell, so it is
# not imported again here.
# Per its own log message, this call saves and evaluates the agent, records a
# video, creates a model card and pushes everything to the Hub repository.
package_to_hub(
    model=model,
    model_name=f"ppo-{env_id}",
    model_architecture="PPO",
    env_id=env_id,
    eval_env=eval_env,  # normalized evaluation env built in the evaluation cell
    repo_id=f"Hevagog/ppo-{env_id}",
    commit_message="Initial Push",
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to /tmp/tmp13op90yz/-step-0-to-step-1000.mp4
Moviepy - Building video /tmp/tmp13op90yz/-step-0-to-step-1000.mp4.
Moviepy - Writing video /tmp/tmp13op90yz/-step-0-to-step-1000.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /tmp/tmp13op90yz/-step-0-to-step-1000.mp4


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

[38;5;4mℹ Pushing repo Hevagog/ppo-PandaSlide-v3 to the Hugging Face Hub[0m


policy.pth:   0%|          | 0.00/4.32M [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

ppo-PandaSlide-v3.zip:   0%|          | 0.00/13.0M [00:00<?, ?B/s]

policy.optimizer.pth:   0%|          | 0.00/8.64M [00:00<?, ?B/s]

vec_normalize.pkl:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/Hevagog/ppo-PandaSlide-v3/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/Hevagog/ppo-PandaSlide-v3/commit/75d3823878fb685100db8ce628e7309a5839b9b4', commit_message='Initial Push', commit_description='', oid='75d3823878fb685100db8ce628e7309a5839b9b4', pr_url=None, pr_revision=None, pr_num=None)

---
