In [1]:
import os
import gymnasium as gym
import panda_gym
from stable_baselines3 import DDPG, HerReplayBuffer
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise
import time
import numpy as np
import visioncraft
from visioncraft.baseline.callbacks import SuccessThresholdCallback, NoiseDecayCallback
from visioncraft.baseline.utils import exponential_schedule

pybullet build time: Jan 29 2025 23:19:57


In [2]:
# Workspace root: two directory levels above the file that defines the
# `visioncraft` package.
_visioncraft_file = os.path.abspath(visioncraft.__file__)
visioncraft_root = os.path.dirname(os.path.dirname(_visioncraft_file))

# Every training artefact is written below a single directory.
TRAINING_DIR = os.path.join(visioncraft_root, "baseline_training")
VIDEO_DIR = os.path.join(TRAINING_DIR, "video_training")  # recorded episodes
LOG_DIR = os.path.join(TRAINING_DIR, "run_training")      # monitor / TensorBoard logs
MODEL_DIR = os.path.join(TRAINING_DIR, "models")          # saved model archives

# Create the output directories up front; already existing ones are left as-is.
for _output_dir in (LOG_DIR, MODEL_DIR, VIDEO_DIR):
    os.makedirs(_output_dir, exist_ok=True)


In [3]:
# Curriculum stages, in training order.
ENV_NAME = [
    "LocobotTouch-v0",
    "LocobotGrasp-v0",
    "LocobotLift-v0",
    "LocobotPickPlace-v0",
]

# Timestep budget per stage (same budget for every stage), index-aligned with ENV_NAME.
TOTAL_TIMESTEPS = [600_000] * len(ENV_NAME)

# Keyword arguments forwarded to the DDPG constructor.
DDPG_PARAMS = dict(
    policy="MultiInputPolicy",
    learning_rate=exponential_schedule(5e-5, decay_rate=0.9),
    buffer_size=int(1e6),
    learning_starts=1000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
    train_freq=(1, "step"),
    gradient_steps=-1,
    verbose=1,
    device="cuda",
)


In [4]:
# Curriculum training: a single DDPG + HER agent is trained on each environment
# in ENV_NAME in order. The model object (and therefore its replay buffer) is
# created in the first stage and reused for every later stage.
if len(ENV_NAME) != len(TOTAL_TIMESTEPS):
    # zip() would otherwise silently drop the unmatched stages.
    raise ValueError(
        f"ENV_NAME has {len(ENV_NAME)} entries but TOTAL_TIMESTEPS has "
        f"{len(TOTAL_TIMESTEPS)}; they must be index-aligned."
    )

model = None
for i, (env_name, timesteps) in enumerate(zip(ENV_NAME, TOTAL_TIMESTEPS)):
    # e.g. "LocobotTouch-v0" -> "Touch-v0" (the version suffix is kept).
    stage_name = env_name.replace("Locobot", "").replace("Env", "")
    run_id = f"stage{i+1}_{stage_name.lower()}_{int(time.time())}"
    log_path = os.path.join(LOG_DIR, run_id)
    model_path = os.path.join(MODEL_DIR, f"{run_id}.zip")

    print(f"--- Training Stage {i+1}: {stage_name} ---")

    # Create environment
    env = gym.make(env_name)
    try:
        env = Monitor(env, log_path)
        env = gym.wrappers.RecordVideo(
            env,
            os.path.join(VIDEO_DIR, run_id),
            episode_trigger=lambda x: x % 100 == 0,  # Record less frequently if needed
            name_prefix=f"{run_id}",
        )

        # Action noise setup: every stage starts again from sigma_start; the
        # callback is given both endpoints and the stage's timestep budget.
        n_actions = env.action_space.shape[0]
        sigma_start, sigma_end = 0.2, 0.05
        action_noise = NormalActionNoise(
            mean=np.zeros(n_actions),
            sigma=np.full(n_actions, sigma_start),
        )
        noise_cb = NoiseDecayCallback(
            n_actions,
            timesteps,
            sigma_start=sigma_start,
            sigma_end=sigma_end,
        )

        if model is None:
            # Build the constructor kwargs from a copy so the shared DDPG_PARAMS
            # config is not mutated by running this cell.
            ddpg_kwargs = {**DDPG_PARAMS, "action_noise": action_noise}
            model = DDPG(
                env=env,
                tensorboard_log=LOG_DIR,
                replay_buffer_class=HerReplayBuffer,
                replay_buffer_kwargs=dict(
                    n_sampled_goal=4,  # HER: k = 4
                    goal_selection_strategy="future",
                ),
                **ddpg_kwargs,
            )
        else:
            model.set_env(env)
            model.action_noise = action_noise
            # NOTE(review): HerReplayBuffer appears to keep its own `env`
            # reference (used to call `compute_reward` when relabelling goals)
            # and `set_env` on the model does not seem to update it. Point the
            # buffer at the new stage's env so relabelled rewards do not come
            # from the previous, already closed environment. Confirm against
            # the installed stable-baselines3 version.
            if isinstance(model.replay_buffer, HerReplayBuffer):
                model.replay_buffer.env = model.get_env()

        callback = SuccessThresholdCallback(
            success_threshold=0.9, check_freq=1000, window_size=200, verbose=0
        )

        model.learn(
            total_timesteps=timesteps,
            log_interval=10,
            tb_log_name=run_id,
            # Only the first stage resets the global step counter; later
            # stages continue counting from where the previous one stopped.
            reset_num_timesteps=(i == 0),
            callback=[callback, noise_cb],
        )
        model.save(model_path)
        print(f"Saved model to: {model_path}\n")
    finally:
        # Always release the environment (and whatever wrappers were applied),
        # including when training raises or is interrupted from the notebook.
        env.close()

print("--- Curriculum Learning Complete ---")

--- Training Stage 1: Touch-v0 ---
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
locobot/camera_depth_linkUsing cpu device
Wrapping the env in a DummyVecEnv.
Logging to /home/vscode/ros_ws/baseline_training/run_training/stage1_touch-v0_1745988258_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | 8.52     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 10       |
|    fps             | 34       |
|    time_elapsed    | 14       |
|    total_timesteps | 500      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | 11.9     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 20       |
|    fps             | 64      

KeyboardInterrupt: 