In [1]:
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np

class RewardAndGoalLoggingCallback(BaseCallback):
    def __init__(self, log_interval=20000, verbose=0):
        super().__init__(verbose)
        self.log_interval = log_interval
        self.cumulative_reward = 0
        self.step_counter = 0

    def _on_step(self):
        reward = self.locals["rewards"][0]
        self.cumulative_reward += reward
        self.step_counter += 1

        if self.step_counter % self.log_interval == 0:
            # Get the goal_pos from the environment (VecEnv-safe)
            goal_pos = self.training_env.get_attr("goal_pos")[0]
            ef_pos = self.training_env.get_attr("robot")[0].get_joint_obs()["ee_pos"]
            print(f"📈 Step {self.num_timesteps} | Reward (last {self.log_interval}): {round(self.cumulative_reward, 4)} | 🎯 Goal: {np.round(goal_pos, 3)},f🤖 EE Pos: {np.round(ef_pos, 3)} ")
            self.cumulative_reward = 0

        return True


In [2]:
from stable_baselines3 import PPO
from env import UArmEnv

# Create environment and model
env = UArmEnv(render=False)
model = PPO("MlpPolicy", env, verbose=0)

# Add logging callback
reward_logger = RewardAndGoalLoggingCallback(log_interval=20000)

# Train with logging
model.learn(total_timesteps=1_000_000, callback=reward_logger)

env.close()

model.save("ppo_uarm_reach_goal")


pybullet build time: Jan 29 2025 23:16:28


camera_link



camera_linkGoal position set to: [-0.34217731 -0.1971738   0.04906519]
camera_linkGoal position set to: [-0.03480862 -0.33325294  0.04899883]
camera_linkGoal position set to: [-0.30301966 -0.13164364  0.02907514]
Goal position set to: [ 0.11388582 -0.2294071   0.0471513 ]
camera_linkGoal position set to: [-0.05895102 -0.21453065  0.03500201]

camera_linkGoal position set to: [ 0.10561312 -0.189818    0.04838037]
camera_linkGoal position set to: [-0.07058898 -0.32203448  0.0309241 ]
camera_linkGoal position set to: [ 0.05498109 -0.29453844  0.06124116]
camera_linkGoal position set to: [-0.29620391 -0.24562319  0.05224263]
camera_linkGoal position set to: [ 0.18149775 -0.24524668  0.04301641]
camera_linkGoal position set to: [ 0.27171167 -0.06804449  0.06702027]
camera_linkGoal position set to: [ 0.35308463 -0.05966269  0.02727308]
camera_linkGoal position set to: [-0.17918194 -0.09191329  0.02541086]
camera_linkGoal position set to: [-0.34142864 -0.08507163  0.03081493]
camera_linkGoal 

KeyboardInterrupt: 

In [None]:
model.save("ppo_uarm")
1 000 000