In [3]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# 필요한 클래스 및 함수 임포트
from cleanrl.cleanrl.ppo_continuous_action import  load_ppo_checkpoint
import cv2

def load_model_and_rollout(model_path, task_id="HalfCheetah-v4", num_episodes=10, seed=1, gamma=0.99, control_mode='OSC_POSITION'):
    """
    Load a saved PPO checkpoint, roll the policy out, and display the rollout as a video.

    The environment and agent are built by ``load_ppo_checkpoint`` (image
    rendering enabled via ``active_image=True``). Each episode runs for at most
    200 steps and ends early when the first sub-environment reports success or
    when any sub-environment terminates/truncates. Every step's agent-view
    frame is annotated with the running and per-step reward and collected into
    one video that is shown with ``mediapy`` at 30 fps.

    Args:
        model_path: Checkpoint path, forwarded as ``checkpoint_path``.
        task_id: Task identifier forwarded to ``load_ppo_checkpoint``.
            NOTE(review): the body reads ``image_states['agentview_image']``
            and ``is_success`` from the first sub-environment; confirm the
            chosen task exposes both.
        num_episodes: Number of episodes to roll out.
        seed: Seed forwarded to ``load_ppo_checkpoint``.
        gamma: Discount factor forwarded to ``load_ppo_checkpoint``.
        control_mode: Controller mode forwarded to ``load_ppo_checkpoint``.

    Returns:
        None. Per-episode and average rewards are printed; the video is displayed.
    """
    rollout_horizon = 200  # maximum number of environment steps per episode
    font = cv2.FONT_HERSHEY_SIMPLEX
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env, agent = load_ppo_checkpoint(checkpoint_path=model_path,
                                     task_id=task_id,
                                     control_mode=control_mode,
                                     seed=seed,
                                     gamma=gamma, active_image=True)

    def grab_frame():
        """Return a fresh, vertically flipped uint8 copy of the current agent-view image."""
        raw = env.envs[0].image_states['agentview_image']
        # order="C" forces a contiguous copy so cv2 can draw on it without
        # touching the buffer owned by the environment.
        return np.array(raw[::-1, :, :], dtype=np.uint8, order="C")

    frames = []
    total_rewards = []
    try:
        for episode in range(num_episodes):
            obs, _ = env.reset()
            episode_reward = 0

            # First frame of the episode is stored without any text overlay.
            frames.append(grab_frame())

            for _step in range(rollout_horizon):
                with torch.no_grad():
                    # Convert exactly once per step; `obs` itself stays the raw
                    # array returned by the environment.
                    obs_tensor = torch.as_tensor(obs, dtype=torch.float32, device=device)
                    action, _, _, _ = agent.get_action_and_value(obs_tensor)
                obs, reward, terminations, truncations, info = env.step(action.cpu().numpy())
                done = np.logical_or(terminations, truncations).any()
                episode_reward += reward[0]  # accumulate the first sub-environment's reward

                # Fetch the new frame and overlay the reward read-outs.
                image_frame = grab_frame()
                cv2.putText(image_frame, f"Episode Reward: {episode_reward:.2f}", (10, 30), font, 0.5, (255, 255, 255), 2)
                cv2.putText(image_frame, f"Reward: {reward[0]:.2f}", (10, 60), font, 0.5, (255, 255, 255), 2)

                success = env.envs[0].is_success
                if success:
                    cv2.putText(image_frame, "Success", (10, 90), font, 0.5, (0, 255, 0), 2)
                frames.append(image_frame)

                # End the episode here instead of resetting mid-loop: the next
                # episode's reset() supplies a fresh observation, so the policy
                # never acts on a stale one.
                if success or done:
                    break

            print(f"Episode {episode + 1}: Total Reward: {episode_reward}")
            total_rewards.append(episode_reward)
    finally:
        # Release the environment even if the rollout raised.
        env.close()

    # Report the mean return (skipped when no episode was run).
    if total_rewards:
        avg_reward = np.mean(total_rewards)
        print(f"Average Reward over {num_episodes} episodes: {avg_reward}")

    if frames:
        media.show_video(frames, fps=30)



In [8]:
# Load a saved checkpoint and visualise one rollout of the policy.
# Earlier checkpoint (absolute path on the training machine), kept for reference:
#   /research/rs4tmr/cleanrl/cleanrl/runs/tr__ppo_continuous_action__s1__2024-09-26_05-11-00/ppo_continuous_action_380928.cleanrl_model
# The ".cleanrl_model" suffix is left off the path below.
# NOTE(review): presumably the loader appends it; confirm against load_ppo_checkpoint.
model_path = "runs/lift_ppo_long_learning_s1__2024-10-02 21_37_45/ppo_continuous_action_4800512"
load_model_and_rollout(
    model_path=model_path,
    task_id="lift",
    num_episodes=1,
    control_mode="OSC_POSITION",
)

### controller_config: OSC_POSITION ###
control_freq: 20
ignore_done: False


# Evaluate

In [5]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# 필요한 클래스 및 함수 임포트
from cleanrl.cleanrl.ppo_continuous_action import  load_model_and_evaluate
import cv2
import warnings
import wandb
# Silence every Python warning for the rest of the kernel session so the
# per-episode evaluation log stays readable.
warnings.filterwarnings("ignore")

# Checkpoint trained with the OSC_POSE controller; the ".cleanrl_model" suffix
# is left off the path.
# NOTE(review): presumably the loader appends it; confirm against load_model_and_evaluate.
model_path = "runs/lift_ppo_long_learning,OSC_POSE_s1__2024-10-02 21_48_32/ppo_continuous_action_4800512"
load_model_and_evaluate(
    model_path=model_path,
    task_id="lift",
    num_episodes=50,
    verbose=True,
    control_mode="OSC_POSE",
)


init_env: task_id: lift
### controller_config: OSC_POSE ###
control_freq: 20
ignore_done: False
########################
### Observation keys ###
Key: robot0_eef_pos, size: 3
Key: robot0_eef_quat, size: 4
Key: robot0_eef_vel_lin, size: 3
Key: robot0_eef_vel_ang, size: 3
Key: robot0_gripper_qpos, size: 6
Key: robot0_gripper_qvel, size: 6
Key: gripper_to_cube_pos, size: 3
Key: robot0_proprio-state, size: 25
Key: object-state, size: 3
Total observation size: 56
########################
####### Options ########
task_id: lift
active_rewards: rghl
control_mode: OSC_POSE
reward_shaping: True
fix_object: False
active_image: False
wandb_enabled: False
########################
Using CUDA
Episode 1: Total Reward: 2.9907518002453233, Success: False, 199 step
Episode 2: Total Reward: 9.333626540836358, Success: False, 199 step
Episode 3: Total Reward: 7.33703670896344, Success: False, 199 step
Episode 4: Total Reward: 3.2029788842764746, Success: True, 142 step
Episode 5: Total Reward: 2.6847744091