In [1]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# 필요한 클래스 및 함수 임포트
from cleanrl.cleanrl.ppo_continuous_action import  load_ppo_checkpoint
import cv2

def load_model_and_evaluate(model_path, task_id="HalfCheetah-v4", num_episodes=10, seed=1, gamma=0.99):
    """
    저장된 모델을 불러와 환경에서 평가를 수행하는 함수
    """
    visualize = True
    frames = []
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env, agent = load_ppo_checkpoint(model_path, task_id, seed, gamma, active_image=True)

    # 평가 수행
    total_rewards = []
    for episode in range(num_episodes):
        obs, _ = env.reset()
        obs = torch.Tensor(obs).to(device)
        done = False
        episode_reward = 0
        
        image_frame = env.envs[0].image_states['agentview_image']
        image_frame = np.array(image_frame[::-1, :, :], dtype=np.uint8)  # 명시적으로 numpy 배열로 변환
        frames.append(image_frame)
        # convert image_frame cv2 image
        image_frame = cv2.cvtColor(image_frame, cv2.COLOR_RGB2BGR)
                
        for i in range(200):
            with torch.no_grad():
                obs = torch.Tensor(obs).to(device)
                action, _, _, _ = agent.get_action_and_value(obs)
            obs, reward, terminations, truncations, info = env.step(action.cpu().numpy())
            #print(f"reward: {reward}, terminations: {terminations}, truncations: {truncations}, infos: {infos}")
            done = np.logical_or(terminations, truncations).any()
            episode_reward += reward[0]  # 첫 번째 환경의 보상 합산
            
            # 새로운 프레임 가져오기 및 변환
            image_frame = env.envs[0].image_states['agentview_image']
            image_frame = np.array(image_frame[::-1, :, :], dtype=np.uint8)  # numpy 배열로 변환

            #image_frame = cv2.cvtColor(image_frame, cv2.COLOR_RGB2BGR)
            # draw text on image_frame episode reward, reward, small text
            cv2.putText(image_frame, f"Episode Reward: {episode_reward:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            cv2.putText(image_frame, f"Reward: {reward[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            #print(env.envs[0].check_success())
            # sucess
            if done:
                print("Success")
                cv2.putText(image_frame, "Success", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            frames.append(image_frame)
            
            
        print(f"Episode {episode + 1}: Total Reward: {episode_reward}")
        total_rewards.append(episode_reward)

    env.close()
    # 평균 리턴 출력
    avg_reward = np.mean(total_rewards)
    print(f"Average Reward over {num_episodes} episodes: {avg_reward}")
    
    media.show_video(frames, fps=30)





In [2]:
# 저장된 모델 불러와 평가하기
#model_path = "/research/rs4tmr/cleanrl/cleanrl/runs/tr__ppo_continuous_action__s1__2024-09-26_05-11-00/ppo_continuous_action_380928.cleanrl_model"
model_path = "/research/rs4tmr/cleanrl/cleanrl/runs/lift_norm_save_test_s1__2024-09-30 12:26:40/ppo_continuous_action_991232"#.cleanrl_model"  # 모델 경로 지정
load_model_and_evaluate(model_path, task_id="lift", num_episodes=1)

Initalized env with init_env
Lift
control_freq: 20
########################
### Observation keys ###
Key: robot0_eef_pos, size: 3
Key: robot0_eef_quat, size: 4
Key: robot0_eef_vel_lin, size: 3
Key: robot0_eef_vel_ang, size: 3
Key: robot0_gripper_qpos, size: 6
Key: robot0_gripper_qvel, size: 6
Key: gripper_to_cube_pos, size: 3
Key: robot0_proprio-state, size: 25
Key: object-state, size: 3
Total observation size: 56
########################
####### Options ########
task_id: lift
active_rewards: rghl
control_mode: OSC_POSITION
reward_shaping: True
fix_object: False
active_image: True
wandb_enabled: False
########################
Using CUDA


  agent.load_state_dict(torch.load(weight_path, map_location=device))
  logger.warn(


Success
Success
Success
Episode 1: Total Reward: 1.3789967746656029
Average Reward over 1 episodes: 1.3789967746656029


0
This browser does not support the video tag.


# Evaluate

In [4]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# 필요한 클래스 및 함수 임포트
from cleanrl.cleanrl.ppo_continuous_action import  load_ppo_checkpoint
import cv2
import warnings
import wandb
warnings.filterwarnings("ignore")

def load_model_and_evaluate(model_path, global_step=None,task_id="HalfCheetah-v4", num_episodes=10, seed=1, gamma=0.99, verbose = False, wandb_log = False):
    """
    저장된 모델을 불러와 환경에서 평가를 수행하는 함수
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env, agent = load_ppo_checkpoint(model_path, task_id, seed, gamma, active_image=False, verbose=verbose)
    eval_horizon = 200  # 평가 시 사용할 에피소드 길이
    num_episodes = num_episodes
    count_sucess = 0
    # 평가 수행
    total_rewards = []
    for episode in range(num_episodes):
        obs, _ = env.reset()
        obs = torch.Tensor(obs).to(device)
        done = False
        episode_reward = 0
          
        for i in range(eval_horizon):
            with torch.no_grad():
                obs = torch.Tensor(obs).to(device)
                action, _, _, _ = agent.get_action_and_value(obs)
            obs, reward, terminations, truncations, info = env.step(action.cpu().numpy())
            #print(f"reward: {reward}, terminations: {terminations}, truncations: {truncations}, infos: {infos}")
            done = np.logical_or(terminations, truncations).any()
            episode_reward += reward[0]  # 첫 번째 환경의 보상 합산
            
            if terminations:
                count_sucess += 1
                break            
        if verbose:
            print(f"Episode {episode + 1}: Total Reward: {episode_reward}, Success: {terminations}, {i} step")
        total_rewards.append(episode_reward)

    env.close()
    if verbose :
        print(f"Success Rate : {count_sucess/num_episodes}  {count_sucess}/{num_episodes}")
    if wandb_log:
        wandb.log({"success_rate": count_sucess/num_episodes}, global_step=global_step)

model_path = "/research/rs4tmr/cleanrl/cleanrl/runs/lift_norm_save_test_s1__2024-09-30 12:26:40/ppo_continuous_action_991232"#.cleanrl_model"  # 모델 경로 지정
load_model_and_evaluate(model_path, task_id="lift", num_episodes=50, verbose=True)


Initalized env with init_env
Lift
control_freq: 20
########################
### Observation keys ###
Key: robot0_eef_pos, size: 3
Key: robot0_eef_quat, size: 4
Key: robot0_eef_vel_lin, size: 3
Key: robot0_eef_vel_ang, size: 3
Key: robot0_gripper_qpos, size: 6
Key: robot0_gripper_qvel, size: 6
Key: gripper_to_cube_pos, size: 3
Key: robot0_proprio-state, size: 25
Key: object-state, size: 3
Total observation size: 56
########################
####### Options ########
task_id: lift
active_rewards: rghl
control_mode: OSC_POSITION
reward_shaping: True
fix_object: False
active_image: False
wandb_enabled: False
########################
Using CUDA
Episode 1: Total Reward: 1.2597054078941834, Success: [False], 199 step
Episode 2: Total Reward: 0.31152245844995036, Success: [ True], 40 step
Episode 3: Total Reward: 0.7106766966411338, Success: [ True], 88 step
Episode 4: Total Reward: 1.051910310246843, Success: [False], 199 step
Episode 5: Total Reward: 1.3302277978435553, Success: [False], 199 s