In [1]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# 필요한 클래스 및 함수 임포트
from cleanrl.cleanrl.ppo_continuous_action import Agent, Args, ppo_make_env
import cv2

def _grab_frame(env, camera_key="agentview_image"):
    """Return the current camera image of sub-environment 0 as a writable uint8 array.

    The rows are reversed (``[::-1]``), presumably because the renderer returns
    images bottom-up -- TODO confirm against the environment implementation.
    ``order="C"`` plus the implicit copy guarantees a contiguous buffer that
    ``cv2.putText`` can draw on without touching the environment's own image.
    """
    raw = env.envs[0].image_states[camera_key]
    return np.array(raw[::-1, :, :], dtype=np.uint8, order="C")


def load_model_and_evaluate(model_path, task_id="HalfCheetah-v4", num_episodes=10, seed=1, gamma=0.99, fps=30):
    """Load a saved agent checkpoint, roll it out, and show the recorded video.

    For each episode the policy is sampled via ``agent.get_action_and_value``
    until the (single) vectorized sub-environment reports termination or
    truncation. Every step's ``agentview_image`` frame is annotated with the
    running and per-step reward and collected; after all episodes the per-episode
    totals and their mean are printed and the frames are displayed inline with
    ``mediapy``.

    Args:
        model_path: Path to a checkpoint holding the agent's ``state_dict``.
        task_id: Task identifier forwarded to ``ppo_make_env``.
        num_episodes: Number of evaluation episodes; must be at least 1.
        seed: Seed passed to the first ``env.reset`` call only, so later
            episodes continue from the seeded RNG stream instead of being
            re-seeded to the identical state every time.
        gamma: Discount factor forwarded to ``ppo_make_env``.
        fps: Frame rate of the displayed video.

    Raises:
        ValueError: If ``num_episodes`` is smaller than 1.
    """
    if num_episodes < 1:
        raise ValueError(f"num_episodes must be >= 1, got {num_episodes}")

    # Argument container expected by the environment factory.
    args = Args()
    args.task_id = task_id
    args.seed = seed
    args.gamma = gamma

    # Single-environment vector wrapper; image capture is enabled so frames
    # can be read from ``image_states`` after every step.
    env = gym.vector.SyncVectorEnv(
        [ppo_make_env(
            task_id=args.task_id,
            reward_shaping=args.reward_shaping,
            idx=0,
            capture_video=False,
            run_name="eval",
            gamma=args.gamma,
            active_rewards="r",
            active_image=True,
            fix_object=args.fix_object,
            wandb_enabled=False,
            )
        ]
    )

    frames = []
    total_rewards = []
    try:
        # Prefer CUDA when available.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if device.type == "cuda":
            print("Using CUDA")

        # Build the agent and restore its weights. ``weights_only=True`` limits
        # unpickling to tensors/containers, which is all a state_dict needs.
        agent = Agent(env).to(device)
        agent.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
        agent.eval()  # switch to evaluation mode

        for episode in range(num_episodes):
            # Seed only once: passing the same seed on every reset would restart
            # the environment RNG from the same state for each episode.
            obs, _ = env.reset(seed=args.seed if episode == 0 else None)
            done = False
            episode_reward = 0

            # Initial (un-annotated) frame of the episode, recorded once.
            frames.append(_grab_frame(env))

            while not done:
                with torch.no_grad():
                    obs_tensor = torch.as_tensor(obs, dtype=torch.float32, device=device)
                    action, _, _, _ = agent.get_action_and_value(obs_tensor)
                obs, reward, terminations, truncations, infos = env.step(action.cpu().numpy())
                done = np.logical_or(terminations, truncations).any()
                episode_reward += reward[0]  # only one sub-environment exists

                # NOTE(review): if the vector env auto-resets on the terminal
                # step, this last frame may show the post-reset state -- confirm
                # against the installed Gymnasium version.
                image_frame = _grab_frame(env)

                # Overlay the running episode return and the per-step reward.
                cv2.putText(image_frame, f"Episode Reward: {episode_reward:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                cv2.putText(image_frame, f"Reward: {reward[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                frames.append(image_frame)

            print(f"Episode {episode + 1}: Total Reward: {episode_reward}")
            total_rewards.append(episode_reward)
    finally:
        # Release simulator resources even when loading or the rollout fails.
        env.close()

    # Report the mean return over all episodes.
    avg_reward = np.mean(total_rewards)
    print(f"Average Reward over {num_episodes} episodes: {avg_reward}")

    media.show_video(frames, fps=fps)





In [6]:
# Load a saved checkpoint and evaluate it for a single episode on the "lift" task.
# Alternative checkpoint from an earlier run, kept for quick switching:
#model_path = "/research/rs4tmr/cleanrl/cleanrl/runs/tr__ppo_continuous_action__s1__2024-09-26_05-11-00/ppo_continuous_action_380928.cleanrl_model"
# NOTE(review): absolute, machine-specific path -- consider deriving it from a configurable runs directory.
model_path = "/research/rs4tmr/cleanrl/cleanrl/runs/tr__ppo_continuous_action__s1__2024-09-26_05-49-06/ppo_continuous_action_350208.cleanrl_model"  # checkpoint to evaluate
load_model_and_evaluate(model_path, task_id="lift", num_episodes=1)

Initalized env with init_env
### Observation keys ###
Key: robot0_eef_pos, size: 3
Key: robot0_eef_quat, size: 4
Key: robot0_eef_vel_lin, size: 3
Key: robot0_eef_vel_ang, size: 3
Key: robot0_gripper_qpos, size: 6
Key: robot0_gripper_qvel, size: 6
Key: gripper_to_cube_pos, size: 3
Key: robot0_proprio-state, size: 25
Key: object-state, size: 3
Total observation size: 56
########################
Using CUDA


  agent.load_state_dict(torch.load(model_path, map_location=device))


Episode 1: Total Reward: 28.66982275710374
Average Reward over 1 episodes: 28.66982275710374


0
This browser does not support the video tag.
