# Rollout

In [1]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# Import the required classes and functions
from cleanrl.cleanrl.ppo_continuous_action import  load_ppo_checkpoint
import cv2

def load_model_and_rollout(model_path, task_id="HalfCheetah-v4", num_episodes=10,
                           seed=1, gamma=0.99, control_mode='OSC_POSITION',
                           picking_success=False,
                           ):
    """Load a saved PPO checkpoint, roll it out, and display the video.

    Every episode starts with a policy phase: the grasp lock is enabled and
    the loaded agent acts for at most 200 steps, stopping early as soon as
    the first sub-environment reports ``is_success``.  When the policy phase
    succeeds, a scripted sequence follows with the grasp lock released:
    a "grasp" command, a noisy proportional move to a lift waypoint, the
    same kind of move to a place waypoint, a "release" command, and a few
    zero-action padding steps.  Episodes whose policy phase does not succeed
    skip the scripted sequence and are not counted in the reward average.

    Args:
        model_path: Checkpoint path forwarded to ``load_ppo_checkpoint``.
        task_id: Task identifier forwarded to ``load_ppo_checkpoint``.
        num_episodes: Number of episodes to roll out.
        seed: Seed forwarded to ``load_ppo_checkpoint``.
        gamma: Discount factor forwarded to ``load_ppo_checkpoint``.
        control_mode: Controller mode forwarded to ``load_ppo_checkpoint``.
        picking_success: If true, policy-phase frames are kept only for
            episodes that succeed; otherwise they are always kept.

    Returns:
        None.  Progress is printed and the collected frames are shown with
        ``media.show_video`` (or a message is printed if there are none).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env, agent = load_ppo_checkpoint(checkpoint_path=model_path,
                                     task_id=task_id,
                                     control_mode=control_mode,
                                     seed=seed,
                                     gamma=gamma, active_image=True,
                                     )

    # Phase lengths and scripted-controller settings.
    rollout_horizon = 200
    grasp_horizon = 10
    release_horizon = 10
    padding_horizon = 30
    reach_max_steps = 50
    reach_tolerance = 0.05
    reach_gain = 3
    reach_noise_std = 0.5
    lift_target = [0.03, -0.23, 1.1]
    place_target = [-0.01, 0.22, 1.07]

    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)

    def render_frame(episode_reward, reward):
        """Return the current agent-view image with a status overlay."""
        base_env = env.envs[0]
        raw = base_env.image_states['agentview_image']
        # Reverse the first axis and copy into a uint8 array that cv2 can
        # draw on.
        frame = np.array(raw[::-1, :, :], dtype=np.uint8)
        dist = base_env.check_success_dist
        ee_pos = base_env.get_ee_pose()

        cv2.putText(frame, f"Episode Reward: {episode_reward:.2f}", (10, 15), font, 0.5, white, 1)
        cv2.putText(frame, f"Reward: {reward[0]:.2f}", (10, 30), font, 0.5, white, 1)
        cv2.putText(frame, f"EE Pos: {ee_pos[0]:.2f}, {ee_pos[1]:.2f}, {ee_pos[2]:.2f}", (10, 45), font, 0.5, white, 1)
        # Distance readout near the bottom of the frame.
        cv2.putText(frame, f"Dist: {dist:.4f}", (10, 200), font, 0.5, (0, 0, 0), 1)
        return frame

    def run_fixed_action(command, horizon, episode_reward):
        """Step the env ``horizon`` times with a constant action; return frames."""
        env.envs[0].set_grasp_lock(False)
        action = np.array(command, dtype=np.int64)
        phase_frames = []
        for _ in range(horizon):
            _, reward, _, _, _ = env.step([action])
            phase_frames.append(render_frame(episode_reward, reward))
        return phase_frames

    def run_reach(target_pos, episode_reward):
        """Drive the end effector toward ``target_pos``; return frames.

        Stops once the end effector is within ``reach_tolerance`` of the
        target or after ``reach_max_steps`` steps, whichever comes first.
        """
        env.envs[0].set_grasp_lock(False)
        target = np.array(target_pos)
        phase_frames = []
        # A bounded ``for`` guarantees termination even if the target is
        # never reached.
        for _ in range(reach_max_steps):
            offset = target - np.array(env.envs[0].get_ee_pose())
            if not (np.linalg.norm(offset) > reach_tolerance):
                break
            # Proportional command on the position error plus Gaussian noise.
            action = np.concatenate([offset, [1]]) * reach_gain
            action += np.random.normal(0, reach_noise_std, action.shape)
            # Last action component is pinned to 1 (same value the grasp
            # phase sends) after the noise is added.
            action[3] = 1
            _, reward, _, _, _ = env.step([action])
            phase_frames.append(render_frame(episode_reward, reward))
        return phase_frames

    total_rewards = []
    frames = []

    for episode in range(num_episodes):
        obs, _ = env.reset()
        env.envs[0].set_grasp_lock(True)
        episode_reward = 0

        # Un-annotated first frame taken right after reset.
        first_frame = env.envs[0].image_states['agentview_image']
        episode_frames = [np.array(first_frame[::-1, :, :], dtype=np.uint8)]

        ### Policy phase
        succeeded = False
        for _ in range(rollout_horizon):
            with torch.no_grad():
                obs = torch.Tensor(obs).to(device)
                action, _, _, _ = agent.get_action_and_value(obs)
            obs, reward, _, _, _ = env.step(action.cpu().numpy())
            # Only the first sub-environment's reward is accumulated.
            episode_reward += reward[0]

            frame = render_frame(episode_reward, reward)
            succeeded = bool(env.envs[0].is_success)
            if succeeded:
                cv2.putText(frame, "Success", (10, 90), font, 0.5, (0, 255, 0), 2)
            episode_frames.append(frame)
            if succeeded:
                break

        # Decide whether the policy-phase frames go into the video.
        if not picking_success:
            frames += episode_frames
        elif succeeded:
            frames += episode_frames
            print("Success, add frames")
        else:
            print("Fail, drop frames")

        if not succeeded:
            continue

        ### Scripted phases: grasp, lift, move, release, padding.
        # episode_reward is not updated here; it is only shown in the overlay.
        frames += run_fixed_action([0, 0, 0, 1], grasp_horizon, episode_reward)
        frames += run_reach(lift_target, episode_reward)
        frames += run_reach(place_target, episode_reward)
        frames += run_fixed_action([0, 0, 0, -1], release_horizon, episode_reward)
        frames += run_fixed_action([0, 0, 0, 0], padding_horizon, episode_reward)

        print(f"Episode {episode + 1}: Total Reward: {episode_reward}")
        total_rewards.append(episode_reward)

    env.close()
    # Report the mean return.  Only episodes that completed the scripted
    # phases are in total_rewards; with none, report nan without tripping
    # numpy's empty-mean warning.
    avg_reward = np.mean(total_rewards) if total_rewards else float("nan")
    print(f"Average Reward over {num_episodes} episodes: {avg_reward}")
    if len(frames) > 0:
        media.show_video(frames, fps=30)
    else:
        print("No frames to show")

# Load a saved model and roll it out.
# Alternative checkpoint, kept for reference:
#model_path = "/research/rs4tmr/cleanrl/cleanrl/runs/tr__ppo_continuous_action__s1__2024-09-26_05-11-00/ppo_continuous_action_380928.cleanrl_model"
# Checkpoint to load (given without a ".cleanrl_model" suffix).
model_path = "runs/pickplace_ppo_pp,yaxis,004_s57523__2024-10-08 14_06_04/ppo_continuous_action_401408"
# Single pick-and-place episode; frames are kept whether or not it succeeds.
load_model_and_rollout(
    model_path,
    task_id="pickplace",
    num_episodes=1,
    control_mode="OSC_POSITION",
    picking_success=False,
)



### controller_config: OSC_POSITION ###
#### J PickPlace ####
fix_object:False
start with grasp lock: True
control_freq: 20
ignore_done: False
Episode 1: Total Reward: 0.26503242774964797
Average Reward over 1 episodes: 0.26503242774964797


0
This browser does not support the video tag.


# Evaluate

In [8]:
import torch
import gymnasium as gym
import numpy as np
import mediapy as media
# Import the required classes and functions
from cleanrl.cleanrl.ppo_continuous_action import  load_model_and_evaluate
import cv2
import warnings
import wandb
# Suppress every warning for the rest of this cell's session.
warnings.filterwarnings("ignore")

# Checkpoint to evaluate (given without a ".cleanrl_model" suffix).
model_path = "runs/pickplace_ppo_pp,yaxis,004_s57523__2024-10-08 14_06_04/ppo_continuous_action_401408"
# Evaluate the checkpoint over 100 pick-and-place episodes with verbose output.
load_model_and_evaluate(
    model_path=model_path,
    task_id="pickplace",
    num_episodes=100,
    verbose=True,
    control_mode='OSC_POSITION',
)


init_env: task_id: pickplace
### controller_config: OSC_POSITION ###
#### J PickPlace ####
fix_object:False
start with grasp lock: True
control_freq: 20
ignore_done: False
########################
### Observation keys ###
Key: robot0_eef_pos, size: 3
Key: robot0_eef_quat, size: 4
Key: robot0_eef_vel_lin, size: 3
Key: robot0_eef_vel_ang, size: 3
Key: robot0_gripper_qpos, size: 6
Key: robot0_gripper_qvel, size: 6
Key: Can_pos, size: 3
Key: Can_quat, size: 4
Key: Can_to_robot0_eef_pos, size: 3
Key: Can_to_robot0_eef_quat, size: 4
Key: robot0_proprio-state, size: 25
Key: object-state, size: 14
Total observation size: 78
########################
####### Options ########
task_id: pickplace
active_rewards: rghl
control_mode: OSC_POSITION
reward_shaping: True
fix_object: False
active_image: False
wandb_enabled: False
########################
Using CUDA
Episode 1: Total Reward: 1.1674676723586783, Success: True, 39 step
Episode 2: Total Reward: 2.074987319343643, Success: True, 106 step
Episode

0.82