In [1]:
import numpy as np
import random
import pickle
import matplotlib.pyplot as plt
import torch
import os
import sys
sys.path.append('..')

from utils.data_process import data_process

In [137]:
env_list = ['ShadowHand', 'ShadowHandCatchAbreast', 'ShadowHandOver', 'ShadowHandBlockStack', 'ShadowHandCatchUnderarm',
        'ShadowHandCatchOver2Underarm', 'ShadowHandBottleCap', 'ShadowHandLiftUnderarm', 'ShadowHandTwoCatchUnderarm', 'ShadowHandDoorOpenInward',
        'ShadowHandDoorOpenOutward', 'ShadowHandDoorCloseInward', 'ShadowHandDoorCloseOutward', 'ShadowHandPushBlock', 'ShadowHandKettle',
        'ShadowHandScissors', 'ShadowHandPen', 'ShadowHandSwingCup', 'ShadowHandGraspAndPlace', 'ShadowHandSwitch']


In [6]:
def eval_rm(seed_list, traj_path, model_idx, obs_dim, act_dim, frame_number, reward_model, verbose=False):
    scores = {env: {} for env in env_list}
    for env_name in env_list:
        for seed in seed_list:
            path = os.path.join(traj_path, f'seed{seed}')
            scores[env_name][seed] = {'scores': [],
                                        'mean': 0,
                                        'std': 0
            }
            file__cnt = 0
            file_path = os.path.join(path, env_name)
            try:
                files = os.listdir(file_path)
            except:
                print('No such file or directory: ', file_path)
                continue
            for file in files:
                # if file.endswith('.pkl'):
                if file.endswith('.pkl') and file.split('_')[4] in model_idx:  # name: ShadowHandBlockStack_ppo_4_20221211002936_20000_traj-episode-0.pkl
                    # print(file)
                    file__cnt += 1 
                    with open(os.path.join(path, env_name, file), 'rb') as f:
                        traj = pickle.load(f)
                        obs, action = data_process(traj, env_name)
                        # print(obs.shape, action.shape)
                        if obs.shape[1] ==  2*obs_dim and action.shape[1] == 2*act_dim:
                            obs = obs.reshape(-1, 2, obs_dim)
                            action = action.reshape(-1, 2, act_dim)
                            # print(obs.shape, action.shape)
                            oa = np.concatenate((obs, action), axis=-1).swapaxes(0,1) # (2, traj_length, obs_dim+act_dim)
                            # print(oa.shape)
                            slice_oa = np.array([[np.concatenate(s[i:i+frame_number]) for i in range(s.shape[0]-frame_number+1)] for s in oa]) # (2, batch, frame_number*(obs_dim+act_dim) )
                            # print(slice_oa.shape)
                        else:
                            oa = np.concatenate((obs, action), axis=-1)
                            slice_oa = np.array([np.concatenate(oa[i:i+frame_number]) for i in range(oa.shape[0]-frame_number+1)])  # (batch, frame_number*(obs_dim+act_dim) )
                            # print(slice_oa.shape)
                        score = reward_model(torch.tensor(slice_oa)) 
                        # print(score.shape)
                        mean_score = torch.mean(score).detach().numpy()
                        # print(mean_score)
                        scores[env_name][seed]['scores'].append(mean_score)
            # get avg score for seed over trajs
            scores[env_name][seed]['mean'] = np.mean(scores[env_name][seed]['scores'])  
            scores[env_name][seed]['std'] = np.std(scores[env_name][seed]['scores'])                   

            if verbose:
                if file__cnt == 0:
                    print(f'Env {env_name} seed {seed} model index {model_idx} not found.')
                else:
                    print(f'Env {env_name} seed {seed} model index {model_idx} found {file__cnt} trajs.')

        # break
    return scores

In [135]:
def plot_bar_result(scores, frame_number, itr):
    # plot
    os.makedirs(f'figs/itr{itr}/model_{frame_number}/bar', exist_ok=True)
    env = env_list[0]
    for env in env_list:
        plt.figure(figsize=(6, 5))
        x=list(scores[env].keys())
        x=[str(seed) for seed in x]
        y=[scores[env][seed]['mean'] for seed in scores[env].keys()]
        # create bar plot
        for i, v in enumerate(y):
            if v < 0:
                color = 'r'
            else:
                color = 'g'
            plt.bar(x[i], v, yerr=[scores[env][seed]['std'] for seed in scores[env].keys()][i], color=color, align='center',capsize=5, linewidth=2, alpha=0.5, ecolor='black')
        # add value labels on top of bars
        for i, v in enumerate(y):
            if v > 0:
                plt.text(i-0.1, v+0.03, str(round(v, 3)), fontsize=10)
            else:
                plt.text(i-0.1, v-0.03, str(round(v, 3)), fontsize=10)

        plt.xlabel('Model Index')
        plt.ylabel('Human Preference Score')
        plt.title(f'Reward Model Evaluation: {env}')
        # autoscale the plot to include all bars and labels
        plt.autoscale(tight=False)
        # plt.tight_layout()
        plt.savefig(f'figs/itr{itr}/model_{frame_number}/bar/{env}_reward_model_eval.png', dpi=300)
        # plt.show()
        plt.clf()

In [140]:
import seaborn as sns

def plot_violin_result(scores, frame_number, itr):
    # plot
    os.makedirs(f'figs/itr{itr}/model_{frame_number}/violin', exist_ok=True)
    env = env_list[0]
    for env in env_list:
        plt.figure(figsize=(6, 5))
        x=list(scores[env].keys())
        # x=[str(seed) for seed in x]
        means=[scores[env][seed]['mean'] for seed in scores[env].keys()]
        y=[np.array(scores[env][seed]['scores']) for seed in scores[env].keys()]
        # create violin plot
        colors = []
        for i, v in enumerate(means):
            if v < 0:
                colors.append('#F08030') # orange
            else:
                colors.append('#6890F0') # blue
        sns.violinplot(data=y, inner='box', palette = colors, saturation=0.7) # box: 25, 50, 75 quantiles
        # add value labels on top of bars
        # for i, v in enumerate(y):
        #     if v > 0:
        #         plt.text(i-0.1, v+0.03, str(round(v, 3)), fontsize=10)
        #     else:
        #         plt.text(i-0.1, v-0.03, str(round(v, 3)), fontsize=10)

        # plt.xticks(range(len(x)), x)  # comment this give 0-9
        plt.grid(True)
        plt.xlabel('Model Index')
        plt.ylabel('Human Preference Score')
        plt.title(f'Reward Model Evaluation: {env}')
        # autoscale the plot to include all bars and labels
        plt.autoscale(tight=False)
        # plt.tight_layout()
        plt.savefig(f'figs/itr{itr}/model_{frame_number}/violin/{env}_reward_model_eval.png', dpi=300)
        # plt.show()
        plt.clf()

In [141]:
itr=1
frame_number = 8
obs_dim=24
act_dim=20
device='cpu'
algo='ppo'
model_idx = ['20000']
# seed_list = [3,4,5,6,7,8,9,10,11,12,13,14]
seed_list = [5,6,7,8,9,10,11,12,13,14] # select 10

reward_model = torch.jit.load(f'./reward_model/itr{itr}/model_{frame_number}_gpu.pt', map_location=device)
traj_path = 'data/videos'
scores = eval_rm(seed_list, traj_path, model_idx, obs_dim, act_dim, frame_number, reward_model)
plot_bar_result(scores, frame_number, itr)
plot_violin_result(scores, frame_number, itr)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims, where=where)
  subok=False)
  ret = ret.dtype.type(ret / rcount)


No such file or directory:  data/videos/seed5/ShadowHandLiftUnderarm
No such file or directory:  data/videos/seed6/ShadowHandLiftUnderarm
No such file or directory:  data/videos/seed7/ShadowHandLiftUnderarm
No such file or directory:  data/videos/seed8/ShadowHandLiftUnderarm
No such file or directory:  data/videos/seed9/ShadowHandLiftUnderarm
No such file or directory:  data/videos/seed14/ShadowHandLiftUnderarm
No such file or directory:  data/videos/seed5/ShadowHandKettle
No such file or directory:  data/videos/seed6/ShadowHandKettle
No such file or directory:  data/videos/seed7/ShadowHandKettle
No such file or directory:  data/videos/seed8/ShadowHandKettle
No such file or directory:  data/videos/seed9/ShadowHandKettle
No such file or directory:  data/videos/seed14/ShadowHandKettle


posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
  


<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

In [132]:
# select 16 envs (Grasp and Place has problem)
env_list = ['ShadowHand', 'ShadowHandCatchAbreast', 'ShadowHandOver', 'ShadowHandBlockStack', 'ShadowHandCatchUnderarm',
        'ShadowHandCatchOver2Underarm', 'ShadowHandBottleCap', 'ShadowHandLiftUnderarm', 'ShadowHandTwoCatchUnderarm', 'ShadowHandDoorOpenInward',
        'ShadowHandDoorOpenOutward', 'ShadowHandDoorCloseInward', 'ShadowHandPushBlock',
        'ShadowHandScissors', 'ShadowHandPen', 'ShadowHandSwitch']


In [133]:
itr=2
frame_number = 8
obs_dim=24
act_dim=20
device='cpu'
algo='ppo'
model_idx = ['1000', '2000', '3000', '4000', '5000']
seed_list = [20,21,22,23,24,25,26,27,28,29]
reward_model = torch.jit.load(f'./reward_model/itr{itr}/model_{frame_number}_gpu.pt', map_location=device)
traj_path = 'data/videos'
scores = eval_rm(seed_list, traj_path, model_idx, obs_dim, act_dim, frame_number, reward_model, verbose=False)
plot_bar_result(scores, frame_number, itr)
plot_violin_result(scores, frame_number, itr)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims, where=where)
  subok=False)
  ret = ret.dtype.type(ret / rcount)


<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

<Figure size 432x360 with 0 Axes>

In [131]:
# select 17 envs
env_list = ['ShadowHand', 'ShadowHandCatchAbreast', 'ShadowHandOver', 'ShadowHandBlockStack', 'ShadowHandCatchUnderarm',
        'ShadowHandCatchOver2Underarm', 'ShadowHandBottleCap', 'ShadowHandLiftUnderarm', 'ShadowHandTwoCatchUnderarm', 'ShadowHandDoorOpenInward',
        'ShadowHandDoorOpenOutward', 'ShadowHandDoorCloseInward', 'ShadowHandGraspAndPlace', 'ShadowHandPushBlock',
        'ShadowHandScissors', 'ShadowHandPen', 'ShadowHandSwitch']