In [1]:
import itertools
import math
import os
from time import sleep

import k3d
import matplotlib.pyplot as plt
import numpy as np
import trimesh
from IPython.display import clear_output
%matplotlib inline

# !conda install -c conda-forge pyembree
# !conda install -c conda-forge igl
# !pip install Cython
# !pip install gym

In [2]:
def compute_metrics(env, agent, iter_cnt=10, max_iter=30):
    """Roll out ``agent`` in ``env`` and average per-episode statistics.

    Every episode starts with ``env.reset()`` and runs until the environment
    reports ``done`` or ``max_iter`` steps have been taken. Actions are
    obtained from ``agent.act(state, mask, epsilon=0.05)``.

    Args:
        env: Object whose ``reset()`` returns ``(state, action, mask)``, whose
            ``step(action)`` returns ``(state, reward, done, info, mask)`` and
            which exposes ``final_reward()``.
        agent: Object with an ``act(state, mask, epsilon=...)`` method.
        iter_cnt: Number of episodes to run.
        max_iter: Maximum number of steps per episode.

    Returns:
        Tuple ``(mean episode reward, mean final reward, mean step count)``
        computed with ``np.mean`` over the ``iter_cnt`` episodes.
    """
    rewards, final_rewards, novp = [], [], []
    for _ in range(iter_cnt):
        # reset() also hands back an initial action; it is not needed here
        # because the agent picks every action itself.
        state, _initial_action, mask = env.reset()
        episode_reward = 0.0
        # Explicit counter instead of reusing the loop variable: with
        # max_iter == 0 the loop variable would never be bound.
        steps_taken = 0
        for _step in range(max_iter):
            action = agent.act(state, mask, epsilon=0.05)

            state, reward, done, _info, mask = env.step(action)
            episode_reward += reward
            steps_taken += 1

            if done:
                break

        # The final reward is reported separately; it is not added to
        # episode_reward.
        rewards.append(episode_reward)
        final_rewards.append(env.final_reward())
        novp.append(steps_taken)
    return np.mean(rewards), np.mean(final_rewards), np.mean(novp)

In [12]:
from rl.environment import *

def create_env(model_path=None):
    """Build the fully wrapped environment for one model file.

    A base ``Environment`` is created for ``model_path`` and then wrapped,
    innermost first, by: ``MeshReconstructionWrapper``, ``VoxelGridWrapper``,
    ``CombiningObservationsWrapper``, ``VoxelWrapper``,
    ``StepPenaltyRewardWrapper``, ``FrameStackWrapper`` and
    ``ActionMaskWrapper``.

    Args:
        model_path: Forwarded to ``Environment(model_path=...)``.

    Returns:
        The outermost wrapper (an ``ActionMaskWrapper`` instance).
    """
    wrapped = Environment(model_path=model_path,
                          image_size=1024,
                          number_of_view_points=100)

    # (wrapper class, keyword arguments), applied in order.
    wrapper_chain = (
        (MeshReconstructionWrapper, dict(reconstruction_depth=8,
                                         final_depth=10,
                                         scale_factor=8,
                                         do_step_reconstruction=True)),
        (VoxelGridWrapper, dict(occlusion_reward=True, grid_size=64)),
        (CombiningObservationsWrapper, {}),
        (VoxelWrapper, {}),
        (StepPenaltyRewardWrapper, {}),
        (FrameStackWrapper, dict(num_stack=4, lz4_compress=False)),
        (ActionMaskWrapper, {}),
    )
    for wrapper_cls, wrapper_kwargs in wrapper_chain:
        wrapped = wrapper_cls(wrapped, **wrapper_kwargs)
    return wrapped


### DQN

In [13]:
from tqdm import tqdm_notebook as tqdm
from rl.agent import *

In [14]:
import torch
from rl.dqn import *

device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

# The agent needs the observation shape and the number of actions. No earlier
# cell defines `env`, so build one here from the first validation model rather
# than relying on a variable left over from out-of-order execution.
# NOTE(review): assumes every model yields the same spaces, as the evaluation
# loops already do by sharing a single agent -- confirm.
reference_models_dir = "./data/1kabc/simple/val/"
reference_model = sorted(os.listdir(reference_models_dir))[0]
env = create_env(os.path.join(reference_models_dir, reference_model))

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads on a CPU-only machine or one with fewer GPUs.
# NOTE(review): torch.load unpickles the file -- only load trusted
# checkpoints. Recent PyTorch releases default to weights_only=True, which
# presumably has to be overridden for a fully pickled model; confirm against
# the installed version.
model = torch.load("./models/abc-vdqn-occl-fix-rew/last-150000.pt",
                   map_location=device)
agent = DQNAgent(env.observation_space.shape, env.action_space.n,
                 device=device)

agent.model = model.to(device)

In [None]:
%%time
models_path = "./data/1kabc/simple/val/"
result = {}
for model_path in tqdm(sorted(os.listdir(models_path))):
    env = create_env(os.path.join(models_path, model_path))
    result[model_path] = compute_metrics(env, agent, max_iter=100, iter_cnt=1)
    print(model_path, result[model_path])

### Random

In [None]:
%%time
random_agent_func = lambda s : env.action_space.sample()
models_path = "./data/10abc/"
result = {}
for model_path in os.listdir(models_path):
    env = create_env(os.path.join(models_path, model_path))
    result[model_path] = compute_metrics(env, random_agent_func, max_iter=30)
    print(model_path, result[model_path])

00731313_58fc1a1bb4b5be10cb401503_003.obj (-2.5757095519377606, 0.25795545337154036, 4.3)
00942098_bd039b3a4a4efa75e86b7350_000.obj (-3.0503391472868215, 0.10782423963311767, 4.9)
