In [1]:
import itertools
import math
import os
from time import sleep

import k3d
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import trimesh
from IPython.display import clear_output
%matplotlib inline

# !conda install -c conda-forge pyembree
# !conda install -c conda-forge igl
# !pip install Cython
# !pip install gym

In [16]:
def compute_metrics(env, agent, iter_cnt=10, max_iter=30, epsilon=0.05):
    """Roll out `agent` in `env` for several episodes and average the outcome.

    Parameters
    ----------
    env
        Environment whose ``reset()`` returns ``(state, action, mask)``,
        whose ``step(action)`` returns ``(state, reward, done, info, mask)``
        and which provides ``final_reward()``.
    agent
        Object with an ``act(state, mask, epsilon=...)`` method returning
        the next action.
    iter_cnt : int
        Number of episodes to run.
    max_iter : int
        Maximum number of steps per episode.
    epsilon : float
        Exploration rate forwarded to ``agent.act``. Defaults to the
        previously hard-coded value of 0.05.

    Returns
    -------
    tuple
        ``(mean episode reward, mean final reward, mean steps per episode)``,
        each averaged over the `iter_cnt` episodes. With ``iter_cnt == 0``
        the means are taken over empty lists (numpy yields ``nan``).
    """
    rewards, final_rewards, novp = [], [], []
    for _ in range(iter_cnt):
        # reset() also yields an initial action, which the rollout never uses.
        state, _initial_action, mask = env.reset()
        episode_reward = 0.0
        # Count steps explicitly: relying on the loop variable after the loop
        # breaks when max_iter == 0 (the variable is never bound).
        steps_taken = 0
        for _step in range(max_iter):
            action = agent.act(state, mask, epsilon=epsilon)
            state, reward, done, _info, mask = env.step(action)
            episode_reward += reward
            steps_taken += 1

            if done:
                break

        rewards.append(episode_reward)
        final_rewards.append(env.final_reward())
        novp.append(steps_taken)
    return np.mean(rewards), np.mean(final_rewards), np.mean(novp)

In [3]:
from rl.environment import *

def create_env(model_path=None):
    """Build the evaluation environment for one mesh file.

    A base ``Environment`` is created for `model_path` (image size 512,
    100 view points) and then wrapped, innermost first, by the wrappers
    listed in ``wrapper_stack``. The fully wrapped environment is returned.

    Other configurations tried earlier (``DepthMapWrapper``,
    ``StepPenaltyRewardWrapper(env, weight=1.0)``, a ``similarity_threshold``
    argument) are left disabled here.
    """
    wrapped = Environment(
        model_path=model_path,
        image_size=512,
        number_of_view_points=100,
    )

    # (wrapper class, keyword arguments), applied in order from top to bottom.
    wrapper_stack = (
        (MeshReconstructionWrapper, {"reconstruction_depth": 10}),
        (VoxelGridWrapper, {"occlusion_reward": True, "grid_size": 64}),
        (CombiningObservationsWrapper, {}),
        (VoxelWrapper, {}),
        (StepPenaltyRewardWrapper, {}),
        (FrameStackWrapper, {"num_stack": 4, "lz4_compress": False}),
        (ActionMaskWrapper, {}),
    )
    for wrapper_cls, options in wrapper_stack:
        wrapped = wrapper_cls(wrapped, **options)
    return wrapped


### DQN

In [9]:
from tqdm import tqdm_notebook as tqdm
from rl.agent import *

In [11]:
import torch
from rl.dqn import *

# NOTE(review): 'cuda:3' assumes at least four visible GPUs whenever CUDA is
# available -- confirm on the target machine.
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

# map_location remaps the stored tensors onto `device` while loading, so a
# checkpoint saved on a GPU can still be opened when falling back to the CPU.
# torch.load unpickles the file: only load checkpoints from a trusted source.
model = torch.load("./models/abc-vdqn-occl-sgd/last-71500.pt",
                   map_location=device)

# NOTE(review): `env` is not created by any cell above this one; it has to
# exist in the kernel already (e.g. from create_env) before this cell runs.
agent = DQNAgent(env.observation_space.shape, env.action_space.n,
                 device=device)

# Replace the freshly initialised network with the loaded one.
agent.model = model.to(device)

In [None]:
%%time
# Evaluate the loaded DQN agent on every mesh in the validation folder,
# one episode of at most 100 steps per mesh, keyed by file name.
models_path = "./data/1kabc/simple/val/"
result = {}
for model_path in tqdm(sorted(os.listdir(models_path))):
    mesh_file = os.path.join(models_path, model_path)
    env = create_env(mesh_file)
    metrics = compute_metrics(env, agent, iter_cnt=1, max_iter=100)
    result[model_path] = metrics
    print(model_path, metrics)

HBox(children=(IntProgress(value=0, max=72), HTML(value='')))

  q_value = F.softmax(q_value)


Action:  1
0.45776628333249636 0.05735257562154483 0.5151188589540412
0.05735257562154483
Action:  25
0.3191884698438206 0.1996704841973838 0.5188589540412044
0.1996704841973838
Action:  2
0.37862200572490334 0.33218623516257684 0.7108082408874802
0.33218623516257684
Action:  42
0.2111685833375182 0.5435699269635912 0.7547385103011094
0.5435699269635912
Action:  79
0.3803796514839552 0.5832336607188975 0.9636133122028526
0.5832336607188975
00010085_df3813ab6587454f8b8ccbc1_002.obj (-3.2839871173360065, 0.21178178768205502, 5.0)
Action:  99
0.3100847457627119 0.05996894284337284 0.37005368860608473
0.05996894284337284
Action:  46
0.13203389830508475 0.25909753935647817 0.39113143766156294
0.25909753935647817
Action:  71 (random)
0.3309322033898305 0.21609503860659024 0.5470272419964207
0.21609503860659024
Action:  44
0.0688135593220339 0.5142049334200619 0.5830184927420958
0.5142049334200619
Action:  27
0.24322033898305084 0.37837441146435424 0.6215947504474051
0.37837441146435424
Actio

In [14]:
# Split the per-model metric triples stored in `result` into one list per
# position, then average each list over all evaluated models.
a = [metrics[0] for metrics in result.values()]
b = [metrics[1] for metrics in result.values()]
c = [metrics[2] for metrics in result.values()]
np.mean(a), np.mean(b), np.mean(c)

(-18.958703855822097, 0.62630573859276, 61.15277777777778)

### Random

In [None]:
%%time
def random_agent_func(s):
    """Sample an action from the action space of the current global `env`.

    The state `s` is ignored. `env` is looked up at call time, so the
    environment created in the loop below is the one that gets sampled.
    """
    return env.action_space.sample()


class _CallablePolicyAgent:
    """Expose a plain ``policy(state)`` callable through an ``act`` method.

    compute_metrics calls ``agent.act(state, mask, epsilon=...)``; passing the
    bare function would fail because a function has no ``act`` attribute.
    """

    def __init__(self, policy):
        self._policy = policy

    def act(self, state, mask=None, epsilon=None):
        # `mask` and `epsilon` are accepted for call compatibility only.
        # NOTE(review): the sampled action does not take `mask` into account
        # -- confirm that is acceptable for the random baseline.
        return self._policy(state)


models_path = "./data/10abc/"
result = {}
random_agent = _CallablePolicyAgent(random_agent_func)
# Sorted so the evaluation order matches the DQN cell and is reproducible.
for model_path in sorted(os.listdir(models_path)):
    env = create_env(os.path.join(models_path, model_path))
    result[model_path] = compute_metrics(env, random_agent, max_iter=30)
    print(model_path, result[model_path])

00731313_58fc1a1bb4b5be10cb401503_003.obj (-2.5757095519377606, 0.25795545337154036, 4.3)
00942098_bd039b3a4a4efa75e86b7350_000.obj (-3.0503391472868215, 0.10782423963311767, 4.9)


In [37]:
df = pd.read_csv("./abc_100_train.csv")

# Rows whose "Greedy_novp" value is below this threshold are selected.
MAX_GREEDY_NOVP = 10

# Vectorised filter instead of a row-by-row iterrows() loop: collect the
# "Name" of every selected row, in file order.
model_paths = df.loc[df["Greedy_novp"] < MAX_GREEDY_NOVP, "Name"].tolist()

In [40]:
# Move every selected model file into the "simple" training folder.
destination_dir = "./data/1kabc/simple/train"
failed_moves = []
for path in model_paths:
    try:
        os.rename(path, os.path.join(destination_dir, os.path.basename(path)))
    except (OSError, TypeError) as err:
        # Still best-effort (one bad entry must not stop the loop), but only
        # file-system errors and non-path values are tolerated, and they are
        # recorded instead of silently discarded. A bare `except` would also
        # swallow KeyboardInterrupt.
        failed_moves.append((path, err))

if failed_moves:
    print("Could not move", len(failed_moves), "of", len(model_paths), "files:")
    for path, err in failed_moves:
        print("  ", path, "->", err)