In [1]:
import torch
import pytorch_lightning as pl
import numpy as np
import os
import os.path as osp
from transglot.models.listener import *
from transglot.simple_utils import unpickle_data
import matplotlib.pyplot as plt
from PIL import Image
import hydra
import omegaconf

# Root folder of the chair game data; img_dir points at the image sub-folder inside it.
top_data_dir = 'data/main_data_for_chairs'
img_dir = osp.join(top_data_dir, "images/shapenet/03001627")
data_name = 'game_data.pkl'

# The pickle yields six objects: the game data, the word <-> int maps, the
# ShapeNet-model <-> int maps, and the sorted list of ShapeNet models.
(
    game_data,
    word_to_int,
    int_to_word,
    int_to_sn_model,
    sn_model_to_int,
    sorted_sn_models,
) = unpickle_data(osp.join(top_data_dir, data_name))

# Number of entries in the int -> word map.
vocab_size = len(int_to_word)


In [2]:
# Note from the original run: git state "before reduce attention dimension by 2".
outputs_top_dir = "outputs"
ver_date_dir = "pretrained"

# Automatically select best ckpt #
# The score is parsed from the four characters right before the ".ckpt" suffix.
# NOTE(review): this assumes file names end like "...0.78.ckpt" -- confirm against
# the checkpoint naming scheme used during training.
ckpt_dir = osp.join(outputs_top_dir, f"checkpoints/{ver_date_dir}")
best_acc = -1
# Reset explicitly so that a stale path left in the kernel by an earlier run can
# never be picked up when no checkpoint matches.
ckpt_path = None
for file in os.listdir(ckpt_dir):
    # Skip non-checkpoint files and the rolling "last.ckpt", which carries no score.
    if file[-4:] != "ckpt" or file == "last.ckpt":
        continue
    val_acc = float(file[-9:-5])
    if val_acc > best_acc:
        best_acc = val_acc
        ckpt_path = osp.join(ckpt_dir, file)
if ckpt_path is None:
    raise FileNotFoundError(
        f"No scored '*.ckpt' checkpoint found in '{ckpt_dir}'"
    )
#################################

# get hyper parameters #
hparams_path = osp.join(outputs_top_dir, "logs", ver_date_dir, "hparams.yaml")
hparams = omegaconf.OmegaConf.load(hparams_path)
########################

# Restore the listener on the GPU and freeze it: evaluation mode, no gradients.
listener = Transglot.load_from_checkpoint(ckpt_path, hparams=hparams).cuda()
listener.eval()
for p in listener.parameters():
    p.requires_grad = False

unique geometries in train/test/val 3145 393 393


In [3]:
# Run the Lightning test loop for the listener on one GPU, with checkpointing
# and logging switched off. The cell displays the list returned by test().
trainer = pl.Trainer(
    gpus=1,
    checkpoint_callback=False,
    logger=False,
)
trainer.test(listener)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


unique geometries in train/test/val 3145 393 393


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 78.01102447509766, 'test_loss': 0.660237729549408}
--------------------------------------------------------------------------------


[{'test_acc': 78.01102447509766, 'test_loss': 0.660237729549408}]

In [4]:
# Collect the ShapeNet id of every chair referenced by a sample of the test dataset
listener.eval()
test_ds = listener.test_ds
int2sn = int_to_sn_model
sn2int = sn_model_to_int
all_test_chairs = set()
for sample_idx in range(len(test_ds)):
    # A sample unpacks into four items; only the second one (the chair indices) is used here.
    _, chairs_idx, _, _ = test_ds[sample_idx]
    # Resolve the three chair indices first, then merge them into the set;
    # ids that are already present are simply ignored by the set.
    all_test_chairs.update([int2sn[chairs_idx[slot]] for slot in range(3)])

In [5]:
# Keep only the test chairs that have a "<id>.txt" file in the shapenet_seg dataset
all_test_chairs = list(all_test_chairs)

# You can download Shapenet with part annotations from code below.
# wget https://shapenet.cs.stanford.edu/ericyi/shapenetcore_partanno_v0.zip
dir_path = "/home/ubuntu/datasets/full_shapenet_seg/03001627"
test_chairs = [
    sn_id
    for sn_id in all_test_chairs
    if osp.exists(osp.join(dir_path, f"{sn_id}.txt"))
]

In [6]:
def calculate_iou(sn_id, sentence="leg", threshold=0.5, max_len=34):
    """Compare the listener's attention for `sentence` with each annotated chair part.

    The annotation file `<dir_path>/<sn_id>.txt` is loaded, its first three
    columns are fed to the listener as a point cloud, and the attention weights
    of the first cross-attention layer are min-max normalised and binarised at
    `threshold`. The resulting mask is scored against each part label by IoU.

    Args:
        sn_id: ShapeNet model id; names the annotation file inside `dir_path`.
        sentence: whitespace-separated words, each looked up in `word_to_int`.
        threshold: cut-off applied to the normalised attention map.
        max_len: length the token sequence is zero-padded to. The default 34 is
            the value the notebook always used -- presumably the model's
            maximum sequence length; confirm before changing it.

    Returns:
        Tuple `(back_iou, seat_iou, leg_iou, arm_iou)` in percent. An entry is
        NaN when the chair has no point carrying that part label.

    Raises:
        ValueError: if `sentence` contains no word or more than `max_len` words.
        KeyError: if a word is missing from `word_to_int`.
    """
    # Part ids stored in the last column: 12: back | 13: seat | 14: leg | 15: arm
    part_ids = (12., 13., 14., 15.)

    ########### validate & encode the sentence ############
    # split() without an argument ignores repeated/leading/trailing whitespace,
    # so stray spaces do not turn into empty tokens.
    words = sentence.split()
    if not words:
        raise ValueError("sentence must contain at least one word")
    if len(words) > max_len:
        raise ValueError(
            f"sentence has {len(words)} words but at most {max_len} are supported"
        )
    token = np.array([word_to_int[word] for word in words])
    padded_token = np.pad(token, (0, max_len - len(token)), "constant", constant_values=0)
    padded_token = torch.tensor(padded_token).unsqueeze(0).cuda()
    #######################################################

    ###### Load & break_up pc and label #############
    pc = np.loadtxt(osp.join(dir_path, f"{sn_id}.txt"))
    label = pc[:, -1]
    part_masks = [label == part_id for part_id in part_ids]
    pc = pc[:, :3]
    pc_input = torch.tensor(pc).float().unsqueeze(0).cuda()
    #################################################

    ########### extract point feature & language feature ############
    p_f = listener.pc_encoder(pc_input)
    l_f = listener.language_encoder_attn(padded_token)[0]
    #################################################################

    ######## get attention & binary #######
    # Only the attention weights are needed; the attended feature is discarded.
    _, attn_weight = listener.cross_attn_layers[0](l_f.unsqueeze(1), p_f, p_f)
    attn_map = attn_weight.squeeze().detach().cpu().numpy()
    minv = attn_map.min(axis=0)
    maxv = attn_map.max(axis=0)
    # A constant attention map gives 0/0 = NaN here; NaN never exceeds the
    # threshold, so such points end up as 0. Silence the expected warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        attn_map = (attn_map - minv) / (maxv - minv)
        attn_map = np.where(attn_map > threshold, 1, 0)
    ######################################

    def get_iou(pred, gt):
        """Intersection over union of two binary masks (epsilon avoids 0/0)."""
        intersect = np.sum(np.logical_and(pred, gt))
        union = np.sum(np.logical_or(pred, gt))
        return intersect / (union + 1e-8)

    # Same order as part_ids: back, seat, leg, arm.
    return tuple(
        100 * get_iou(attn_map, mask) if mask.sum() > 0 else float("NaN")
        for mask in part_masks
    )

In [7]:
from math import isnan

# iou["back"]["leg"] = iou of leg, given back label.
part_names = ["back", "seat", "leg", "arm"]
iou = {given: {part: [] for part in part_names} for given in part_names}

for (i, sn_id) in enumerate(test_chairs):
    if i % 100 == 0:
        print(f"{i} finish")
    for label in part_names:
        # calculate_iou returns the scores in the order back, seat, leg, arm;
        # NaN marks a chair without that part and is left out of the average.
        for part, score in zip(part_names, calculate_iou(sn_id, label)):
            if not isnan(score):
                iou[label][part].append(score)

# Replace every score list by its mean. A bucket that received no score
# (no test chairs, or no chair with that part) becomes NaN instead of raising
# ZeroDivisionError.
for given in part_names:
    for part in part_names:
        scores = iou[given][part]
        iou[given][part] = sum(scores) / len(scores) if scores else float("nan")

0 finish
100 finish
200 finish
300 finish
400 finish
500 finish
600 finish
700 finish


In [8]:
# Print the averaged IoU table: a "Given <word>" header per input word,
# followed by one row holding the score of every annotated part.
parts = ["back", "seat", "leg", "arm"]
for i in parts:
    print(f"Given {i}")
    # Each entry keeps its trailing " | " separator, including the last one.
    print("".join(f"{j}: {iou[i][j]:.2f} | " for j in parts))

Given back
back: 20.35 | seat: 0.44 | leg: 0.14 | arm: 1.45 | 
Given seat
back: 40.21 | seat: 20.87 | leg: 3.99 | arm: 7.74 | 
Given leg
back: 1.35 | seat: 1.49 | leg: 14.93 | arm: 0.84 | 
Given arm
back: 0.34 | seat: 7.54 | leg: 3.60 | arm: 4.85 | 
