In [None]:
# Listing the contents of the YCB-V template models directory:
import os

# Print the entries found in the YCB-V `models_xyz` template directory.
template_dir = './templates/ycbv_desc/models_xyz/'
files = os.listdir(template_dir)
print(files)

In [None]:
from fused_zs6d import Fused_ZS6D
import os
import json

# Read the YCB-V BOP inference configuration (a JSON file) into `config`.
config_path = "./zs6d_configs/bop_eval_configs/cfg_ycbv_inference_bop.json"
with open(config_path, "r") as config_file:
    config = json.load(config_file)

# Build the pose estimator. The two positional arguments are the paths stored
# in the config under `templates_gt_path` and `norm_factor_path`; the keyword
# arguments are the fixed settings used throughout this notebook.
pose_estimator = Fused_ZS6D(
    config['templates_gt_path'],
    config['norm_factor_path'],
    model_type='dino_vits8',
    stride=4,
    subset_templates=8,
    max_crop_size=80,
)

In [None]:
import cv2
from PIL import Image
import pose_utils.img_utils as img_utils
import pose_utils.vis_utils as vis_utils
import numpy as np
import time
import matplotlib.pyplot as plt
import os

# Limit the GPUs visible to this process to device 0. This assignment is kept
# ahead of `import torch` so the variable is already set when torch is loaded;
# do not move it below the torch import when tidying the import list.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F
from extractor_sd import load_model, process_features_and_mask, get_mask
from utils.utils_correspondence import co_pca, resize, find_nearest_patchs, find_nearest_patchs_replace
import matplotlib.pyplot as plt
import sys
from extractor_dino import ViTExtractor
from sklearn.decomposition import PCA as sklearnPCA
import math
from sklearn.cluster import KMeans
from scipy.optimize import linear_sum_assignment


"""SETUP for SD-DINO"""
# Tunable settings for the SD-DINO model.
SEED = 42          # shared seed for every random number generator below
VER = "v1-5"       # passed to load_model as `diffusion_ver`
SIZE = 960         # image size, passed to load_model as `image_size`
TIMESTEP = 100     # passed to load_model as `num_timesteps`

# Seed NumPy, PyTorch and PyTorch-CUDA (in that order) with the same value.
for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(SEED)

# cuDNN benchmark mode is switched on for speed.
# NOTE(review): benchmark mode can select different kernels between runs, which
# may work against the seeding above - confirm this trade-off is intended.
torch.backends.cudnn.benchmark = True

# Load the model; `load_model` returns the model and a second object, `aug`.
model, aug = load_model(
    diffusion_ver=VER,
    image_size=SIZE,
    num_timesteps=TIMESTEP,
)

"""SETUP for ZS6D"""
# Key of the ground-truth entry this notebook runs on.
# Alternative key: img_id = '8'
img_id = '000048_1'

# Read the ground-truth JSON named by the config. Its entries are looked up by
# `img_id` and provide, among other fields, the segmentation masks used to
# test zs6d.
with open(config['gt_path'], 'r') as gt_file:
    data_gt = json.load(gt_file)

# Estimate and visualise one pose per object listed under `img_id` in the
# ground-truth data.
for i, gt_entry in enumerate(data_gt[img_id]):
    obj_number = i
    obj_id = gt_entry['obj_id']
    # The camera matrix is reshaped into a 3x3 array.
    cam_K = np.array(gt_entry['cam_K']).reshape((3, 3))
    # Read for reference only: get_pose below is called with bbox=None.
    bbox = gt_entry['bbox_visib']

    # Path of the image inside the dataset, built from the annotation ...
    img_path = os.path.join(config['dataset_path'], gt_entry['img_name'].split("./")[-1])
    # ... but immediately replaced by a fixed local test image ("own test").
    # NOTE(review): every object is therefore run on this one file - confirm
    # that it matches the annotation selected by `img_id`.
    img_path = os.path.join('test/', '000001.png')
    img = Image.open(img_path)

    # Convert the stored `mask_sam` value with rle_to_mask and cast it to uint8.
    mask = img_utils.rle_to_mask(gt_entry['mask_sam']).astype(np.uint8)

    # Time only the pose estimation call. It receives the model and `aug`, the
    # input image, the object id as a string, the segmentation mask and the
    # camera matrix, and returns the estimated rotation R and translation t.
    start_time = time.time()
    R_est, t_est = pose_estimator.get_pose(model, aug, img, str(obj_id), mask, cam_K, bbox=None)
    end_time = time.time()

    # Draw the estimated 3D bounding box onto a copy of the image and show it.
    out_img = vis_utils.draw_3D_bbox_on_image(
        np.array(img),
        R_est,
        t_est,
        cam_K,
        gt_entry['model_info'],
        factor=1.0,
    )
    plt.imshow(out_img)
    plt.show()

    elapsed = end_time - start_time
    print(f"Pose estimation time: {elapsed}")
    print(f"R_est: {R_est}")
    print(f"t_est: {t_est}")


