# Demo Manipulation

## Export Data

Feature extraction and matching work directly from images, so export these first.

In [None]:
%load_ext autoreload
%autoreload 2
from tqdm.notebook import tqdm  # notebook-friendly progress bars
from pathlib import Path

from hloc import extract_features, match_features, reconstruction, visualization, pairs_from_exhaustive
from hloc.visualization import plot_images, read_image, plot_keypoints
from hloc.utils import viz_3d

import os

In [None]:
import shutil

from flow_control.demo.playback_env_servo import PlaybackEnvServo
from flow_control.localize.hloc_utils import export_images_by_parts

# Location of the recorded demonstrations. NOTE(review): this is a
# machine-specific absolute path; point it at your own copy of the data.
# root_dir = Path("/home/argusm/CLUSTER/robot_recordings/flow/recombination/2023-01-24")
root_dir = Path("/home/argusm/Desktop/Demonstrations/2023-01-24")
parts_fn = root_dir / 'parts.json'
# hloc working directory: a sibling of root_dir named "<root_dir name>_hloc".
hloc_root = root_dir.parent / (str(root_dir.name) + '_hloc')

mapping_dir = hloc_root / 'mapping'
outputs = hloc_root / 'outputs'
sfm_pairs = outputs / 'pairs-sfm.txt'
loc_pairs = outputs / 'pairs-loc.txt'
sfm_dir = outputs / 'sfm'
features_path = outputs / 'features.h5'
matches_path = outputs / 'matches.h5'

# Start from a clean slate on every run. shutil.rmtree replaces the former
# `!rm -rf $var` shell escapes: it needs no shell, is not broken by spaces in
# the path, and ignore_errors=True keeps the "missing directory is fine"
# behaviour of `rm -rf`.
shutil.rmtree(outputs, ignore_errors=True)
shutil.rmtree(mapping_dir, ignore_errors=True)

# Export the reference images into mapping_dir. The result is used below as a
# mapping from part name (e.g. 'locate', 'insert') to a list of image names.
parts_references = export_images_by_parts(root_dir, parts_fn, mapping_dir)

In [None]:
# Flatten the per-part reference lists into a single list of image names.
references_all = [ref for ref_part in parts_references.values() for ref in ref_part]
# Image files actually present in <hloc_root>/mapping, as POSIX paths relative to hloc_root.
references_files = [p.relative_to(hloc_root).as_posix() for p in (hloc_root / 'mapping/').iterdir()]
# Sanity check: every exported reference name must exist on disk.
assert len(set(references_all)-set(references_files)) == 0
# Only the 'locate' part is used as the reference set in the cells below.
references = parts_references['locate']

In [None]:
import numpy as np
from scipy.spatial.transform import Rotation as R

def get_info(demo_dir, frame_index):
    """Load the robot state and info objects stored for one recorded frame.

    Args:
        demo_dir: Directory containing the ``frame_XXXXXX.npz`` files.
        frame_index: Integer frame number; zero-padded to six digits to build
            the file name.

    Returns:
        Tuple ``(robot_state, info)``: the objects stored under the archive
        keys ``"robot_state"`` and ``"info"``, unwrapped with ``.item()``.
    """
    frame_path = os.path.join(demo_dir, f"frame_{frame_index:06d}.npz")
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once both entries have been read.
    # NOTE: allow_pickle=True unpickles stored objects - only load trusted recordings.
    with np.load(frame_path, allow_pickle=True) as arr:
        return arr["robot_state"].item(), arr["info"].item()

def pos_orn_to_matrix(pos, orn):
    """Build a 4x4 homogeneous transform from a position and an orientation.

    Args:
        pos: Translation, assigned to the first three rows of the last column.
        orn: Orientation, either a length-4 quaternion (passed to
            ``Rotation.from_quat``) or length-3 Euler angles (passed to
            ``Rotation.from_euler('xyz', ...)``).

    Returns:
        A 4x4 ``numpy`` array with the rotation in ``[:3, :3]`` and the
        translation in ``[:3, 3]``.

    Raises:
        ValueError: If ``orn`` has neither 3 nor 4 elements. Previously such
            input silently produced an identity rotation.
    """
    num_components = len(orn)
    if num_components == 4:
        rotation = R.from_quat(orn)
    elif num_components == 3:
        rotation = R.from_euler('xyz', orn)
    else:
        raise ValueError(
            f"orn must have 3 (euler) or 4 (quaternion) elements, got {num_components}")

    mat = np.eye(4)
    mat[:3, :3] = rotation.as_matrix()
    mat[:3, 3] = pos
    return mat

def get_tcp_pose(demo_dir, frame_index):
    """Return the TCP pose of one recorded frame as a 4x4 matrix.

    Args:
        demo_dir: Directory containing the ``frame_XXXXXX.npz`` files.
        frame_index: Integer frame number; zero-padded to six digits to build
            the file name.

    Returns:
        The result of ``pos_orn_to_matrix`` applied to the ``"tcp_pos"`` and
        ``"tcp_orn"`` entries of the frame's ``"robot_state"`` object.
    """
    frame_path = os.path.join(demo_dir, f"frame_{frame_index:06d}.npz")
    # Close the NpzFile as soon as the state has been extracted.
    # NOTE: allow_pickle=True unpickles stored objects - only load trusted recordings.
    with np.load(frame_path, allow_pickle=True) as arr:
        state = arr["robot_state"].item()
    return pos_orn_to_matrix(state["tcp_pos"], state["tcp_orn"])

def get_extr_cal(demo_dir):
    """Read the gripper extrinsic calibration stored with a recording.

    Args:
        demo_dir: Recording directory containing ``camera_info.npz``.

    Returns:
        The array stored under ``"gripper_extrinsic_calibration"``.
    """
    # The array is materialised while the archive is open; the context manager
    # then closes the underlying file handle.
    with np.load(Path(demo_dir) / "camera_info.npz", allow_pickle=True) as camera_info:
        return camera_info["gripper_extrinsic_calibration"]

In [None]:
# tcp_poses[part][recording name] -> 4x4 TCP pose of that part's reference frame.
# NOTE(review): the part names are hard-coded; a key in parts_references other
# than these three raises KeyError in the loop below.
tcp_poses = {'locate': {}, 'grasp': {}, 'insert': {}}
# extr_cal[recording name] -> gripper extrinsic calibration of that recording.
extr_cal = {}

for part_key in parts_references:
    tmp_ref = parts_references[part_key]
    
    for key in tmp_ref:
        # Drop the last five characters (presumably a '.jpeg'-style extension -
        # TODO confirm) and split on '_': the text before the first '_' is
        # "<dir>/<recording name>", the next token is the frame index.
        # NOTE(review): this breaks if the recording name itself contains '_'.
        tmp = key[0:-5].strip().split('_')
        frame_idx = int(tmp[1])
        rec_name = tmp[0].split('/')[1]
        rec_dir = root_dir / rec_name
        
        extr_cal[rec_name] = get_extr_cal(rec_dir)
        tcp_poses[part_key][rec_name] = get_tcp_pose(rec_dir, frame_idx)

In [None]:
# Preview the first four reference images of the 'insert' part.
plot_images([read_image(hloc_root / r) for r in parts_references['insert'][:4]], dpi=50)

In [None]:
# Report how many 'locate' references there are and preview the first four.
print(len(references), "mapping images")
plot_images([read_image(hloc_root / r) for r in references[:4]], dpi=50)

In [None]:
from flow_control.localize.hloc_utils import save_features_seg

# Second feature file written by save_features_seg below
# (presumably the features restricted to the segmentation mask - TODO confirm).
features_seg_path = outputs / 'features_seg.h5'

# hloc presets: SuperPoint features, SuperGlue matcher.
feature_conf = extract_features.confs['superpoint_aachen']
matcher_conf = match_features.confs['superglue']

# Features are extracted for the references of ALL parts ...
extract_features.main(feature_conf, hloc_root, image_list=references_all, feature_path=features_path)
save_features_seg(root_dir, features_seg_path, features_path, references_all)

# ... but pairs are only built (and matched) among the 'locate' references.
pairs_from_exhaustive.main(sfm_pairs, image_list=references)
match_features.main(matcher_conf, sfm_pairs, features=features_path, matches=matches_path)

In [None]:
from hloc.utils.io import get_keypoints

# Overlay the extracted keypoints on the first `num_images` reference images.
num_images = 4
plot_images([read_image(hloc_root / r) for r in references[:num_images]], dpi=75)
plot_keypoints([get_keypoints(features_path, r) for r in references[:num_images]], colors='lime', ps=4)

## Load Match Database

hloc saves all features and matches in HDF5 files (`features.h5` and `matches.h5` here), so reading them back with `hloc.utils.io` is the easiest option.

In [None]:
from hloc.utils.io import get_keypoints
from flow_control.localize.hloc_utils import get_segmentation

# Compare the keypoints stored in the full feature file with those stored in
# the segmentation feature file for one reference image.
name0 = references[1]
kps0, noise0 = get_keypoints(features_path, name0, return_uncertainty=True)
# NOTE: noise0 is overwritten here; only the value from features_seg_path survives.
kps0_seg, noise0 = get_keypoints(features_seg_path, name0, return_uncertainty=True)
seg = get_segmentation(root_dir, name0)

# Left: the image with all keypoints; right: the segmentation with the keypoints from features_seg_path.
plot_images([read_image(hloc_root / r) for r in [name0, ]]+[seg], dpi=75)
plot_keypoints([kps0, kps0_seg], colors='lime', ps=4)

In [None]:
from hloc.utils.io import get_matches
from flow_control.localize.hloc_utils import kp_seg_filter

# Inspect the matches of a single (query, demo) reference pair.
name_q = references[1]
name_d = references[3]

matches, scores = get_matches(matches_path, name_q, name_d)
kps_q, noise_q = get_keypoints(features_path, name_q, return_uncertainty=True)
kps_d, noise_d = get_keypoints(features_path, name_d, return_uncertainty=True)
# Column 0 of `matches` indexes query keypoints, column 1 indexes demo keypoints.
kps_q_match = kps_q[matches[:, 0]]
kps_d_match = kps_d[matches[:, 1]]

#%prun in_seg = kp_seg_filter_pb(kps_d_match, name_d)
# in_seg is used below to index the matched keypoints
# (presumably selecting matches inside the demo segmentation - TODO confirm).
in_seg = kp_seg_filter(kps_d_match, name_d, features_seg_path)

print("in_seg", in_seg)
print(kps_d_match[in_seg].shape)

# Keep only the selected matches, for both images of the pair.
kps_q_seg = kps_q_match[in_seg]
kps_d_seg = kps_d_match[in_seg]

In [None]:
from hloc.visualization import plot_matches
from flow_control.localize.hloc_utils import get_playback, align_pointclouds
import matplotlib.pyplot as plt
import os
import json

idx = 10


class NpEncoder(json.JSONEncoder):
    """JSON encoder that turns NumPy scalars and arrays into built-in Python values."""

    # (NumPy type, converter) pairs tried in order; the three types are disjoint.
    _CONVERTERS = (
        (np.integer, int),
        (np.floating, float),
        (np.ndarray, lambda arr: arr.tolist()),
    )

    def default(self, obj):
        """Convert ``obj`` if it is a NumPy integer, float or array; otherwise defer to the base class."""
        for numpy_type, convert in self._CONVERTERS:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super().default(obj)


# Make sure a local ./mapping directory exists (relative to the working directory).
os.makedirs("./mapping", exist_ok=True)
def find_best_demo(name_q, query_cam, references):
    """Align the query against every reference and pick the one with the most inliers.

    ``align_pointclouds`` is called for every reference name other than
    ``name_q``; references for which it returns ``None`` are skipped. Each
    successful alignment is plotted (image pair, matches, inlier count).

    Args:
        name_q: Name of the query image; also used to skip the identical reference.
        query_cam: Forwarded to ``align_pointclouds`` as ``query_cam``.
        references: Iterable of reference image names to try.

    Returns:
        Tuple ``(name_d_best, res_best, results)``. ``results`` maps every
        successfully aligned reference name to its result dict and additionally
        stores the winning name under the key ``'name_d_best'``.

    Raises:
        ValueError: If no reference could be aligned (previously an opaque
            ``IndexError`` from indexing an empty list).
    """
    results = {}
    for name_d in tqdm(references):
        if name_q == name_d:
            continue

        res = align_pointclouds(root_dir, matches_path, features_path, features_seg_path,
                                name_q, name_d, query_cam=query_cam)
        if res is None:
            continue

        # NOTE(review): 'in_score' is set to the candidate count, not to an
        # inlier ratio - kept as in the original, confirm the intent.
        res['in_score'] = res['num_candidates']
        results[name_d] = res

        plot_images([read_image(hloc_root / r) for r in [name_q, name_d]], dpi=75)
        plot_matches(res["kps_q"], res["kps_d"], a=0.1)
        print(f"Num. Inliers: {res['num_inliers']}")
        plt.show()

    if not results:
        raise ValueError(f"No reference could be aligned with query {name_q!r}")

    # max() returns the first maximal entry, i.e. the same winner (including
    # tie-breaking by insertion order) as the former stable descending sort.
    name_d_best, res_best = max(results.items(), key=lambda item: item[1]["num_inliers"])

    # Downstream cells expect the winner's name inside the results dict itself.
    results['name_d_best'] = name_d_best
    return name_d_best, res_best, results

def get_all_transformations(name_q, query_cam, references):
    """Align the query against every reference and return all successful results.

    Args:
        name_q: Name of the query image; the identical reference is skipped.
        query_cam: Forwarded to ``align_pointclouds`` as ``query_cam``.
        references: Iterable of reference image names to try.

    Returns:
        Dict mapping each reference name for which ``align_pointclouds``
        returned a result (not ``None``) to that result dict.
    """
    results = {}
    for name_d in tqdm(references):
        if name_q == name_d:
            continue

        res = align_pointclouds(root_dir, matches_path, features_path, features_seg_path,
                                name_q, name_d, query_cam=query_cam)
        if res is None:
            continue

        # The former `res[k] = res[k]` self-assignments were no-ops and are gone.
        # NOTE(review): 'in_score' is set to the candidate count, not to an
        # inlier ratio - kept as in the original, confirm the intent.
        res['in_score'] = res['num_candidates']
        results[name_d] = res

    return results

# results[idx] -> the per-reference result dict returned by find_best_demo for
# query references[idx] (including its 'name_d_best' entry).
results = {}
# Use (up to) the first 30 references as queries. Clamping to len(references)
# avoids an IndexError when fewer than 30 reference images were exported.
for idx in range(min(30, len(references))):
    name_q = references[idx]
    pb, frame_index = get_playback(root_dir, name_q)
    query_cam = pb[frame_index].cam

    # name_q / name_d_best / res_best keep the values of the LAST query after
    # the loop; later cells rely on these globals.
    name_d_best, res_best, res = find_best_demo(name_q, query_cam, references)
    results[idx] = res
    
# print(name_q, name_d_best)
# trf_best = res_best["trf_est"]

# plot_images([read_image(hloc_root / r) for r in [name_q, name_d_best]], dpi=75)
# plot_matches(res_best["kps_q"], res_best["kps_d"], a=0.1)

In [None]:
def abs_to_world_tcp(align_trf, live_world_tcp, T_tcp_cam):
    """Compute the goal TCP pose in the world frame (T_tcp_world).

    The inverse of ``align_trf`` is applied in the camera frame: the live TCP
    pose is mapped into the camera frame via ``T_tcp_cam``, the inverted
    alignment is applied, and the result is mapped back with the inverse of
    ``T_tcp_cam``.

    Args:
        align_trf: 4x4 alignment transform; it is inverted before use.
        live_world_tcp: 4x4 pose of the live TCP.
        T_tcp_cam: 4x4 transform between TCP and camera.

    Returns:
        4x4 array ``live_world_tcp @ T_tcp_cam @ inv(align_trf) @ inv(T_tcp_cam)``.
    """
    inverse_alignment = np.linalg.inv(align_trf)
    camera_pose_estimate = live_world_tcp @ T_tcp_cam @ inverse_alignment
    return camera_pose_estimate @ np.linalg.inv(T_tcp_cam)

In [None]:
# fig = plt.figure(figsize=(12,12))
# ax = fig.add_subplot(projection='3d')

def get_object_positions_angles(idx):
    """Collect one estimated x / y / angle sample per aligned reference of query ``idx``.

    Reads the notebook globals ``results``, ``tcp_poses`` and ``extr_cal``. For
    every reference entry of ``results[idx]`` (the ``'name_d_best'`` bookkeeping
    key is skipped) the estimated transform is combined with the recording's
    'locate' and 'insert' TCP poses, and the x/y translation plus the first
    'zyx' Euler angle (in degrees) of the result are recorded.

    Args:
        idx: Key into ``results`` identifying the query.

    Returns:
        Tuple of parallel lists ``(x_pos, y_pos, angles, selected, rec_names)``;
        ``selected[i]`` is True when the recording name occurs in the best
        reference name of this query.
    """
    per_reference = results[idx]
    xs, ys, yaw_angles, is_best, names = [], [], [], [], []

    for ref_name, ref_result in per_reference.items():
        if ref_name == 'name_d_best':
            continue

        # "<dir>/<recording>_<rest>" -> "<recording>"
        rec_name = ref_name.strip().split('_')[0].split('/')[1]

        pose_locate = tcp_poses['locate'][rec_name]
        pose_insert = tcp_poses['insert'][rec_name]

        trf_live_to_demo = abs_to_world_tcp(ref_result['trf_est'], np.eye(4), extr_cal[rec_name])
        final_loc = trf_live_to_demo @ np.linalg.inv(pose_locate) @ pose_insert

        euler_zyx = R.from_matrix(final_loc[0:3, 0:3]).as_euler('zyx', degrees=True)

        # Substring test against the name of the best-matching reference.
        is_best.append(rec_name in per_reference['name_d_best'])
        xs.append(final_loc[0, 3])
        ys.append(final_loc[1, 3])
        yaw_angles.append(euler_zyx[0])
        names.append(rec_name)

    return xs, ys, yaw_angles, is_best, names

# object_data[idx] -> per-reference x/y/angle estimates for query idx.
object_data = {}

# Iterate over the queries that were actually computed instead of repeating a
# hard-coded count that has to be kept in sync with the loop filling `results`.
for idx in sorted(results):
    x_pos, y_pos, angles, selected, rec_names = get_object_positions_angles(idx)
    object_data[idx] = {'xpos': x_pos, 'ypos': y_pos, 'angles': angles, 'selected': selected, 'rec_names': rec_names}
    
#         ax.scatter(final_loc[0, 3], final_loc[1, 3], r[0], s=results[key]['num_inliers']*20, label=label)

#     ax.set_xlabel('x')
#     ax.set_ylabel('y')
#     ax.set_zlabel('angle')
#     ax.legend()
    # ax.show()
#     print(name_d_best)

    # plot_images([read_image(hloc_root / r) for r in [name_q, name_d_best]], dpi=75)
    # plot_matches(res["kps_q"], res["kps_d"], a=0.1)
    # plt.show()

In [None]:
def cluster(x_pos, y_pos, angles, selected, pos_tol=0.05, angle_tol=20.0):
    """Find, for every sample, the samples that lie close to it in x, y and angle.

    Changes relative to the previous version:
      * The unused lookup of the selected sample is gone; it raised IndexError
        whenever no entry of ``selected`` was True.
      * The leftover debug print of ``selected`` is gone.
      * Angle differences wrap around at +/-180 degrees, so 179 and -179 are
        2 degrees apart rather than 358.
      * The thresholds are parameters (defaults equal the former constants).

    Args:
        x_pos, y_pos: Sequences of positions, one entry per sample.
        angles: Sequence of angles in degrees, one entry per sample.
        selected: Unused; kept so existing callers keep working.
        pos_tol: Exclusive upper bound on |dx| and |dy| for two samples to be
            neighbours.
        angle_tol: Exclusive upper bound on the wrapped angle difference in degrees.

    Returns:
        Tuple ``(cluster_lengths, clusters)``: for sample ``i``,
        ``clusters[i]`` is the index array of its neighbours (itself included)
        and ``cluster_lengths[i]`` is the size of that array.
    """
    x_pos = np.asarray(x_pos, dtype=float)
    y_pos = np.asarray(y_pos, dtype=float)
    angles = np.asarray(angles, dtype=float)

    cluster_lengths = []
    clusters = []
    for idx in range(len(x_pos)):
        xdiff = np.abs(x_pos[idx] - x_pos)
        ydiff = np.abs(y_pos[idx] - y_pos)
        # Map the raw difference into [-180, 180) before taking the magnitude.
        angle_diff = np.abs((angles[idx] - angles + 180.0) % 360.0 - 180.0)

        members = np.where((xdiff < pos_tol) & (ydiff < pos_tol) & (angle_diff < angle_tol))[0]
        cluster_lengths.append(len(members))
        clusters.append(members)

    return cluster_lengths, clusters
    
def plot_3d(query_idx):
    """Visualise the per-reference pose estimates of one query and its largest cluster(s).

    Reads the notebook globals ``results``, ``object_data``, ``references`` and
    ``hloc_root``. Three things are shown:
      1. a 3D scatter (x, y, angle) with one point per aligned reference,
         sized by inlier count; '*' marks the inlier-selected reference and
         '+' marks members of a maximum-size cluster;
      2. the image pair and matches of the inlier-selected reference;
      3. the image pair and matches of every reference whose name contains a
         recording name from a maximum-size cluster.

    Args:
        query_idx: Key into ``results`` / ``object_data`` identifying the query.
    """
    query_results = results[query_idx]
    query_data = object_data[query_idx]
    name_d_selected = query_results['name_d_best']

    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(projection='3d')

    xpos = np.array(query_data['xpos'])
    # Fixed: this previously read 'xpos' again, so y was a copy of x.
    ypos = np.array(query_data['ypos'])
    angles = np.array(query_data['angles'])
    selected = np.array(query_data['selected'])
    rec_names = query_data['rec_names']

    cluster_lengths, clusters = cluster(xpos, ypos, angles, selected)
    print(f"Cluster Sizes: {cluster_lengths}")
    print(f"Clusters: {clusters}")
    max_cluster_size = np.max(cluster_lengths)

    rec_names_clusters = []

    # Drop the bookkeeping key BEFORE enumerating so the index always lines up
    # with the object_data lists, wherever 'name_d_best' sits in the dict.
    reference_keys = [key for key in query_results if key != 'name_d_best']
    for tmp_i, key in enumerate(reference_keys):
        label = rec_names[tmp_i]
        if rec_names[tmp_i] in name_d_selected:
            label = label + '*'
        if cluster_lengths[tmp_i] == max_cluster_size:
            rec_names_clusters.append(rec_names[tmp_i])
            label += '+'
        ax.scatter(xpos[tmp_i], ypos[tmp_i], angles[tmp_i],
                   s=query_results[key]['num_inliers'] * 20, label=label)

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('angle')
    ax.legend()

    name_q = references[query_idx]

    # Inlier selection
    plot_images([read_image(hloc_root / r) for r in [name_q, name_d_selected]], dpi=75)
    plot_matches(query_results[name_d_selected]["kps_q"], query_results[name_d_selected]["kps_d"], a=0.1)

    # Clustering
    for name_d in references:
        # References without an alignment result (the query itself, or a failed
        # alignment) have no entry; skip them instead of raising KeyError.
        if name_d not in query_results:
            continue
        for rec_name in rec_names_clusters:
            if rec_name in name_d:
                plot_images([read_image(hloc_root / r) for r in [name_q, name_d]], dpi=75)
                plot_matches(query_results[name_d]["kps_q"], query_results[name_d]["kps_d"], a=0.1)

                print(f"Recording_name: {rec_name}")
    
# for plot_idx in range(30):
#     plot_3d(plot_idx)

# Show a single query; index 24 requires results[24] / object_data[24] to exist.
plot_3d(24)
    


In [None]:
import copy
import open3d as o3d
from flow_control.localize.hloc_utils import get_pointcloud, get_segmented_pointcloud

def draw_registration_result(source_arr, target_arr, transformation, color="rgb"):
    """Show two point clouds in an Open3D window after transforming the source.

    Args:
        source_arr: 2D array; columns 0-2 are used as points and, for
            ``color="rgb"``, columns 4-6 as colours.
        target_arr: Same layout as ``source_arr``.
        transformation: Transform applied to the source cloud before drawing.
        color: ``"rgb"`` uses the per-point colours from the arrays; any other
            value paints the source and target in two fixed uniform colours.
    """
    def _as_cloud(arr):
        # Build an Open3D cloud from the first three columns.
        cloud = o3d.geometry.PointCloud()
        cloud.points = o3d.utility.Vector3dVector(arr[:, :3])
        return cloud

    source_cloud = _as_cloud(source_arr)
    target_cloud = _as_cloud(target_arr)

    if color == "rgb":
        source_cloud.colors = o3d.utility.Vector3dVector(source_arr[:, 4:7] )
        target_cloud.colors = o3d.utility.Vector3dVector(target_arr[:, 4:7] )
    else:
        source_cloud.paint_uniform_color([1, 0.706, 0])
        target_cloud.paint_uniform_color([0, 0.651, 0.929])

    # Both clouds are local to this call, so they can be modified directly.
    source_cloud.transform(transformation)
    o3d.visualization.draw_geometries([source_cloud, target_cloud])

    
# name_q, name_d_best and res_best are the values left behind by the LAST
# iteration of the query loop above.
# `trf_best` used to be assigned only in commented-out code, so a fresh
# "Restart & Run All" failed here with a NameError; define it explicitly.
trf_best = res_best["trf_est"]

pc_full_q, _ = get_segmented_pointcloud(name_q, root_dir=root_dir)
pc_full_d, bbox = get_segmented_pointcloud(name_d_best, root_dir=root_dir)
# pc_full_q, bbox = get_segmented_pointcloud(name_q, root_dir=root_dir, is_live=True, trf=np.linalg.inv(trf_best), bbox=bbox)
# pc_full_q, bbox = get_segmented_pointcloud(name_q, root_dir=root_dir)

# o3d.visualization.draw_geometries([pc, bbox])

print(pc_full_q.shape)
draw_registration_result(pc_full_q, pc_full_d, trf_best)

In [None]:
# Wrap the point arrays (columns 0-2) in Open3D clouds for the registration
# evaluation below. o3d.geometry.PointCloud is the public class and is what
# draw_registration_result already uses; the former o3d.cuda.pybind path only
# exists in CUDA builds of Open3D.
pcd_q = o3d.geometry.PointCloud()
pcd_q.points = o3d.utility.Vector3dVector(pc_full_q[:, 0:3])

pcd_d = o3d.geometry.PointCloud()
pcd_d.points = o3d.utility.Vector3dVector(pc_full_d[:, 0:3])

In [None]:
# Score the estimated transform between the query and demo clouds.
# max_correspondence_distance=0.02 is the correspondence threshold
# (presumably metres - TODO confirm the point-cloud units).
evaluation = o3d.pipelines.registration.evaluate_registration(
    pcd_q, pcd_d, max_correspondence_distance=0.02, transformation=trf_best)
print(evaluation)

# Localization (Live Inferences)

In [None]:


from PIL import Image
def create_query_image(query_cam):
    """Save the camera's image as ``<hloc_root>/query/live.jpg``.

    Args:
        query_cam: Object with a ``get_image()`` method; element 0 of its
            return value is saved (presumably the RGB image - TODO confirm).

    Returns:
        The saved file's path relative to ``hloc_root``, as a POSIX string.
    """
    target_dir = hloc_root / "query"
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    live_image_path = target_dir / "live.jpg"
    Image.fromarray(query_cam.get_image()[0]).save(live_image_path)

    return live_image_path.relative_to(hloc_root).as_posix()

# Simulate a live query: take a recorded reference frame, write its image out
# as the "live" query and localize it against all OTHER references.
name_q = references[0]
pb, frame_index = get_playback(root_dir, name_q)
query_cam = pb[frame_index].cam
query = create_query_image(query_cam)

references_live = [x for x in references if x != name_q]
extract_features.main(feature_conf, hloc_root, image_list=[query], feature_path=features_path, overwrite=True)
pairs_from_exhaustive.main(loc_pairs, image_list=[query], ref_list=references_live)
match_features.main(matcher_conf, loc_pairs, features=features_path, matches=matches_path, overwrite=True)
# Fixed: `qery_cam` was a typo (NameError), and find_best_demo returns three
# values (best name, best result, all results), not two.
name_d_best_live, res_best_live, results_live = find_best_demo(query, query_cam, references_live)

print(name_q, name_d_best_live)
plot_images([read_image(hloc_root / r) for r in [name_q, name_d_best_live]], dpi=75)
plot_matches(res_best_live["kps_q"], res_best_live["kps_d"], a=0.1)

In [None]:
%load_ext autoreload
%autoreload 2



In [None]:
from flow_control.localize.hloc_utils import get_playback

# NOTE(review): `selection_hloc` is not defined or imported anywhere in the
# visible notebook, so this cell raises NameError on a fresh kernel. It
# presumably needs an object exposing `parts_references` and
# `get_best_demo(query_cam)` to be created first - TODO add that cell.
name_q = selection_hloc.parts_references['locate'][0]
pb, frame_index = get_playback(root_dir, name_q)
query_cam = pb[frame_index].cam
name_best, res_best = selection_hloc.get_best_demo(query_cam)
print(name_best)

# Original File

In this notebook, we will build a 3D map of a scene from a small set of images and then localize an image downloaded from the Internet. This demo was contributed by [Philipp Lindenberger](https://github.com/Phil26AT/).

In [None]:
%load_ext autoreload
%autoreload 2
import tqdm, tqdm.notebook
tqdm.tqdm = tqdm.notebook.tqdm  # notebook-friendly progress bars
from pathlib import Path

from hloc import extract_features, match_features, reconstruction, visualization, pairs_from_exhaustive
from hloc.visualization import plot_images, read_image, plot_keypoints
from hloc.utils import viz_3d

# Setup
Here we define some output paths.

In [None]:
# NOTE: this cell rebinds `outputs`, `sfm_pairs`, `loc_pairs`, `sfm_dir`,
# `feature_conf` and `matcher_conf`, which the cells above also define.
images = Path('datasets/sacre_coeur')
outputs = Path('outputs/demo/')
# Remove any previous outputs so the demo starts from scratch.
!rm -rf $outputs
sfm_pairs = outputs / 'pairs-sfm.txt'
loc_pairs = outputs / 'pairs-loc.txt'
sfm_dir = outputs / 'sfm'
features = outputs / 'features.h5'
matches = outputs / 'matches.h5'

# hloc presets: SuperPoint features, SuperGlue matcher.
feature_conf = extract_features.confs['superpoint_aachen']
matcher_conf = match_features.confs['superglue']

# 3D mapping
First we list the images used for mapping. These are all day-time shots of Sacre Coeur.

In [None]:
# Mapping images as POSIX paths relative to `images`.
# NOTE: this rebinds `references`, which the cells above also define.
references = [p.relative_to(images).as_posix() for p in (images / 'mapping/').iterdir()]
print(len(references), "mapping images")
plot_images([read_image(images / r) for r in references[:4]], dpi=50)

Then we extract features and match them across image pairs. Since we deal with few images, we simply match all pairs exhaustively. For larger scenes, we would use image retrieval, as demonstrated in the other notebooks.

In [None]:
# Extract features, list all image pairs exhaustively, then match every pair.
extract_features.main(feature_conf, images, image_list=references, feature_path=features)
pairs_from_exhaustive.main(sfm_pairs, image_list=references)
# The trailing ';' suppresses the cell's output.
match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches);

Then we run incremental Structure-From-Motion and display the reconstructed 3D model.

In [None]:
# Build the SfM model from the matched pairs and show it in a 3D figure.
model = reconstruction.main(sfm_dir, images, sfm_pairs, features, matches, image_list=references)
# `fig` is reused further down to add the localized query camera.
fig = viz_3d.init_figure()
viz_3d.plot_reconstruction(fig, model, color='rgba(255,0,0,0.5)', name="mapping")
fig.show()

We also visualize which keypoints were triangulated into the 3D model.

In [None]:
# Show keypoints coloured by visibility for n=2 images of the model.
visualization.visualize_sfm_2d(model, images, color_by='visibility', n=2)

# Localization
Now that we have a 3D map of the scene, we can localize any image. To demonstrate this, we download [a night-time image from Wikimedia](https://commons.wikimedia.org/wiki/File:Paris_-_Basilique_du_Sacr%C3%A9_Coeur,_Montmartre_-_panoramio.jpg).

In [None]:
url = "https://upload.wikimedia.org/wikipedia/commons/5/53/Paris_-_Basilique_du_Sacr%C3%A9_Coeur%2C_Montmartre_-_panoramio.jpg"
# try other queries by uncommenting their url
# url = "https://upload.wikimedia.org/wikipedia/commons/5/59/Basilique_du_Sacr%C3%A9-C%C5%93ur_%285430392880%29.jpg"
# url = "https://upload.wikimedia.org/wikipedia/commons/8/8e/Sacr%C3%A9_C%C5%93ur_at_night%21_%285865355326%29.jpg"
# Query image name, relative to `images` like the mapping image names.
query = 'query/night.jpg'
# Download the query image into <images>/query (requires network access and wget).
!mkdir -p $images/query && wget $url -O $images/$query -q
plot_images([read_image(images / query)], dpi=75)

Again, we extract features for the query and match them exhaustively.

In [None]:
# Add the query's features to the existing feature file, pair the query with
# every mapping image, and match those pairs into the existing match file.
extract_features.main(feature_conf, images, image_list=[query], feature_path=features, overwrite=True)
pairs_from_exhaustive.main(loc_pairs, image_list=[query], ref_list=references)
match_features.main(matcher_conf, loc_pairs, features=features, matches=matches, overwrite=True)

We read the EXIF data of the query to infer a rough initial estimate of camera parameters like the focal length. Then we estimate the absolute camera pose using PnP+RANSAC and refine the camera parameters.

In [None]:
import pycolmap
from hloc.localize_sfm import QueryLocalizer, pose_from_cluster

# Initial camera model inferred from the query image file.
camera = pycolmap.infer_camera_from_image(images / query)
# Model image ids of all mapping images; the query is localized against these.
ref_ids = [model.find_image_with_name(r).image_id for r in references]
conf = {
    'estimation': {'ransac': {'max_error': 12}},
    'refinement': {'refine_focal_length': True, 'refine_extra_params': True},
}
localizer = QueryLocalizer(model, conf)
ret, log = pose_from_cluster(localizer, query, camera, ref_ids, features, matches)

# Inlier count out of the number of entries in the inlier mask.
print(f'found {ret["num_inliers"]}/{len(ret["inliers"])} inlier correspondences.')
visualization.visualize_loc_from_log(images, query, log, model)

We visualize the correspondences between the query image and a few mapping images. We can also visualize the estimated camera pose in the 3D map.

In [None]:
# Add the estimated query camera (green) to the 3D figure created for the reconstruction.
pose = pycolmap.Image(tvec=ret['tvec'], qvec=ret['qvec'])
viz_3d.plot_camera_colmap(fig, pose, camera, color='rgba(0,255,0,0.5)', name=query)
fig.show()