In [1]:
import csv
import glob
import os
import h5py
import cv2
import numpy as np
import torch
import trimesh
from PIL import Image
from pathlib import Path
import sys
from argparse import ArgumentParser
from datetime import datetime
import torch
import torch.multiprocessing as mp
import yaml
import numpy as np
from munch import munchify
import wandb


import sys
sys.path.append("D:/gs-localization/gaussian_splatting")
sys.path.append("D:/gs-localization")
from tools.gaussian_model import GaussianModel
from gaussian_splatting.utils.system_utils import mkdir_p
from tools.config_utils import load_config, set_config, update_recursive
from tools.dataset import v2_360_Dataset
from tools.multiprocessing_utils import FakeQueue
from tools import read_write_model
from tools.eval_utils import rotation_error, translation_error


# Scene-specific overrides, read from YAML.
with open("configs/mono/tum/fr3_office.yaml", "r") as f:
    cfg_special = yaml.full_load(f)

# Base configuration the overrides are layered on top of.
inherit_from = "configs/mono/tum/base_config.yaml"
cfg = load_config(inherit_from) if inherit_from is not None else dict()

# Layer the scene overrides onto the base config. The return value of
# update_recursive is not used: `config` is bound to `cfg` itself, so the
# merge is presumably performed in place on `cfg` (confirm in
# tools.config_utils).
update_recursive(cfg, cfg_special)
config = cfg
    
# Dataset root and the scene evaluated in this notebook.
scene = "garden"
data_folder = "D:/gs-localization/datasets/360_v2"

# Let set_config adjust the configuration from the triangulated
# training-view model directory.
tr_dirs = Path(data_folder, scene, "train_views/triangulated")
config = set_config(tr_dirs, config)

# Gaussian map to localize against.
# NOTE(review): the first argument (3) is presumably the SH degree - confirm
# against GaussianModel.
Model = GaussianModel(3, config)
# Other checkpoints that were tried earlier:
#Model.load_ply("C:/Users/27118/Desktop/master_project/RaDe-GS/output/26b22380-1/point_cloud/iteration_30000/point_cloud.ply")
#Model.load_ply("D:/gaussian-splatting/output/73bdba8c-0/point_cloud/iteration_25000/point_cloud.ply")
Model.load_ply(f"D:/gs-localization/output/360_v2/{scene}/gs_map/iteration_20000/point_cloud.ply")

# Attribute-style views of the two parameter sections.
model_params = munchify(config["model_params"])
pipeline_params = munchify(config["pipeline_params"])

dataset = v2_360_Dataset(
    model_params, model_params.source_path, config, data_folder, scene
)

# Black render background, created on the GPU.
bg_color = [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")

from gaussian_splatting.utils.graphics_utils import getProjectionMatrix2, getWorld2View2
from tools import render
from tools.slam_utils import image_gradient, image_gradient_mask
from tools.camera_utils import Camera
from tools.slam_utils import get_loss_tracking, get_median_depth
from tools.pose_utils import update_pose

# Projection matrix built from the dataset intrinsics, transposed and moved
# to the first CUDA device in one chained expression.
projection_matrix = (
    getProjectionMatrix2(
        znear=0.01,
        zfar=100.0,
        fx=dataset.fx,
        fy=dataset.fy,
        cx=dataset.cx,
        cy=dataset.cy,
        W=dataset.width,
        H=dataset.height,
    )
    .transpose(0, 1)
    .to(device="cuda:0")
)

# Notebook-level overrides of two training thresholds.
config["Training"].update(
    {
        "opacity_threshold": 0.99,
        "edge_threshold": 1.1,
    }
)
from time import time

def gradient_decent(
    viewpoint,
    config,
    initial_R,
    initial_T,
    *,
    max_iters=100,
    rot_lr=0.0001,
    trans_lr=0.001,
    exposure_lr=0.001,
    converged_threshold=1e-4,
):
    """Refine a camera pose by gradient descent on the tracking loss.

    The viewpoint is first reset to ``(initial_R, initial_T)`` through
    ``viewpoint.update_RT``. Each iteration then renders the module-level
    ``Model`` from ``viewpoint``, evaluates ``get_loss_tracking`` on the
    result, back-propagates, takes one Adam step on the pose/exposure
    parameters and calls ``update_pose``. The loop ends early as soon as
    ``update_pose`` returns a truthy value.

    Relies on the module-level names ``Model``, ``pipeline_params`` and
    ``background``.

    Args:
        viewpoint: Camera object exposing ``update_RT``, ``uid``,
            ``cam_rot_delta``, ``cam_trans_delta``, ``exposure_a``,
            ``exposure_b``, ``R`` and ``T``.
        config: Configuration passed through to ``get_loss_tracking``.
        initial_R: Initial rotation handed to ``viewpoint.update_RT``.
        initial_T: Initial translation handed to ``viewpoint.update_RT``.
        max_iters: Maximum number of optimisation iterations.
        rot_lr: Adam learning rate for ``viewpoint.cam_rot_delta``.
        trans_lr: Adam learning rate for ``viewpoint.cam_trans_delta``.
        exposure_lr: Adam learning rate for both exposure parameters.
        converged_threshold: Threshold forwarded to ``update_pose``.

    Returns:
        ``(viewpoint.R, viewpoint.T, render_pkg)``. ``render_pkg`` is the
        output of the last ``render`` call, which happens *before* that
        iteration's pose update; it is ``None`` when ``max_iters`` is 0.
    """
    viewpoint.update_RT(initial_R, initial_T)

    uid = viewpoint.uid
    opt_params = [
        {
            "params": [viewpoint.cam_rot_delta],
            "lr": rot_lr,
            "name": "rot_{}".format(uid),
        },
        {
            "params": [viewpoint.cam_trans_delta],
            "lr": trans_lr,
            "name": "trans_{}".format(uid),
        },
        {
            "params": [viewpoint.exposure_a],
            "lr": exposure_lr,
            "name": "exposure_a_{}".format(uid),
        },
        {
            "params": [viewpoint.exposure_b],
            "lr": exposure_lr,
            "name": "exposure_b_{}".format(uid),
        },
    ]
    pose_optimizer = torch.optim.Adam(opt_params)

    # Defined up front so the return statement is valid even if the loop
    # body never runs (max_iters == 0).
    render_pkg = None

    for _ in range(max_iters):
        render_pkg = render(viewpoint, Model, pipeline_params, background)
        image, depth, opacity = (
            render_pkg["render"],
            render_pkg["depth"],
            render_pkg["opacity"],
        )

        pose_optimizer.zero_grad()
        loss_tracking = get_loss_tracking(config, image, depth, opacity, viewpoint)
        loss_tracking.backward()

        # The optimizer step and the pose update must not be tracked by autograd.
        with torch.no_grad():
            pose_optimizer.step()
            converged = update_pose(
                viewpoint, converged_threshold=converged_threshold
            )

        if converged:
            break

    return viewpoint.R, viewpoint.T, render_pkg

import numpy as np
from collections import defaultdict

class Transformation:
    """Plain holder for a rotation ``R`` and a translation ``T``.

    Both attributes default to ``None`` and are stored exactly as given.
    """

    def __init__(self, R=None, T=None):
        self.R, self.T = R, T


# Pose record per image name; a missing key yields an empty Transformation.
test_infos = defaultdict(Transformation)

def quat_to_rotmat(qvec):
    """Convert a quaternion given as ``(w, x, y, z)`` into a 3x3 matrix.

    The input is converted to a float array and used as-is: it is not
    normalised, so a non-unit quaternion yields a non-orthonormal matrix.

    Args:
        qvec: Sequence of four numbers in ``(w, x, y, z)`` order.

    Returns:
        A ``(3, 3)`` NumPy array.
    """
    w, x, y, z = np.array(qvec, dtype=float)

    row0 = [1 - 2*y**2 - 2*z**2, 2*x*y - 2*z*w, 2*x*z + 2*y*w]
    row1 = [2*x*y + 2*z*w, 1 - 2*x**2 - 2*z**2, 2*y*z - 2*x*w]
    row2 = [2*x*z - 2*y*w, 2*y*z + 2*x*w, 1 - 2*x**2 - 2*y**2]

    return np.array([row0, row1, row2])

# Read the initial pose estimates. Each non-empty line is expected to hold
# "<image name> qw qx qy qz tx ty tz" separated by whitespace.
with open(f"D:/gs-localization/output/360_v2/{scene}/results_sparse.txt", "r") as f:
    for line in f:
        parts = line.split()
        if not parts:
            # Blank line (e.g. a stray newline at the end of the file):
            # there is no image name to index, so skip it instead of
            # failing on parts[0].
            continue

        name = parts[0]
        qvec = list(map(float, parts[1:5]))
        tvec = list(map(float, parts[5:8]))

        R = quat_to_rotmat(qvec)
        T = np.array(tvec)

        test_infos[name].R = R
        test_infos[name].T = T


def create_mask(mkpts_lst, width, height, k):
    """Build a boolean mask that is True in a square window around each point.

    Each point ``(x, y)`` is truncated to integers and the window spans
    ``k // 2`` pixels on every side of it (so ``2 * (k // 2) + 1`` pixels
    per axis), clipped to the image. Points whose window lies entirely
    outside the image leave the mask unchanged.

    Args:
        mkpts_lst: Iterable of points; element 0 is x (column), element 1 is
            y (row).
        width: Mask width in pixels.
        height: Mask height in pixels.
        k: Nominal window size.

    Returns:
        Boolean array of shape ``(1, height, width)``.
    """
    mask = np.zeros((height, width), dtype=bool)
    half_k = k // 2

    for pt in mkpts_lst:
        x, y = int(pt[0]), int(pt[1])

        # Clip the window to the image on both sides. The upper bound is
        # also clamped at 0: without that, a point left of / above the image
        # yields a negative slice stop, which NumPy interprets as "counted
        # from the end" and would mark a large unrelated region.
        x_min = max(0, x - half_k)
        x_max = max(0, min(width, x + half_k + 1))
        y_min = max(0, y - half_k)
        y_max = max(0, min(height, y + half_k + 1))

        mask[y_min:y_max, x_min:x_max] = True

    # Add a leading axis: (1, height, width).
    return mask[np.newaxis, :, :]

total = 0
count = 0

e = 0
rot_errors = []
trans_errors = []

# The feature file is opened in a `with` block so that it is closed even
# when the loop is aborted (e.g. by interrupting the notebook cell).
# NOTE(review): the i-th name in test_infos is paired with dataset index i;
# this assumes the results file and the dataset use the same order - confirm.
with h5py.File(f'D:/gs-localization/output/360_v2/{scene}/feats-superpoint-n4096-r1024.h5', 'r') as file:
    for i, image in enumerate(test_infos):
        viewpoint = Camera.init_from_dataset(dataset, i, projection_matrix)

        viewpoint.compute_grad_mask(config)

        # Extend the gradient mask with windows around confident keypoints
        # (score > 0.2) stored for this image.
        group = file[image]
        keypoints = group['keypoints'][group['scores'][:]>0.2]
        mask = create_mask(mkpts_lst=keypoints, width=dataset.width, height=dataset.height, k=10)
        viewpoint.grad_mask = viewpoint.grad_mask | torch.tensor(mask).to("cuda:0")

        config["Training"]["monocular"] = True

        # Start the refinement from the pose read from the results file.
        initial_R = torch.tensor(test_infos[image].R)
        initial_T = torch.tensor(test_infos[image].T).squeeze()

        rotation_matrix, translation_vector, render_pkg = gradient_decent(viewpoint, config, initial_R, initial_T)
        #rotation_matrix, translation_vector = initial_R, initial_T

        rot_error = rotation_error(rotation_matrix.cpu().numpy(), viewpoint.R_gt.cpu().numpy())
        trans_error = translation_error(translation_vector.reshape(3,1).cpu().numpy(), viewpoint.T_gt.reshape(3,1).cpu().numpy())

        e += trans_error

        print(image, rot_error, trans_error)
        rot_errors.append(rot_error)
        trans_errors.append(trans_error)
        # Count poses within both error bounds (units as returned by the
        # error helpers - presumably degrees and metres; confirm).
        if rot_error < 5 and trans_error < 0.05:
            count += 1

        total += 1

"""
SCENES = ['bicycle', 'bonsai', 'counter', 'garden',  'kitchen', 'room', 'stump']


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--scenes", default=SCENES, choices=SCENES, nargs="+")
    parser.add_argument("--overwrite", action="store_true")
    parser.add_argument(
        "--dataset",
        type=Path,
        default="datasets/360_v2",
        help="Path to the dataset, default: %(default)s",
    )
    parser.add_argument(
        "--outputs",
        type=Path,
        default="output/360_v2",
        help="Path to the output directory, default: %(default)s",
    )

    parser.add_argument(
        "--num_covis",
        type=int,
        default=30,
        help="Number of image pairs for SfM, default: %(default)s",
    )

    parser.add_argument(
        "--num_retrieve",
        type=int,
        default=3,
        help="Number of images for retrieval, default: %(default)s",
    )
    args = parser.parse_args()

    gt_dirs = args.dataset / "{scene}/sparse/0" 
    tr_dirs = args.dataset / "{scene}/train_views/triangulated" 

    for scene in args.scenes:
        logger.info(f'Working on scene "{scene}".')
        if args.overwrite or True:
            run_scene(
                args.dataset / scene / "images_4",
                Path(str(gt_dirs).format(scene=scene)),
                Path(str(tr_dirs).format(scene=scene)), 
                args.dataset / scene,
                args.outputs / scene,
                args.num_covis,
                args.num_retrieve)

"""

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
DSC07957.JPG 0.0 0.0008846196915854965
DSC07961.JPG 0.019026952505869246 0.0005690231295027022
DSC07965.JPG 0.0 0.0021364254783642505
DSC07969.JPG 0.0 0.0007669433324663702
DSC07973.JPG 0.0086168832633178 0.0030191548687424873
DSC07977.JPG 0.0 0.0011863078868375827
DSC07981.JPG 0.026348006084677758 0.0016592148405787709
DSC07985.JPG 0.01996937708652734 0.0013287119768531329
DSC07989.JPG 0.0 0.0007050134270544847
DSC07993.JPG 0.006475060989470229 0.0018918306607795987
DSC07997.JPG 0.0 0.0005930749944654968
DSC08001.JPG 0.0 0.0012311268078979893
DSC08005.JPG 0.021957261566761626 0.0006743152277777875
DSC08009.JPG 0.017490494504312483 0.0006242455519888847
DSC08013.JPG 0.0 0.002412824363649014
DSC08017.JPG 0.0 0.0015544645504503693
DSC08021.JPG 0.022100867825541794 0.0017316720748430004
DSC08025.JPG 0.04740091702305817 0.0

KeyboardInterrupt: 

In [1]:
import os
import h5py
import cv2
import numpy as np
import torch
from tqdm import tqdm
from PIL import Image
from pathlib import Path
import sys
import yaml
from munch import munchify
from math import atan
from collections import OrderedDict

sys.path.append("D:/gs-localization/gaussian_splatting")
sys.path.append("D:/gs-localization")
sys.path.append("D:/gs-localization/gs_localization/pipelines")


from tools.config_utils import load_config, update_recursive
from tools import read_write_model
from tools.gaussian_model import GaussianModel
from tools import render
from tools.camera_utils import Camera
from tools.descent_utils import get_loss_tracking
from tools.pose_utils import update_pose
from tools.graphics_utils import getProjectionMatrix2


def gradient_decent(viewpoint, config, initial_R, initial_T):
    """Refine a camera pose with up to 50 Adam steps on the tracking loss.

    ``viewpoint`` is reset to ``(initial_R, initial_T)``, then repeatedly
    rendered against the module-level ``Model`` (with ``pipeline_params``
    and ``background``). After every optimizer step ``update_pose`` is
    called, and the loop stops as soon as it returns a truthy value.

    Args:
        viewpoint: Camera whose pose and exposure parameters are optimised.
        config: Configuration forwarded to ``get_loss_tracking``.
        initial_R: Starting rotation passed to ``viewpoint.update_RT``.
        initial_T: Starting translation passed to ``viewpoint.update_RT``.

    Returns:
        ``(viewpoint.R, viewpoint.T, render_pkg)`` where ``render_pkg`` is
        the result of the last ``render`` call (made before that
        iteration's pose update).
    """
    viewpoint.update_RT(initial_R, initial_T)

    # (parameter, learning rate, name template) for every optimised quantity.
    param_specs = (
        (viewpoint.cam_rot_delta, 0.001, "rot_{}"),
        (viewpoint.cam_trans_delta, 0.001, "trans_{}"),
        (viewpoint.exposure_a, 0.001, "exposure_a_{}"),
        (viewpoint.exposure_b, 0.001, "exposure_b_{}"),
    )
    opt_params = [
        {"params": [param], "lr": lr, "name": template.format(viewpoint.uid)}
        for param, lr, template in param_specs
    ]
    pose_optimizer = torch.optim.Adam(opt_params)

    for _ in range(50):
        render_pkg = render(viewpoint, Model, pipeline_params, background)
        image = render_pkg["render"]
        depth = render_pkg["depth"]
        opacity = render_pkg["opacity"]

        pose_optimizer.zero_grad()
        get_loss_tracking(config, image, depth, opacity, viewpoint).backward()

        # Parameter and pose updates are excluded from autograd tracking.
        with torch.no_grad():
            pose_optimizer.step()
            if update_pose(viewpoint, converged_threshold=1e-4):
                break

    return viewpoint.R, viewpoint.T, render_pkg


class Transformation:
    """Container pairing a rotation ``R`` with a translation ``T``.

    The values are kept exactly as passed in; both default to ``None``.
    """

    def __init__(self, R=None, T=None):
        self.R, self.T = R, T

def quat_to_rotmat(qvec):
    """Return the 3x3 matrix for a quaternion in ``(w, x, y, z)`` order.

    No normalisation is applied to ``qvec``; the matrix entries are computed
    directly from the four components after conversion to float.
    """
    components = np.array(qvec, dtype=float)
    w, x, y, z = components

    return np.array(
        (
            (1 - 2*y**2 - 2*z**2, 2*x*y - 2*z*w, 2*x*z + 2*y*w),
            (2*x*y + 2*z*w, 1 - 2*x**2 - 2*z**2, 2*y*z - 2*x*w),
            (2*x*z - 2*y*w, 2*y*z + 2*x*w, 1 - 2*x**2 - 2*y**2),
        )
    )


def focal2fov(focal, pixels):
    """Return the field of view in radians for a focal length and image extent.

    Both arguments must be in the same unit; the result is
    ``2 * atan(pixels / (2 * focal))``.
    """
    half_angle = atan(pixels / (2 * focal))
    return 2 * half_angle

def load_pose(pose_txt):
    """Load a 4x4 pose matrix from a whitespace-separated text file.

    Every non-blank line is parsed as one row of floats. Blank lines
    (including extra newlines at the end of the file) are ignored.

    Args:
        pose_txt: Path of the text file to read.

    Returns:
        A ``(4, 4)`` ``float32`` NumPy array.

    Raises:
        AssertionError: If the parsed matrix is not 4x4.
        ValueError: If a token cannot be converted to ``float``.
    """
    rows = []
    with open(pose_txt, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # An empty row would make the matrix ragged and break the
                # array construction below.
                continue
            rows.append([float(tok) for tok in tokens])

    pose = np.array(rows).astype(np.float32)
    assert pose.shape == (4, 4), f"expected a 4x4 pose, got shape {pose.shape}"
    return pose

def create_mask(mkpts_lst, width, height, k):
    """Mark a square neighbourhood around every keypoint in a boolean mask.

    Coordinates are truncated to integers. The neighbourhood of a point
    extends ``k // 2`` pixels in each direction and is clipped to the image
    bounds; a point whose neighbourhood is completely outside the image
    contributes nothing.

    Args:
        mkpts_lst: Iterable of points indexed as ``pt[0]`` = x, ``pt[1]`` = y.
        width: Width of the mask in pixels.
        height: Height of the mask in pixels.
        k: Nominal neighbourhood size.

    Returns:
        Boolean array with shape ``(1, height, width)``.
    """
    # Start with an all-False mask.
    mask = np.zeros((height, width), dtype=bool)

    # Half-width of the neighbourhood.
    half_k = k // 2

    for pt in mkpts_lst:
        x, y = int(pt[0]), int(pt[1])

        # Clip the neighbourhood to the image. The stop indices are clamped
        # at 0 as well, because a negative stop would be read by NumPy as an
        # offset from the end and select the wrong pixels.
        x_min = max(0, x - half_k)
        x_max = max(0, min(width, x + half_k + 1))
        y_min = max(0, y - half_k)
        y_max = max(0, min(height, y + half_k + 1))

        mask[y_min:y_max, x_min:x_max] = True

    # Shape: (1, height, width)
    mask = mask[np.newaxis, :, :]

    return mask

class BaseDataset(torch.utils.data.Dataset):
    """Minimal dataset base holding shared settings.

    Stores the constructor arguments and fixes the device, dtype and a
    placeholder length. ``__getitem__`` is a stub that returns ``None``.
    """

    def __init__(self, args, path, config):
        self.args, self.path, self.config = args, path, config
        self.device = "cuda:0"
        self.dtype = torch.float32
        # Placeholder length; this class never replaces it.
        self.num_imgs = 9999

    def __len__(self):
        return self.num_imgs

    def __getitem__(self, idx):
        # Stub: no sample is produced at this level.
        return None

class MonocularDataset(BaseDataset):
    """Dataset base for a single calibrated camera.

    Reads intrinsics, distortion coefficients and the optional depth scale
    from ``config["Dataset"]["Calibration"]`` and precomputes the
    undistortion maps. ``__getitem__`` reads ``self.color_paths``,
    ``self.poses`` and (when depth is enabled) ``self.depth_paths``; none of
    them is set here, so they have to be provided elsewhere (e.g. by a
    subclass).
    """

    def __init__(self, args, path, config):
        super().__init__(args, path, config)
        calib = config["Dataset"]["Calibration"]

        # Intrinsics and image size.
        self.fx, self.fy = calib["fx"], calib["fy"]
        self.cx, self.cy = calib["cx"], calib["cy"]
        self.width, self.height = calib["width"], calib["height"]
        self.fovx = focal2fov(self.fx, self.width)
        self.fovy = focal2fov(self.fy, self.height)
        self.K = np.array(
            [[self.fx, 0.0, self.cx], [0.0, self.fy, self.cy], [0.0, 0.0, 1.0]]
        )

        # Distortion model. The attribute name `disorted` (sic) is kept
        # because it is part of this class's visible interface.
        self.disorted = calib["distorted"]
        self.dist_coeffs = np.array(
            [calib[name] for name in ("k1", "k2", "p1", "p2", "k3")]
        )
        self.map1x, self.map1y = cv2.initUndistortRectifyMap(
            self.K,
            self.dist_coeffs,
            np.eye(3),
            self.K,
            (self.width, self.height),
            cv2.CV_32FC1,
        )

        # Depth is available only when the calibration provides a scale.
        self.has_depth = "depth_scale" in calib
        self.depth_scale = calib["depth_scale"] if self.has_depth else None

        # Default scene scale.
        self.scene_info = {
            "nerf_normalization": {
                "radius": 5,
                "translation": np.zeros(3),
            },
        }

    def __getitem__(self, idx):
        """Return ``(image, depth, pose)`` for sample ``idx``.

        ``image`` is a channel-first tensor scaled to ``[0, 1]`` on
        ``self.device``; ``depth`` is a NumPy array divided by
        ``self.depth_scale`` (or ``None`` without depth); ``pose`` is the
        stored pose as a tensor on ``self.device``.
        """
        color_path = self.color_paths[idx]
        pose = self.poses[idx]

        image = np.array(Image.open(color_path))
        if self.disorted:
            image = cv2.remap(image, self.map1x, self.map1y, cv2.INTER_LINEAR)

        depth = None
        if self.has_depth:
            raw_depth = np.array(Image.open(self.depth_paths[idx]))
            depth = raw_depth / self.depth_scale

        # HWC values in 0..255 -> CHW tensor in [0, 1].
        scaled = torch.from_numpy(image / 255.0).clamp(0.0, 1.0)
        image = scaled.permute(2, 0, 1).to(device=self.device, dtype=self.dtype)

        pose = torch.from_numpy(pose).to(device=self.device)
        return image, depth, pose


class seven_scenes_Dataset(MonocularDataset):
    """Test split of one scene, with ground-truth poses from a sparse model.

    Colour images are taken from ``<data_folder>/<scene>/images_full`` and
    depth maps from ``<data_folder>/<scene>/depths_full``; only images listed
    in ``<data_folder>/<scene>/test_full.txt`` are kept. Samples are ordered
    by colour file name.

    Depth is always enabled (``has_depth`` is forced to ``True``).
    NOTE(review): ``depth_scale`` still comes from the calibration section;
    if ``"depth_scale"`` is missing there it is ``None`` and reading a sample
    will fail - confirm that callers always set it.
    """

    def __init__(self, args, path, config, data_folder, scene):
        super().__init__(args, path, config)
        self.has_depth = True
        self.seven_scenes_Parser(data_folder, scene)

    def seven_scenes_Parser(self, data_folder, scene):
        """Populate ``color_paths``, ``depth_paths``, ``poses`` and ``num_imgs``.

        Args:
            data_folder: Root folder containing one sub-folder per scene.
            scene: Name of the scene sub-folder.

        Raises:
            ValueError: If none of the images listed in ``test_full.txt`` is
                present in the sparse model.
        """
        self.color_paths, self.poses, self.depth_paths = [], [], []

        scene_dir = Path(data_folder) / scene
        gt_dirs = scene_dir / "sparse/0"
        _, images, _ = read_write_model.read_model(gt_dirs, ".txt")

        # Names of the images that belong to the test split.
        test_images_path = scene_dir / "test_full.txt"
        with open(test_images_path, 'r') as f:
            test_images = set(line.strip() for line in f)

        samples = []
        for image in tqdm(images.values(), "Load dataset"):
            if image.name not in test_images:
                continue

            image_path = scene_dir / 'images_full' / image.name
            depth_path = scene_dir / 'depths_full' / image.name.replace("color", "depth")

            # 4x4 matrix assembled from the model's rotation and translation.
            pose = np.eye(4)
            pose[:3, :3] = image.qvec2rotmat()
            pose[:3, 3] = image.tvec

            samples.append((image_path, depth_path, pose))
        del images

        if not samples:
            # Unpacking an empty zip() below would fail with an unhelpful
            # "not enough values to unpack" message.
            raise ValueError(
                f"No image listed in {test_images_path} was found in the "
                f"model at {gt_dirs}"
            )

        # Order the samples by colour file name; paths, depths and poses stay
        # aligned because they are sorted together.
        samples.sort(key=lambda sample: sample[0].name)
        self.color_paths, self.depth_paths, self.poses = zip(*samples)

        # Report the real number of samples instead of the base-class
        # placeholder.
        self.num_imgs = len(self.color_paths)

# Scene-specific overrides, read from YAML.
with open("D:/gs-localization/gs_localization/pipelines/configs/mono/tum/fr3_office.yaml", "r") as f:
    cfg_special = yaml.full_load(f)

# Base configuration the overrides are layered on top of.
inherit_from = "D:/gs-localization/gs_localization/pipelines/configs/mono/tum/base_config.yaml"
cfg = load_config(inherit_from) if inherit_from is not None else dict()

# Layer the overrides onto the base config; `config` is the same object as
# `cfg`, so the merge is presumably done in place (confirm in
# tools.config_utils).
update_recursive(cfg, cfg_special)
config = cfg

data_folder = "D:/gs-localization/datasets/7scenes"

# Replace the calibration inherited from the YAML files with the values
# used for this dataset.
config["Dataset"]["Calibration"].update(
    {
        "fx": 525,
        "fy": 525,
        "cx": 320,
        "cy": 240,
        "width": 640,
        "height": 480,
        "depth_scale": 1000.0,
    }
)

# Training switches overridden for this run.
config["Training"].update(
    {
        "monocular": False,
        "alpha": 0.99,
    }
)


Using cache found in C:\Users\27118/.cache\torch\hub\intel-isl_MiDaS_master
  model = create_fn(
Using cache found in C:\Users\27118/.cache\torch\hub\intel-isl_MiDaS_master


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [5]:
from PIL import Image, ImageDraw
import torchvision.transforms as transforms
import numpy as np

# Build a comparison image: ground truth below the main diagonal, localized
# render above it, plus a magnified inset of a selected region.
# viewpoint.original_image and render_pkg["render"] are expected to be
# channel-first tensors.
ground_truth_tensor = viewpoint.original_image
# Work on a detached copy so the in-place normalisation below does not
# modify the tensor stored in render_pkg.
localized_tensor = render_pkg["render"].detach().clone()

# Per-pixel maximum over the colour channels, shape (H, W).
max_vals, _ = localized_tensor.max(dim=0)

# Pixels whose brightest channel exceeds 1.
exceeds_one_mask = max_vals > 1

# Scale all channels of those pixels by their maximum so they fit in [0, 1].
localized_tensor[:, exceeds_one_mask] = localized_tensor[:, exceeds_one_mask] / (max_vals[exceeds_one_mask] + 0.00001)

# Convert both tensors to PIL images.
tensor_to_pil = transforms.ToPILImage()

ground_truth_image = tensor_to_pil(ground_truth_tensor)
localized_image = tensor_to_pil(localized_tensor)

# Make sure both images have the same size.
width, height = ground_truth_image.size
localized_image = localized_image.resize((width, height))

# NumPy views of both images for the per-pixel selection.
ground_truth_array = np.array(ground_truth_image)
localized_image_array = np.array(localized_image)

# Pixels with x < y * (width / height) (below the diagonal) come from the
# ground truth, all others from the localized render. The selection is done
# for the whole image at once instead of one putpixel call per pixel.
row_idx = np.arange(height)[:, np.newaxis]
col_idx = np.arange(width)[np.newaxis, :]
use_ground_truth = col_idx < row_idx * (width / height)
combined_array = np.where(
    use_ground_truth[:, :, np.newaxis], ground_truth_array, localized_image_array
)
combined_image = Image.fromarray(combined_array.astype(np.uint8)).convert('RGB')

# Draw the green diagonal from the top-left to the bottom-right corner.
draw = ImageDraw.Draw(combined_image)
draw.line((0, 0, width, height), fill="green", width=3)

# Small box marking the region to magnify.
small_box_start = (215, 220)  # top-left corner (x, y)
small_box_width = 150         # box width
small_box_height = 100        # box height
small_box_end = (small_box_start[0] + small_box_width, small_box_start[1] + small_box_height)

# Outline the small box in blue.
draw.rectangle([small_box_start, small_box_end], outline="blue", width=3)

# Cut out the content of the small box.
small_box_region = combined_image.crop((small_box_start[0], small_box_start[1], small_box_end[0], small_box_end[1]))

# Enlarge it.
scale_factor = 1.6  # magnification
large_box_region = small_box_region.resize((int(small_box_width * scale_factor), int(small_box_height * scale_factor)))

# Place the enlarged region next to the small box, covering part of the image.
large_box_start_x = small_box_end[0] + 10  # 10 px to the right of the small box
large_box_start_y = small_box_start[1]

# Keep the enlarged region inside the image.
if large_box_start_x + large_box_region.width > width:
    large_box_start_x = width - large_box_region.width - 10
if large_box_start_y + large_box_region.height > height:
    large_box_start_y = height - large_box_region.height - 10

# Paste the enlarged region into the image.
combined_image.paste(large_box_region, (large_box_start_x, large_box_start_y))

# Outline the enlarged region.
large_box_end = (large_box_start_x + large_box_region.width, large_box_start_y + large_box_region.height)
draw.rectangle([large_box_start_x, large_box_start_y, large_box_end[0], large_box_end[1]], outline="blue", width=3)

# Show the result.
combined_image.show()

# Save the image.
combined_image.save(f"C:/Users/27118/Desktop/{scene}_compare.png")


NameError: name 'viewpoint' is not defined

In [16]:
# Summary statistics over the per-image errors collected by the evaluation
# loop.
trans_arr = np.array(trans_errors)
rot_arr = np.array(rot_errors)

med_t = np.median(trans_arr)
med_R = np.median(rot_arr)
print( f"\nMedian errors: {med_t:.3f}m, {med_R:.3f}deg")

# Paired (translation, rotation) thresholds; an image counts as a success
# only when it is below both.
threshs_t = [0.01, 0.02, 0.03, 0.05, 0.25, 0.5, 5.0]
threshs_R = [1.0, 2.0, 3.0, 5.0, 2.0, 5.0, 10.0]
for th_t, th_R in zip(threshs_t, threshs_R):
    within_both = (trans_arr < th_t) & (rot_arr < th_R)
    ratio = np.mean(within_both)
    print(f"\n\t{th_t*100:.0f}cm, {th_R:.0f}deg : {ratio*100:.2f}%")


Median errors: 0.001m, 0.003deg

	1cm, 1deg : 100.00%

	2cm, 2deg : 100.00%

	3cm, 3deg : 100.00%

	5cm, 5deg : 100.00%

	25cm, 2deg : 100.00%

	50cm, 5deg : 100.00%

	500cm, 10deg : 100.00%


In [43]:
import h5py

# Image whose keypoints are inspected.
target_image_key = '_DSC9266.JPG'

# Open the HDF5 feature file read-only.
with h5py.File('D:/gs-localization/output/360_v2/stump/feats-superpoint-n4096-r1024.h5', 'r') as file:
    # Group holding the data stored for that image.
    group = file[target_image_key]

    # Load the whole 'keypoints' dataset into memory.
    mkpts_lst = group['keypoints'][:]

    # Print a short summary as a sanity check.
    print(f"Shape of mkpts_lst: {mkpts_lst.shape}")
    print(f"Keypoints (first few): \n{mkpts_lst[:10]}")


Shape of mkpts_lst: (2205, 2)
Keypoints (first few): 
[[  74.25    9.83]
 [ 108.3     9.83]
 [ 452.5     9.83]
 [ 541.      9.83]
 [ 587.5     9.83]
 [ 631.      9.83]
 [ 648.      9.83]
 [ 694.5     9.83]
 [1042.      9.83]
 [1192.      9.83]]


In [32]:
mkpts_lst[:]

array([[ 74.25,   9.83],
       [108.3 ,   9.83],
       [452.5 ,   9.83],
       ...,
       [844.  , 805.5 ],
       [893.5 , 805.5 ],
       [898.5 , 805.5 ]], dtype=float16)

In [29]:
import h5py

# Open the HDF5 feature file read-only.
with h5py.File('D:/gs-localization/output/360_v2/bicycle/feats-superpoint-n4096-r1024.h5', 'r') as file:
    # List all top-level groups (one per image).
    keys = list(file.keys())
    print("Keys: ", keys)

    # Inspect the first group, e.g. '_DSC8679.JPG'.
    first_image_key = keys[0]
    group = file[first_image_key]

    # Read the 'descriptors' dataset.
    descriptors = group['descriptors'][:]
    print(f"Shape of descriptors: {descriptors.shape}")
    print(f"Descriptors (first few): \n{descriptors[:10]}")

    # Read the 'image_size' dataset.
    image_size = group['image_size'][:]
    print(f"Image size: {image_size}")

    # Read the 'keypoints' dataset.
    keypoints = group['keypoints'][:]
    print(f"Shape of keypoints: {keypoints.shape}")
    print(f"Keypoints (first few): \n{keypoints[:10]}")
    # The original line here used an empty subscript (`keypoints[:][]`),
    # which is a SyntaxError and kept the whole cell from running; print the
    # full array instead.
    print(keypoints[:])

    # Read the 'scores' dataset.
    scores = group['scores'][:]
    print(f"Shape of scores: {scores.shape}")
    print(f"Scores (first few): \n{scores[:10]}")


Keys:  ['_DSC8679.JPG', '_DSC8680.JPG', '_DSC8681.JPG', '_DSC8682.JPG', '_DSC8683.JPG', '_DSC8684.JPG', '_DSC8685.JPG', '_DSC8686.JPG', '_DSC8687.JPG', '_DSC8688.JPG', '_DSC8689.JPG', '_DSC8690.JPG', '_DSC8691.JPG', '_DSC8692.JPG', '_DSC8693.JPG', '_DSC8694.JPG', '_DSC8695.JPG', '_DSC8696.JPG', '_DSC8697.JPG', '_DSC8698.JPG', '_DSC8699.JPG', '_DSC8700.JPG', '_DSC8701.JPG', '_DSC8702.JPG', '_DSC8703.JPG', '_DSC8704.JPG', '_DSC8705.JPG', '_DSC8706.JPG', '_DSC8707.JPG', '_DSC8708.JPG', '_DSC8709.JPG', '_DSC8710.JPG', '_DSC8711.JPG', '_DSC8712.JPG', '_DSC8713.JPG', '_DSC8714.JPG', '_DSC8715.JPG', '_DSC8716.JPG', '_DSC8717.JPG', '_DSC8718.JPG', '_DSC8719.JPG', '_DSC8720.JPG', '_DSC8721.JPG', '_DSC8722.JPG', '_DSC8723.JPG', '_DSC8724.JPG', '_DSC8725.JPG', '_DSC8726.JPG', '_DSC8727.JPG', '_DSC8728.JPG', '_DSC8729.JPG', '_DSC8730.JPG', '_DSC8731.JPG', '_DSC8732.JPG', '_DSC8733.JPG', '_DSC8734.JPG', '_DSC8735.JPG', '_DSC8736.JPG', '_DSC8737.JPG', '_DSC8738.JPG', '_DSC8739.JPG', '_DSC8741.JPG', 

In [None]:
#read_write_model.read_cameras_binary("D:/gs-localization/datasets/nerf_llff_data/fern/train_views/triangulated/cameras.bin")