In [1]:
import os
import cv2
import numpy as np
from collections import defaultdict, deque


def compute_feature_matches(image_paths, min_matches=20, ratio=0.75, nfeatures=1000):
    """Build a directed "shares enough ORB matches" graph between images.

    ORB descriptors are extracted once per image, then every ordered pair
    ``(path1, path2)`` with distinct positions in ``image_paths`` is matched
    with a FLANN matcher (``knnMatch`` with ``k=2``) followed by a ratio test.
    An edge ``path1 -> path2`` is recorded when at least ``min_matches``
    matches survive the test. Matching is run in both directions, so the
    graph is not guaranteed to be symmetric.

    Args:
        image_paths: Iterable of image file paths. It is traversed several
            times, so pass a list (not a one-shot generator).
        min_matches: Minimum number of ratio-test survivors for an edge.
        ratio: Ratio-test factor; a match is kept when its distance is
            strictly below ``ratio`` times the second-best distance.
        nfeatures: Value forwarded to ``cv2.ORB_create(nfeatures=...)``.

    Returns:
        ``defaultdict(list)`` mapping a path to the list of paths it matches.
        Images that could not be read, or that yielded no descriptors, never
        appear in the graph.
    """
    image_paths = list(image_paths)

    orb = cv2.ORB_create(nfeatures=nfeatures)
    # algorithm=6 is FLANN's LSH index in the OpenCV tutorials, the variant
    # meant for binary descriptors such as ORB.
    index_params = dict(algorithm=6, table_number=6, key_size=12, multi_probe_level=1)
    search_params = {}
    matcher = cv2.FlannBasedMatcher(index_params, search_params)

    descriptors_by_path = {}
    matches_graph = defaultdict(list)

    print("🔍 Extraction des descripteurs ORB...")

    for path in image_paths:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None;
            # feeding that to detectAndCompute would abort the whole run.
            print(f"⚠️ Image illisible, ignorée : {path}")
            continue
        _, descriptors = orb.detectAndCompute(img, None)
        if descriptors is not None:
            descriptors_by_path[path] = descriptors

    print("🔗 Recherche des images connectées...")

    for i, path1 in enumerate(image_paths):
        desc1 = descriptors_by_path.get(path1)
        if desc1 is None:
            continue

        for j, path2 in enumerate(image_paths):
            if i == j:
                continue
            desc2 = descriptors_by_path.get(path2)
            if desc2 is None:
                continue

            good_count = 0
            for pair in matcher.knnMatch(desc1, desc2, k=2):
                # knnMatch may return fewer than k candidates for a query.
                if len(pair) < 2:
                    continue
                best, second = pair
                if best.distance < ratio * second.distance:
                    good_count += 1

            if good_count >= min_matches:
                matches_graph[path1].append(path2)

    return matches_graph


def extract_image_sequence(matches_graph):
    """Order images by greedily walking the match graph.

    Starting from each not-yet-visited key of ``matches_graph`` (in the
    mapping's iteration order), the walk repeatedly moves to the first
    neighbour that has not been visited and stops when there is none; there
    is no backtracking. Every visited node is appended to the result once.

    The graph is only read: neighbours are looked up with ``.get`` so a
    ``defaultdict`` is not grown while its keys are being iterated (which
    raises ``RuntimeError``), and a plain ``dict`` whose neighbours are not
    keys does not raise ``KeyError``. The walk is iterative, so long chains
    cannot hit the recursion limit.

    Args:
        matches_graph: Mapping ``node -> iterable of neighbour nodes``.

    Returns:
        List of nodes in visiting order. Nodes that are neither keys nor
        reached by a walk are absent.
    """
    visited = set()
    sequence = []
    no_next = object()  # sentinel, so any hashable (even None) can be a node

    # Iterate over a snapshot of the keys rather than the live mapping.
    for start_node in list(matches_graph):
        if start_node in visited:
            continue

        node = start_node
        while node is not no_next:
            visited.add(node)
            sequence.append(node)
            node = next(
                (nb for nb in matches_graph.get(node, ()) if nb not in visited),
                no_next,
            )

    return sequence


# === MAIN ===

# Collect every image file of the input folder, sorted by path.
image_dir = "/home/jourdelune/Images/colmap/input"
image_paths = sorted(
    os.path.join(image_dir, entry)
    for entry in os.listdir(image_dir)
    if entry.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tiff"))
)

# Build the match graph, then turn it into an ordered list of images.
graph = compute_feature_matches(image_paths, min_matches=30)
sequence = extract_image_sequence(graph)

print("\n✅ Séquence d’images connectées trouvée :")
for img in sequence:
    print(f"  - {img}")

# Images found on disk that did not make it into the sequence.
missing_count = len(image_paths) - len(sequence)
print("le nombre d'images manquantes est :", missing_count)


🔍 Extraction des descripteurs ORB...
🔗 Recherche des images connectées...

✅ Séquence d’images connectées trouvée :
  - /home/jourdelune/Images/colmap/input/image1.jpg
  - /home/jourdelune/Images/colmap/input/image10.jpg
  - /home/jourdelune/Images/colmap/input/image12.jpg
  - /home/jourdelune/Images/colmap/input/image13.jpg
  - /home/jourdelune/Images/colmap/input/image14.jpg
  - /home/jourdelune/Images/colmap/input/image16.jpg
  - /home/jourdelune/Images/colmap/input/image11.jpg
  - /home/jourdelune/Images/colmap/input/image17.jpg
  - /home/jourdelune/Images/colmap/input/image15.jpg
  - /home/jourdelune/Images/colmap/input/image20.jpg
  - /home/jourdelune/Images/colmap/input/image21.jpg
  - /home/jourdelune/Images/colmap/input/image3.jpg
  - /home/jourdelune/Images/colmap/input/image2.jpg
  - /home/jourdelune/Images/colmap/input/image18.jpg
  - /home/jourdelune/Images/colmap/input/image24.jpg
  - /home/jourdelune/Images/colmap/input/image27.jpg
  - /home/jourdelune/Images/colmap/inpu

In [2]:
import os
import numpy as np
import torch
import trimesh
from torch.nn import functional as F
from vggt.models.vggt import VGGT
from vggt.utils.geometry import unproject_depth_map_to_point_map
from vggt.utils.helper import create_pixel_coordinate_grid, randomly_limit_trues
from vggt.utils.load_fn import load_and_preprocess_images_square
from vggt.utils.pose_enc import pose_encoding_to_extri_intri

# --- Device and precision ---------------------------------------------------
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"
# Query the compute capability only when CUDA is present: calling
# torch.cuda.get_device_capability() on a CPU-only machine raises, which
# would defeat the "cpu" fallback above.
dtype = (
    torch.bfloat16
    if use_cuda and torch.cuda.get_device_capability()[0] >= 8
    else torch.float16
)

model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
# Inference only; harmless if from_pretrained already switched to eval mode.
model.eval()

image_dir = "/home/jourdelune/Images/colmap/input"
image_names = sequence  # ordered paths produced by the feature-matching cell

vggt_fixed_resolution = 518
img_load_resolution = 1024
batch_size = 3  # max images per VGGT run

# Loop-invariant settings for point selection.
conf_thres_value = 5.0
max_points_for_colmap = 100000
image_size = np.array([vggt_fixed_resolution, vggt_fixed_resolution])

# Load all images
images_all, original_coords_all = load_and_preprocess_images_square(
    image_names, img_load_resolution
)

# Per-batch results; index k holds the output of the k-th VGGT run.
total_images = images_all.shape[0]
batched_extrinsic, batched_intrinsic = [], []
batched_points_3d, batched_points_rgb, batched_points_xyf = [], [], []

print(f"Total images: {total_images}, Batch size: {batch_size}")

# Each batch goes through VGGT independently of the others.
# NOTE(review): presumably each batch therefore ends up in its own reference
# frame, which is why later cells register the batches with ICP - confirm.
for i in range(0, total_images, batch_size):
    images_batch = images_all[i : i + batch_size].to(device)
    # Not used further down in this cell; kept for parity with the original.
    original_coords = original_coords_all[i : i + batch_size].to(device)

    # Resize and run VGGT
    images_resized = F.interpolate(
        images_batch,
        size=(vggt_fixed_resolution, vggt_fixed_resolution),
        mode="bilinear",
        align_corners=False,
    )

    # torch.amp.autocast("cuda", ...) replaces the deprecated
    # torch.cuda.amp.autocast(...); it is disabled outright without CUDA.
    with torch.no_grad(), torch.amp.autocast("cuda", dtype=dtype, enabled=use_cuda):
        images_input = images_resized[None]  # add the leading batch axis
        aggregated_tokens_list, ps_idx = model.aggregator(images_input)
        pose_enc = model.camera_head(aggregated_tokens_list)[-1]
        extrinsic, intrinsic = pose_encoding_to_extri_intri(
            pose_enc, images_input.shape[-2:]
        )
        depth_map, depth_conf = model.depth_head(
            aggregated_tokens_list, images_input, ps_idx
        )

    # .float() is a no-op for float32 tensors and guards against reduced
    # precision outputs (numpy cannot represent bfloat16).
    extrinsic = extrinsic.squeeze(0).float().cpu().numpy()
    intrinsic = intrinsic.squeeze(0).float().cpu().numpy()
    depth_map = depth_map.squeeze(0).float().cpu().numpy()
    depth_conf = depth_conf.squeeze(0).float().cpu().numpy()

    points_3d = unproject_depth_map_to_point_map(depth_map, extrinsic, intrinsic)

    num_frames, height, width, _ = points_3d.shape

    # Channels-last uint8 colours, so the same mask indexes points and colours.
    points_rgb = (images_resized.cpu().numpy() * 255).astype(np.uint8)
    points_rgb = points_rgb.transpose(0, 2, 3, 1)
    points_xyf = create_pixel_coordinate_grid(num_frames, height, width)

    # Keep confident pixels only, capped at max_points_for_colmap per batch.
    conf_mask = depth_conf >= conf_thres_value
    conf_mask = randomly_limit_trues(conf_mask, max_points_for_colmap)

    batched_extrinsic.append(extrinsic)
    batched_intrinsic.append(intrinsic)
    batched_points_3d.append(points_3d[conf_mask])
    batched_points_rgb.append(points_rgb[conf_mask])
    batched_points_xyf.append(points_xyf[conf_mask])

  from .autonotebook import tqdm as notebook_tqdm


Total images: 26, Batch size: 3


  with torch.cuda.amp.autocast(dtype=dtype):


In [3]:
import copy
from typing import Union

import numpy as np
import open3d as o3d


def draw_registration_result(source, target, transformation = np.identity(4)):
    """Show ``source`` (after ``transformation``) next to ``target``.

    Both geometries are deep-copied first, so the caller's objects are left
    untouched; only the copy of ``source`` is transformed. The viewer is
    opened with a fixed camera (zoom / front / lookat / up).
    """
    moved_source = copy.deepcopy(source)
    moved_source.transform(transformation)
    fixed_target = copy.deepcopy(target)

    camera = dict(
        zoom=1,
        front=[0.9288, -0.2951, -0.2242],
        lookat=[1.6784, 2.0612, 1.4451],
        up=[-0.3402, -0.9189, -0.1996],
    )
    o3d.visualization.draw_geometries([moved_source, fixed_target], **camera)
    

def align_point_clouds(source: Union[o3d.geometry.PointCloud, np.ndarray],
                  target: Union[o3d.geometry.PointCloud, np.ndarray],
                  threshold: float = 0.4,
                  max_iteration: int = 400,
                  init: Union[np.ndarray, None] = None,
                  with_scaling: bool = False) -> o3d.pipelines.registration.RegistrationResult:
    """Register ``source`` onto ``target`` with point-to-point ICP.

    Args:
        source: Cloud to be moved; either an Open3D ``PointCloud`` or an
            array of xyz coordinates whose last axis has length 3.
        target: Reference cloud, same accepted types as ``source``.
        threshold: Maximum correspondence distance given to ICP.
        max_iteration: Iteration cap for the ICP convergence criteria.
        init: Optional 4x4 initial transformation; identity when omitted.
        with_scaling: Forwarded to ``TransformationEstimationPointToPoint``;
            the default ``False`` keeps the estimation rigid.

    Returns:
        The ``RegistrationResult`` produced by ``registration_icp`` (its
        ``transformation`` attribute is what the notebook uses afterwards).
    """

    def _as_point_cloud(geometry):
        # registration_icp needs PointCloud objects; the signature also
        # advertises raw arrays, so wrap those instead of failing.
        if isinstance(geometry, o3d.geometry.PointCloud):
            return geometry
        xyz = np.asarray(geometry, dtype=np.float64).reshape(-1, 3)
        cloud = o3d.geometry.PointCloud()
        cloud.points = o3d.utility.Vector3dVector(xyz)
        return cloud

    if init is None:
        init = np.identity(4)
    else:
        init = np.asarray(init, dtype=np.float64)

    reg_p2p = o3d.pipelines.registration.registration_icp(
        _as_point_cloud(source), _as_point_cloud(target), threshold, init,
        o3d.pipelines.registration.TransformationEstimationPointToPoint(with_scaling),
        o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=max_iteration))
    return reg_p2p

def merge_and_draw(source: o3d.geometry.PointCloud, target: o3d.geometry.PointCloud, transformation: np.ndarray = np.identity(4)) -> o3d.geometry.PointCloud:
    """Return ``target`` merged with ``source`` moved by ``transformation``.

    Both inputs are deep-copied, so neither is modified. Despite the name,
    nothing is displayed here; the caller draws the returned cloud.
    """
    aligned_source = copy.deepcopy(source)
    aligned_source.transform(transformation)
    return aligned_source + copy.deepcopy(target)


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
# ICP source: coloured point cloud built from the first VGGT batch.
# Colours are stored as uint8, hence the division to bring them into [0, 1].
source = o3d.geometry.PointCloud()
source.points, source.colors = (
    o3d.utility.Vector3dVector(batched_points_3d[0]),
    o3d.utility.Vector3dVector(batched_points_rgb[0] / 255.0),
)

In [5]:
# ICP target: coloured point cloud built from the second VGGT batch.
# Colours are stored as uint8, hence the division to bring them into [0, 1].
target = o3d.geometry.PointCloud()
target.points, target.colors = (
    o3d.utility.Vector3dVector(batched_points_3d[1]),
    o3d.utility.Vector3dVector(batched_points_rgb[1] / 255.0),
)

In [11]:
import copy

# Visual check before registration: show both clouds side by side.
# A pure translation (example x, y, z offset) is applied to a copy of the
# target so the two clouds do not overlap in the viewer.
translation = np.eye(4)
translation[:3, 3] = (2, 0.2, -0.3)

# Work on copies so `source` / `target` stay untouched for the ICP cells.
source_vis = copy.deepcopy(source)
target_vis = copy.deepcopy(target)
target_vis.transform(translation)

o3d.visualization.draw_geometries([source_vis, target_vis])

In [12]:
# Register the first batch (source) onto the second (target) with
# point-to-point ICP, allowing up to 4000 iterations.
reg_p2p = align_point_clouds(source=source, target=target, max_iteration=4000)

In [13]:
# Merge the ICP-aligned source with the target and display the result.
merged_cloud = merge_and_draw(source, target, reg_p2p.transformation)
o3d.visualization.draw_geometries([merged_cloud])