# Dust3r implementation

## Loading Libraries

In [1]:
import sys
sys.path.append("../dust3r/")

from dust3r.inference import inference
from dust3r.model import AsymmetricCroCo3DStereo
from dust3r.utils.image import load_images
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode



  @torch.cuda.amp.autocast(enabled=False)


## Setting environment

In [2]:
# Add the parent directory to the import path so the local `utils` package
# imported on the next line can be resolved.
sys.path.append("../")
from utils.imageSelector import select_equally_distributed_images

device = 'cuda'  # use 'cpu' instead if no CUDA-capable GPU is available
batch_size = 1  # forwarded to inference() in the inference cell
schedule = 'cosine'  # forwarded as `schedule=` to compute_global_alignment()
lr = 0.01 # learning rate for global alignment optimization
niter = 300 # number of iterations for global alignment optimization

model_name = "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"

# you can put the path to a local checkpoint in model_name if needed
model = AsymmetricCroCo3DStereo.from_pretrained(model_name).to(device)

# Select 3 images from ../segmented_images/ (presumably evenly spaced over the
# sequence, going by the helper's name - confirm in utils/imageSelector).
# NOTE(review): despite its name, `loaded_images` appears to hold file paths
# (see the "adding <path>" lines in the recorded output); the actual loading
# happens in load_images() below.
loaded_images = select_equally_distributed_images("../segmented_images/", 3)
# load_images can take a list of images or a directory
images = load_images(loaded_images, size=512)

>> Loading a list of 3 images
 - adding ../segmented_images/stop_01_20251128_141444.jpg with resolution 3000x4000 --> 384x512
 - adding ../segmented_images/stop_10_20251128_141602.jpg with resolution 3000x4000 --> 384x512
 - adding ../segmented_images/stop_20_20251128_141730.jpg with resolution 3000x4000 --> 384x512
 (Found 3 images)


## Inference

In [3]:
# Pair up the loaded views for the network. With scene_graph='complete' and
# symmetrize=True the recorded run below reports 6 pairs for the 3 input images.
pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
# Run the pretrained model over all pairs on `device`; `batch_size` comes from
# the environment cell and is forwarded unchanged.
output = inference(pairs, model, device, batch_size=batch_size)

>> Inference with model on 6 image pairs


  with torch.cuda.amp.autocast(enabled=bool(use_amp)):
  with torch.cuda.amp.autocast(enabled=False):
  with torch.cuda.amp.autocast(enabled=False):
100%|██████████| 6/6 [00:02<00:00,  2.31it/s]


## Global Alignment

In [4]:
# Next, use global_aligner to align the pairwise predictions into one scene.
# Depending on your task, the raw inference output may be enough and this step
# can be skipped.
# With only two input images you could use GlobalAlignerMode.PairViewer instead:
# it just converts the output, so compute_global_alignment does not need to run.
scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer)
# Optimise the alignment using niter / schedule / lr from the environment cell.
# The call returns the loss, kept in `loss` (not used by later visible cells).
# NOTE(review): init="mst" presumably selects a minimum-spanning-tree
# initialisation - confirm against the DUSt3R documentation.
loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)

 init edge (0*,2*) score=4.184772968292236
 init edge (1*,0) score=2.235483407974243
 init loss = 0.004356085788458586
Global alignement - optimizing for:
['pw_poses', 'im_depthmaps', 'im_poses', 'im_focals']


Consider using tensor.detach() first. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\autograd\generated\python_variable_methods.cpp:837.)
  return float(loss), lr
100%|██████████| 300/300 [00:08<00:00, 36.74it/s, lr=1.27413e-06 loss=0.00225511]


## Showing the reconstructed scene

In [5]:
# Retrieve useful values from the aligned scene.
# `imgs`, `pts3d` and `confidence_masks` are consumed by the PLY export cell;
# `focals` and `poses` are not used by any later cell visible in this notebook.
imgs = scene.imgs
focals = scene.get_focals()
poses = scene.get_im_poses()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()

# Visualize the reconstruction. As the last expression of the cell, the value
# returned by show() is also echoed as the cell output.
scene.show()

<dust3r.viz.SceneViz at 0x1e99abc2490>

## Saving to PLY file

In [6]:
# Save pts3d (tensor/ndarray/list) to a PLY file with optional masks/colors
import numpy as np
import open3d as o3d

def _to_numpy(x):
    """Return ``x`` as a NumPy array.

    A ``torch.Tensor`` is detached, moved to the CPU and converted with
    ``.numpy()``; any other input is handed to ``np.asarray``. Errors raised by
    the conversion propagate to the caller instead of being swallowed.
    """
    import sys

    # A torch.Tensor instance can only exist if torch has already been imported,
    # so look the module up instead of importing it. This keeps the helper
    # usable without torch and removes the need for a blanket try/except that
    # would also hide genuine conversion failures.
    torch = sys.modules.get("torch")
    if torch is not None and isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    return np.asarray(x)

def save_pts3d_ply(pts3d, masks=None, colors=None, filename="output_pointcloud.ply"):
    """
    Save 3D points to a PLY file.
    - pts3d can be:
        * a single array/tensor of shape (H,W,3) or (N,3)
        * a list/tuple of arrays/tensors [(H,W,3), ...] or [(M,3), ...]
    - masks (optional) can match pts3d structure: single (H,W) bool array or list of masks
    - colors (optional) can be same shape as pts3d (H,W,3) or list; values 0-255 or 0-1 accepted

    Selection rules, applied per view (entry ``i`` of each list):
    - if a mask exists for the view, only points where it is true are kept;
    - otherwise (H,W,3) points are kept where their x coordinate is finite and
      (N,3) points are kept unchanged;
    - the same selection is applied to the view's colors, whatever their layout,
      so points and colors always stay aligned.

    If colors are supplied for some views but not all, a warning is issued and
    the file is written without colors rather than with a misaligned array.

    Raises:
        ValueError: unsupported pts3d/colors shape, or a mask/colors entry whose
            element count does not match its points.
        RuntimeError: nothing is left to save, or the file cannot be written.
    """
    import warnings

    pts_list = pts3d if isinstance(pts3d, (list, tuple)) else [pts3d]
    masks_list = None if masks is None else (masks if isinstance(masks, (list, tuple)) else [masks])
    colors_list = None if colors is None else (colors if isinstance(colors, (list, tuple)) else [colors])

    collected_pts = []
    collected_cols = []
    views_without_colors = 0

    for i, p in enumerate(pts_list):
        p_np = _to_numpy(p)
        if p_np.size == 0:
            continue

        if p_np.ndim not in (2, 3) or p_np.shape[-1] != 3:
            raise ValueError(f"Unsupported pts3d shape {p_np.shape}")

        # Work on the flattened (N,3) view so one 1-D selection vector can be
        # shared by points and colors regardless of their original layout.
        flat_pts = p_np.reshape(-1, 3)
        n_pts = flat_pts.shape[0]

        if masks_list is not None and i < len(masks_list):
            keep = _to_numpy(masks_list[i]).astype(bool).reshape(-1)
            if keep.shape[0] != n_pts:
                raise ValueError(
                    f"Mask {i} has {keep.shape[0]} elements but pts3d[{i}] has {n_pts} points"
                )
        elif p_np.ndim == 3:
            # No mask for an image-shaped point map: drop non-finite pixels.
            keep = np.isfinite(flat_pts[:, 0])
        else:
            # No mask for a flat point list: keep every point.
            keep = np.ones(n_pts, dtype=bool)

        collected_pts.append(flat_pts[keep])

        if colors_list is not None:
            if i < len(colors_list):
                c_np = _to_numpy(colors_list[i])
                if c_np.ndim not in (2, 3) or c_np.shape[-1] != 3:
                    raise ValueError(f"Unsupported colors shape {c_np.shape}")
                flat_cols = c_np.reshape(-1, 3)
                if flat_cols.shape[0] != n_pts:
                    raise ValueError(
                        f"Colors {i} has {flat_cols.shape[0]} entries but pts3d[{i}] has {n_pts} points"
                    )
                collected_cols.append(flat_cols[keep])
            else:
                views_without_colors += 1

    # Also covers the case where every view was masked out entirely.
    if not collected_pts or sum(len(chunk) for chunk in collected_pts) == 0:
        raise RuntimeError("No points to save.")

    pts_all = np.concatenate(collected_pts, axis=0)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(pts_all.astype(np.float64))

    if collected_cols and views_without_colors:
        warnings.warn(
            f"Colors missing for {views_without_colors} view(s); saving '{filename}' without colors."
        )
    elif collected_cols:
        cols_all = np.concatenate(collected_cols, axis=0).astype(np.float64)
        # Heuristic: values above ~1 mean the colors are on a 0-255 scale.
        if cols_all.max() > 1.5:
            cols_all = cols_all / 255.0
        cols_all = np.clip(cols_all, 0.0, 1.0)
        pcd.colors = o3d.utility.Vector3dVector(cols_all)

    ok = o3d.io.write_point_cloud(filename, pcd)
    if not ok:
        raise RuntimeError(f"Failed to write point cloud to {filename}")
    print(f"Saved {len(pts_all)} points to '{filename}'")

# Export the aligned scene from the cells above: `confidence_masks` selects
# which points are written and `imgs` supplies their colors.
# NOTE(review): the file name appears to encode <number of images>_<segmented?>
# to match the results table at the end of the notebook - confirm and update it
# when either setting changes. Presumably ../reconstructions/ must already
# exist; save_pts3d_ply raises RuntimeError if the write fails.
save_pts3d_ply(pts3d, masks=confidence_masks, colors=imgs, filename="../reconstructions/dust3r_3_True.ply")

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Saved 103279 points to '../reconstructions/dust3r_3_True.ply'


## Visualizing the point cloud

In [7]:
import sys
sys.path.append('../')
import open3d as o3d

from utils.pointCloud import visualize_point_cloud

# Read back the PLY written by the export cell and display it.
# NOTE(review): nothing in this cell cleans the cloud; the "(cleaned)" window
# title suggests visualize_point_cloud may do so internally - confirm in
# utils/pointCloud.
ply_path = '../reconstructions/dust3r_3_True.ply'
# Any exception raised while reading is swallowed and turned into pcd = None,
# so the only feedback on failure is the generic message printed below.
try:
    pcd = o3d.io.read_point_cloud(ply_path)
except Exception:
    pcd = None

# Treat a failed read and an empty cloud the same way: report it and skip the viewer.
if pcd is None or len(pcd.points) == 0:
    print(f'Could not read {ply_path} or file empty — you can call save_pts3d_ply first or pass another path.')
else:
    print(f'Loaded {len(pcd.points)} points from {ply_path}')
    visualize_point_cloud(pcd, window_name='Dust3R Reconstruction (cleaned)')

Loaded 103279 points from ../reconstructions/dust3r_3_True.ply


## Outputs

| Number of Images | Using Segmentation? | Output Number of Points | Reconstruction Time on GPU | Point Cloud Reconstruction Quality (1-10) |
| - | - | - | - | - |
| 20 | No | 1,927,307 | 41.18 min | 3 |
| 10 | No | 948,252 | 81 s | 3 |
| 8 | No | 764,791 | 164 s | 4 |
| 4 | No | 362,816 | 15 s | 3 |
| 3 | No | 282,624 | 10 s | 2 |
| 20 | Yes | 852,987 | 47 min | 5 |
| 10 | Yes | 420,200 | 81 s | 5 |
| 8 | Yes | 337,370 | 52 s | 5 |
| 4 | Yes | 152,935 | 14 s | 6 |
| 3 | Yes | 103,279 | 10 s | 4 |

Overall, the reconstructions are better with segmented images. However, all of them would need further cleaning to be presentable.