# DUSt3R implementation

In [2]:
import sys
import os
sys.path.append("../dust3r/")

from dust3r.inference import inference
from dust3r.model import AsymmetricCroCo3DStereo
from dust3r.utils.image import load_images
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode



  from .autonotebook import tqdm as notebook_tqdm
  @torch.cuda.amp.autocast(enabled=False)


In [3]:
# --- Run configuration -------------------------------------------------------
# `device` is used both for the model below and for the global aligner in the next cell.
device = 'cpu'  # or 'cuda' if you have a GPU
# Passed to inference() as its batch_size argument.
batch_size = 1
# `schedule`, `lr` and `niter` are not used in this cell; they are consumed by
# scene.compute_global_alignment(...) in the next cell.
schedule = 'cosine'
lr = 0.01 # learning rate for global alignment optimization
niter = 300 # number of iterations for global alignment optimization

# Identifier handed to from_pretrained() below.
model_name = "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"

# you can put the path to a local checkpoint in model_name if needed
model = AsymmetricCroCo3DStereo.from_pretrained(model_name).to(device)

# load_images can take a list of images or a directory
# NOTE: the input directory and size=512 are hard-coded; in the recorded run the
# 3000x4000 photos were logged as resized to 384x512.
images = load_images("../images/", size=512)
# scene_graph='complete' with symmetrize=True presumably pairs every image with every
# other one in both directions (TODO confirm against dust3r.image_pairs); the recorded
# run's progress bar shows 380 iterations taking roughly 53 minutes on CPU.
pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
# `output` is the input to global_aligner(...) in the next cell.
output = inference(pairs, model, device, batch_size=batch_size)

>> Loading images from ../images/
 - adding stop_01_20251128_141444.jpg with resolution 3000x4000 --> 384x512
 - adding stop_02_20251128_141452.jpg with resolution 3000x4000 --> 384x512
 - adding stop_03_20251128_141501.jpg with resolution 3000x4000 --> 384x512
 - adding stop_04_20251128_141510.jpg with resolution 3000x4000 --> 384x512
 - adding stop_05_20251128_141518.jpg with resolution 3000x4000 --> 384x512
 - adding stop_06_20251128_141527.jpg with resolution 3000x4000 --> 384x512
 - adding stop_07_20251128_141536.jpg with resolution 3000x4000 --> 384x512
 - adding stop_08_20251128_141544.jpg with resolution 3000x4000 --> 384x512
 - adding stop_09_20251128_141553.jpg with resolution 3000x4000 --> 384x512
 - adding stop_10_20251128_141602.jpg with resolution 3000x4000 --> 384x512
 - adding stop_11_20251128_141611.jpg with resolution 3000x4000 --> 384x512
 - adding stop_12_20251128_141620.jpg with resolution 3000x4000 --> 384x512
 - adding stop_13_20251128_141628.jpg with resolution 

  with torch.cuda.amp.autocast(enabled=bool(use_amp)):
  with torch.cuda.amp.autocast(enabled=False):
  with torch.cuda.amp.autocast(enabled=False):
100%|██████████| 380/380 [52:58<00:00,  8.37s/it]


In [4]:
# Next, use global_aligner to align the pairwise predictions held in `output`.
# Depending on your task, the raw output may be good enough and this step can be skipped.
# With only two input images you could use GlobalAlignerMode.PairViewer instead: it just
# converts the output, so there is no need to run compute_global_alignment in that mode.
# `output`, `device`, `niter`, `schedule` and `lr` all come from the previous cell.
scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer)
# The recorded run took about 32 minutes for 300 iterations and ended at loss=0.005374.
loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)

 init edge (2*,4*) score=np.float64(21.498865127563477)
 init edge (2,1*) score=np.float64(19.41025733947754)
 init edge (16*,1) score=np.float64(19.210765838623047)
 init edge (3*,1) score=np.float64(17.95309066772461)
 init edge (1,19*) score=np.float64(16.73318862915039)
 init edge (4,11*) score=np.float64(16.52924156188965)
 init edge (5*,3) score=np.float64(16.426549911499023)
 init edge (8*,11) score=np.float64(16.35724449157715)
 init edge (3,0*) score=np.float64(15.730518341064453)
 init edge (1,18*) score=np.float64(15.020994186401367)
 init edge (1,17*) score=np.float64(14.386512756347656)
 init edge (3,6*) score=np.float64(13.243693351745605)
 init edge (11,13*) score=np.float64(12.499796867370605)
 init edge (14*,11) score=np.float64(12.340362548828125)
 init edge (15*,11) score=np.float64(20.018617630004883)
 init edge (5,10*) score=np.float64(17.963863372802734)
 init edge (8,12*) score=np.float64(16.6411190032959)
 init edge (12,9*) score=np.float64(16.278690338134766)
 

Consider using tensor.detach() first. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\autograd\generated\python_variable_methods.cpp:837.)
  return float(loss), lr
100%|██████████| 300/300 [31:52<00:00,  6.38s/it, lr=1.27413e-06 loss=0.005374]  


In [13]:
# Retrieve useful values from the aligned `scene` (created in the global-alignment cell).
# `pts3d`, `confidence_masks` and `imgs` are later passed to save_pts3d_ply as the
# points, masks and colours; `focals` and `poses` are not used in the visible cells.
imgs = scene.imgs
focals = scene.get_focals()
poses = scene.get_im_poses()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()

# Visualize the reconstruction. In the recorded run this call's result was displayed as a
# dust3r.viz.SceneViz object.
scene.show()

<dust3r.viz.SceneViz at 0x1e6393f2480>

In [19]:
# Save pts3d (tensor/ndarray/list) to a PLY file with optional masks/colors
import numpy as np
import open3d as o3d

def _to_numpy(x):
    try:
        import torch
        if isinstance(x, torch.Tensor):
            return x.detach().cpu().numpy()
    except Exception:
        pass
    return np.asarray(x)

def save_pts3d_ply(pts3d, masks=None, colors=None, filename="output_pointcloud.ply"):
    """
    Save 3D points to a PLY file.

    Parameters
    - pts3d: either
        * a single array/tensor of shape (H,W,3) or (N,3), or
        * a list/tuple of arrays/tensors [(H,W,3), ...] or [(M,3), ...]
    - masks (optional): same structure as pts3d; one boolean mask per item with one
      entry per point, e.g. (H,W) or (N,). Items without a mask keep every point,
      except that (H,W,3) items drop points with any non-finite coordinate.
    - colors (optional): same structure as pts3d; each item has as many RGB rows as
      its points item ((H,W,3) or (N,3)). Values in 0-255 or 0-1 are accepted.
    - filename: output path; its parent directory is created if it does not exist.

    Raises
    - ValueError: unsupported shape, or a mask/colour item whose size does not match
      its points item.
    - RuntimeError: nothing is left to save after masking, or the write fails.
    """
    import os
    import warnings

    pts_list = pts3d if isinstance(pts3d, (list, tuple)) else [pts3d]
    masks_list = None if masks is None else (masks if isinstance(masks, (list, tuple)) else [masks])
    colors_list = None if colors is None else (colors if isinstance(colors, (list, tuple)) else [colors])

    collected_pts = []
    collected_cols = []

    for i, p in enumerate(pts_list):
        p_np = _to_numpy(p)
        if p_np.size == 0:
            continue
        if p_np.ndim not in (2, 3) or p_np.shape[-1] != 3:
            raise ValueError(f"Unsupported pts3d shape {p_np.shape}")

        # Work on a flat (N,3) view so points, mask and colours share one row index,
        # whatever mix of (H,W,3) and (N,3) layouts the caller supplies.
        pts_flat = p_np.reshape(-1, 3)
        n_pts = pts_flat.shape[0]

        if masks_list is not None and i < len(masks_list):
            keep = _to_numpy(masks_list[i]).reshape(-1).astype(bool)
            if keep.shape[0] != n_pts:
                raise ValueError(
                    f"Mask {i} has {keep.shape[0]} entries but pts3d[{i}] has {n_pts} points"
                )
        elif p_np.ndim == 3:
            # No mask given for an image-shaped item: drop points with NaN/inf coordinates.
            keep = np.isfinite(pts_flat).all(axis=1)
        else:
            keep = None  # flat (N,3) input without a mask: keep every point

        collected_pts.append(pts_flat if keep is None else pts_flat[keep])

        if colors_list is not None and i < len(colors_list):
            c_np = _to_numpy(colors_list[i])
            if c_np.ndim not in (2, 3) or c_np.shape[-1] != 3:
                raise ValueError(f"Unsupported colors shape {c_np.shape}")
            c_flat = c_np.reshape(-1, 3)
            if c_flat.shape[0] != n_pts:
                raise ValueError(
                    f"Colors {i} has {c_flat.shape[0]} rows but pts3d[{i}] has {n_pts} points"
                )
            # Apply exactly the selection used for the points so rows stay aligned.
            collected_cols.append(c_flat if keep is None else c_flat[keep])

    # Also covers the case where every item was present but fully masked out.
    if sum(len(chunk) for chunk in collected_pts) == 0:
        raise RuntimeError("No points to save.")

    pts_all = np.concatenate(collected_pts, axis=0)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(pts_all.astype(np.float64))

    if collected_cols:
        cols_all = np.concatenate(collected_cols, axis=0).astype(np.float64)
        if len(cols_all) != len(pts_all):
            # Happens when `colors` has fewer items than `pts3d`; a partial colour
            # array cannot be paired with the points row-for-row.
            warnings.warn(
                f"Got {len(cols_all)} colors for {len(pts_all)} points; saving without colors."
            )
        else:
            # Heuristic: values above ~1 mean the colours are on a 0-255 scale.
            if cols_all.max() > 1.5:
                cols_all = cols_all / 255.0
            cols_all = np.clip(cols_all, 0.0, 1.0)
            pcd.colors = o3d.utility.Vector3dVector(cols_all)

    # Make sure the target directory exists so the write does not fail on a fresh checkout.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    ok = o3d.io.write_point_cloud(filename, pcd)
    if not ok:
        raise RuntimeError(f"Failed to write point cloud to {filename}")
    print(f"Saved {len(pts_all)} points to '{filename}'")

# Example usage: export the points retrieved from `scene`, using confidence_masks as the
# per-point selection and imgs as the colours (all three are defined in an earlier cell).
save_pts3d_ply(pts3d, masks=confidence_masks, colors=imgs, filename="../reconstructions/dust3r.ply")

Saved 1927307 points to '../reconstructions/dust3r.ply'


In [None]:
import sys
sys.path.append('../')

from utils.pointCloud import clean_point_cloud, visualize_point_cloud

# Example: read the PLY we saved earlier, clean it with the project's clean_point_cloud
# helper, and visualize the result. Relies on `o3d` imported in an earlier cell.
ply_path = '../reconstructions/dust3r.ply'
try:
    pcd = o3d.io.read_point_cloud(ply_path)
except Exception as exc:
    # Best-effort load: keep the notebook running, but report why the read failed
    # instead of discarding the error.
    print(f'Reading {ply_path} raised {type(exc).__name__}: {exc}')
    pcd = None

if pcd is None or len(pcd.points) == 0:
    print(f'Could not read {ply_path} or file empty — you can call save_pts3d_ply first or pass another path.')
else:
    print(f'Loaded {len(pcd.points)} points from {ply_path}')
    pcd_clean = clean_point_cloud(pcd, method='statistical', nb_neighbors=50, std_ratio=0.5, voxel_size=None)
    print(f'Cleaned -> {len(pcd_clean.points)} points')
    visualize_point_cloud(pcd_clean, window_name='Dust3R Reconstruction (cleaned)')

Loaded 1927307 points from ../reconstructions/dust3r.ply
Cleaned -> 1476778 points
Cleaned -> 1476778 points
