# Depth Anything 3 (DA3) Usage Example

This notebook demonstrates how to use Depth Anything 3 for camera pose and depth estimation.


In [None]:
# Install required packages
# !pip install depth-anything-3

In [None]:
# Install the `addict` package via uv's pip interface.
# NOTE(review): presumably a runtime dependency of depth_anything_3 — confirm, and consider pinning a version.
!uv pip install addict 

In [1]:
import os
import gc
import glob

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch


from depth_anything_3.api import DepthAnything3
from depth_anything_3.utils.visualize import visualize_depth

[97m[INFO ] ModelCache initialized[0m
[93m[WARN ] Dependency `gsplat` is required for rendering 3DGS. Install via: pip install git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70[0m


In [2]:
# Identifier of the pretrained checkpoint handed to `from_pretrained`.
MODEL_NAME = "depth-anything/DA3NESTED-GIANT-LARGE-1.1"

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the weights, move the model onto the chosen device, and switch it
# to evaluation mode before any inference is run.
model = DepthAnything3.from_pretrained(MODEL_NAME)
model = model.to(device)
model.eval()
print(f"Model loaded on {device}")

[97m[INFO ] Model cache MISS: da3nested-giant-large on cuda. Loading...[0m
[97m[INFO ] using SwiGLU layer as FFN[0m
[97m[INFO ] using MLP layer as FFN[0m
[97m[INFO ] Model cached: da3nested-giant-large on cuda[0m
[97m[INFO ] GPUInputProcessor initialized with device=cuda (NVJPEG enabled)[0m
[97m[INFO ] Using GPUInputProcessor (NVJPEG support enabled on cuda)[0m
Model loaded on cuda


In [3]:
# Load sample images and run inference
IMAGE_GLOB = "../data/4356/images/*.jpg"  # pattern matching the input frames
MAX_IMAGES = 2                            # number of frames fed to the model

# glob.glob returns matches in arbitrary, filesystem-dependent order, so sort
# before slicing to make the selected frames reproducible across runs.
# (A hard-coded path list used to precede this line but was overwritten
# immediately, so it has been removed.)
image_paths = sorted(glob.glob(IMAGE_GLOB))[:MAX_IMAGES]
if not image_paths:
    # Fail here with a clear message instead of passing an empty list to the model.
    raise FileNotFoundError(f"No images matched {IMAGE_GLOB!r}")

# Run inference
prediction = model.inference(
    image=image_paths,
    process_res=504,
    process_res_method="upper_bound_resize",
)
print(f"Depth shape: {prediction.depth.shape}")
print(f"Extrinsics: {prediction.extrinsics.shape if prediction.extrinsics is not None else 'None'}")
print(f"Intrinsics: {prediction.intrinsics.shape if prediction.intrinsics is not None else 'None'}")

# Collect unreachable Python objects first so any tensors they still hold are
# freed, then release PyTorch's cached CUDA memory. Ending the cell on
# empty_cache() (which returns None) also keeps gc.collect()'s integer return
# value from being echoed as cell output.
gc.collect()
torch.cuda.empty_cache()

[97m[INFO ] Processed Images Done taking 0.20669007301330566 seconds. Shape:  torch.Size([2, 3, 378, 504])[0m
[97m[INFO ] Model Forward Pass Done. Time: 0.6087818145751953 seconds[0m
[97m[INFO ] Conversion to Prediction Done. Time: 0.0010051727294921875 seconds[0m
Depth shape: (2, 378, 504)
Extrinsics: (2, 3, 4)
Intrinsics: (2, 3, 3)


78

In [6]:
# Echo the full inference result; as the cell's last expression, its repr is displayed.
prediction

Prediction(depth=array([[[6.9227676, 6.8979325, 6.9349837, ..., 6.8425274, 6.902506 ,
         6.8778615],
        [7.1019325, 6.8394485, 6.897662 , ..., 6.8362913, 6.783809 ,
         6.8170834],
        [6.925881 , 6.9420166, 6.947764 , ..., 6.8219805, 6.7791815,
         6.829266 ],
        ...,
        [1.321439 , 1.3202624, 1.3233435, ..., 1.4607216, 1.4608374,
         1.461333 ],
        [1.3264894, 1.3231192, 1.3253591, ..., 1.4556857, 1.4564066,
         1.4570692],
        [1.340921 , 1.326265 , 1.3282058, ..., 1.4537432, 1.4585526,
         1.4561818]],

       [[9.20652  , 9.20652  , 9.20652  , ..., 8.290206 , 8.219383 ,
         9.20652  ],
        [9.20652  , 9.20652  , 9.20652  , ..., 8.336343 , 8.182493 ,
         8.241579 ],
        [9.20652  , 9.85613  , 9.20652  , ..., 8.443877 , 9.20652  ,
         8.367616 ],
        ...,
        [1.1434184, 1.1414905, 1.1425039, ..., 1.0719016, 1.0729605,
         1.0740818],
        [1.1389155, 1.1393204, 1.1388811, ..., 1.066334

In [None]:
# Visualize input images and depth maps
# Take the column count from the prediction itself so the grid always matches
# the arrays indexed below, even if `image_paths` changed since inference ran.
n_images = len(prediction.depth)

# squeeze=False keeps `axes` two-dimensional (2 x n_images) even for a single
# image, so no manual reshape is needed.
fig, axes = plt.subplots(2, n_images, figsize=(12, 6), squeeze=False)

for i in range(n_images):
    input_ax, depth_ax = axes[0, i], axes[1, i]

    # Top row: processed input image (panel stays blank if none were returned)
    if prediction.processed_images is not None:
        input_ax.imshow(prediction.processed_images[i])
    input_ax.set_title(f"Input {i+1}")
    input_ax.axis('off')

    # Bottom row: depth map rendered by visualize_depth with the Spectral colormap
    depth_vis = visualize_depth(prediction.depth[i], cmap="Spectral")
    depth_ax.imshow(depth_vis)
    depth_ax.set_title(f"Depth {i+1}")
    depth_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Display the inference result. The variable assigned by model.inference is
# `prediction`; the previous name `predictions` was never defined and raised
# NameError on a fresh kernel.
prediction

In [None]:
# Visualize input images and depth maps
# Take the column count from the prediction itself so the grid always matches
# the arrays indexed below, even if `image_paths` changed since inference ran.
n_images = len(prediction.depth)

# squeeze=False keeps `axes` two-dimensional (2 x n_images) even for a single
# image, so no manual reshape is needed.
fig, axes = plt.subplots(2, n_images, figsize=(12, 6), squeeze=False)

for i in range(n_images):
    input_ax, depth_ax = axes[0, i], axes[1, i]

    # Top row: processed input image (panel stays blank if none were returned)
    if prediction.processed_images is not None:
        input_ax.imshow(prediction.processed_images[i])
    input_ax.set_title(f"Input {i+1}")
    input_ax.axis('off')

    # Bottom row: depth map rendered by visualize_depth with the Spectral colormap
    depth_vis = visualize_depth(prediction.depth[i], cmap="Spectral")
    depth_ax.imshow(depth_vis)
    depth_ax.set_title(f"Depth {i+1}")
    depth_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Show the raw depth values for the first image (index 0 along the leading axis).
prediction.depth[0]

In [None]:
# Visualize input images and depth maps
# Take the column count from the prediction itself so the grid always matches
# the arrays indexed below, even if `image_paths` changed since inference ran.
n_images = len(prediction.depth)

# squeeze=False keeps `axes` two-dimensional (2 x n_images) even for a single
# image, so no manual reshape is needed.
fig, axes = plt.subplots(2, n_images, figsize=(12, 6), squeeze=False)

for i in range(n_images):
    input_ax, depth_ax = axes[0, i], axes[1, i]

    # Top row: processed input image (panel stays blank if none were returned)
    if prediction.processed_images is not None:
        input_ax.imshow(prediction.processed_images[i])
    input_ax.set_title(f"Input {i+1}")
    input_ax.axis('off')

    # Bottom row: depth map rendered by visualize_depth with the Spectral colormap
    depth_vis = visualize_depth(prediction.depth[i], cmap="Spectral")
    depth_ax.imshow(depth_vis)
    depth_ax.set_title(f"Depth {i+1}")
    depth_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Show the raw depth values for the first image (index 0 along the leading axis).
prediction.depth[0]

In [None]:
# Visualize input images and depth maps
# Take the column count from the prediction itself so the grid always matches
# the arrays indexed below, even if `image_paths` changed since inference ran.
n_images = len(prediction.depth)

# squeeze=False keeps `axes` two-dimensional (2 x n_images) even for a single
# image, so no manual reshape is needed.
fig, axes = plt.subplots(2, n_images, figsize=(12, 6), squeeze=False)

for i in range(n_images):
    input_ax, depth_ax = axes[0, i], axes[1, i]

    # Top row: processed input image (panel stays blank if none were returned)
    if prediction.processed_images is not None:
        input_ax.imshow(prediction.processed_images[i])
    input_ax.set_title(f"Input {i+1}")
    input_ax.axis('off')

    # Bottom row: depth map rendered by visualize_depth with the Spectral colormap
    depth_vis = visualize_depth(prediction.depth[i], cmap="Spectral")
    depth_ax.imshow(depth_vis)
    depth_ax.set_title(f"Depth {i+1}")
    depth_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Show the raw depth values for the first image (index 0 along the leading axis).
prediction.depth[0]