[Reference](https://medium.com/voxel51/vggt-is-a-pure-neural-approach-to-3d-vision-32841d5e3c32)

In [1]:
pip install fiftyone

Collecting fiftyone
  Downloading fiftyone-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.6.2-py3-none-any.whl.metadata (16 kB)
Collecting async_lru>=2 (from fiftyone)
  Downloading async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting boto3 (from fiftyone)
  Downloading boto3-1.40.1-py3-none-any.whl.metadata (6.7 kB)
Collecting dacite<1.8.0,>=1.6.0 (from fiftyone)
  Downloading dacite-1.7.0-py3-none-any.whl.metadata (14 kB)
Collecting Deprecated (from fiftyone)
  Downloading Deprecated-1.2.18-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting ftfy (from fiftyone)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting hypercorn>=0.13.2 (from fiftyone)
  Downloading hypercorn-0.17.3-py3-none-any.whl.metadata (5.4 kB)
Collecting kaleido!=0.2.1.post1 (from fiftyone)
  Downloading kaleido-1.0.0-py3-none-any.whl.metadata (5.6 kB)
Collecting mongoengine~=0.29.1 (from fiftyone)
  Downloading mongoengine-0.29.1-

In [2]:
import fiftyone as fo
from fiftyone.utils.huggingface import load_from_hub
import fiftyone.zoo as foz

# Hugging Face Hub repo that provides the images used in this notebook.
HUB_DATASET_REPO = "Voxel51/Total-Text-Dataset"
# GitHub repo registered below as a remote source of zoo models.
VGGT_ZOO_SOURCE = "https://github.com/harpreetsahota204/vggt"

dataset = load_from_hub(HUB_DATASET_REPO)

# Make the models from that repo loadable through the zoo API.
foz.register_zoo_model_source(VGGT_ZOO_SOURCE, overwrite=True)

In [3]:
# Options forwarded to the zoo loader for the VGGT checkpoint.
vggt_options = dict(
    install_requirements=True,
    mode="crop",  # "pad" can be passed instead
    confidence_threshold=0.7,
)
model = foz.load_zoo_model("facebook/VGGT-1B", **vggt_options)

# Run the model on the dataset, with "depth_map_path" as the target field.
dataset.apply_model(model, "depth_map_path")

In [4]:
import os
from pathlib import Path
import fiftyone as fo

# Build a grouped dataset in which every source image ("rgb" slice) is linked
# to the depth map ("depth") and 3D scene ("threed") expected beside it on disk.
grouped_dataset = fo.Dataset("vggt_results", overwrite=True)
grouped_dataset.add_group_field("group", default="rgb")

samples = []
missing_outputs = []  # derived files that were expected but not found
for filepath in dataset.values("filepath"):
    path = Path(filepath)
    base_dir = path.parent
    base_name = path.stem

    # Derived modalities are looked up next to the source image, by filename.
    derived_paths = {
        "depth": os.path.join(base_dir, f"{base_name}_depth.png"),
        "threed": os.path.join(base_dir, f"{base_name}.fo3d"),
    }

    group = fo.Group()
    samples.append(fo.Sample(filepath=filepath, group=group.element("rgb")))

    for slice_name, derived_path in derived_paths.items():
        # Only add a slice when its file exists; otherwise the sample would
        # point at media that is not on disk.
        if os.path.exists(derived_path):
            samples.append(
                fo.Sample(filepath=derived_path, group=group.element(slice_name))
            )
        else:
            missing_outputs.append(derived_path)

if missing_outputs:
    print(
        f"Skipped {len(missing_outputs)} missing depth/3D file(s), "
        f"e.g. {missing_outputs[0]}"
    )

grouped_dataset.add_samples(samples)

# Keep a handle on the session so later cells can refresh or close the App.
session = fo.launch_app(grouped_dataset)  # View results interactively