# VGGT Reconstruction

## Loading Libraries

In [1]:
# Standard-library, NumPy and PyTorch dependencies used throughout the notebook.
import os
import torch
import numpy as np
import sys
import glob
import gc
import time

# Extend the import path with "../vggt/" before the imports below, which pull
# `visual_util` and the `vggt` package (presumably from a local checkout of the
# VGGT repository at that location -- verify the path for your setup).
sys.path.append("../vggt/")

from visual_util import predictions_to_glb
from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images
from vggt.utils.pose_enc import pose_encoding_to_extri_intri
from vggt.utils.geometry import unproject_depth_map_to_point_map

# Extend the import path with the parent directory so the `utils` package
# imported on the next line can be resolved.
sys.path.append("../")
from utils.imageSelector import select_equally_distributed_images

# Torch device the model is placed on.
device = "cpu"

## Setting Up the Environment

In [2]:
print("Initializing and loading VGGT model...")

# Checkpoint location (VGGT-1B weights hosted on Hugging Face).
_URL = "https://huggingface.co/facebook/VGGT-1B/resolve/main/model.pt"

model = VGGT()
# `map_location` remaps the checkpoint's tensors onto `device` while loading,
# so the weights load even if the checkpoint was saved from a different device
# (e.g. a CUDA checkpoint on a CPU-only machine).
model.load_state_dict(torch.hub.load_state_dict_from_url(_URL, map_location=device))

# Inference only: switch to eval mode and place the model on the target device.
model.eval()
model = model.to(device)

Initializing and loading VGGT model...


## Defining Helper Functions

In [3]:
def run_model(target_dir, model, image_paths=None, device="cpu"):
    """
    Run the VGGT model on a set of images and return its predictions as NumPy arrays.

    Args:
        target_dir: Directory whose "images" subfolder is scanned (sorted by
            path) when `image_paths` is None. Also used in the log message.
        model: Model to run. It is moved to `device` and switched to eval mode.
        image_paths: Optional iterable of image file paths. When given, these
            are used as-is instead of scanning `target_dir/images`.
        device: Torch device the model and images are placed on. Defaults to
            "cpu", which is what this function always used before.

    Returns:
        dict: The model's predictions. Every tensor value is converted to a
        NumPy array with its leading (batch) dimension removed. Three entries
        are added: "extrinsic" and "intrinsic" (decoded from "pose_enc") and
        "world_points_from_depth" (the depth map unprojected with them).

    Raises:
        ValueError: If there are no images to process.
    """
    print(f"Processing images from {target_dir}")

    gc.collect()
    torch.cuda.empty_cache()

    # Move model to the requested device and make sure it is in inference mode
    model = model.to(device)
    model.eval()

    # Load and preprocess images (either provided list or all in folder)
    if image_paths is None:
        image_names = sorted(glob.glob(os.path.join(target_dir, "images", "*")))
    else:
        image_names = list(image_paths)
    print(f"Found {len(image_names)} images to process")
    if not image_names:
        raise ValueError("No images found. Check your upload or selection.")

    images = load_and_preprocess_images(image_names).to(device)
    print(f"Preprocessed images shape: {images.shape}")

    # Run inference
    print("Running inference...")
    with torch.no_grad():
        predictions = model(images)

    # Convert pose encoding to extrinsic and intrinsic matrices
    # (the last two dimensions of `images` are passed as the image size)
    print("Converting pose encoding to extrinsic and intrinsic matrices...")
    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:])
    predictions["extrinsic"] = extrinsic
    predictions["intrinsic"] = intrinsic

    # Convert tensors to numpy; iterate over a snapshot of the keys because the
    # dict values are replaced while looping
    for key in list(predictions.keys()):
        if isinstance(predictions[key], torch.Tensor):
            predictions[key] = predictions[key].cpu().numpy().squeeze(0)  # remove batch dimension

    # Generate world points from depth map
    print("Computing world points from depth map...")
    depth_map = predictions["depth"]  # (S, H, W, 1)
    world_points = unproject_depth_map_to_point_map(depth_map, predictions["extrinsic"], predictions["intrinsic"])
    predictions["world_points_from_depth"] = world_points

    # Clean up
    torch.cuda.empty_cache()
    return predictions

In [4]:
def perform_reconstruction(
    target_dir,
    image_dir,
    conf_thres=50.0,
    frame_filter="All",
    mask_black_bg=True,
    mask_white_bg=False,
    show_cam=True,
    mask_sky=False,
    prediction_mode="Pointmap Regression",
    n_select=10,
):
    """
    Reconstruct a scene from a subset of the images in `image_dir`.

    `n_select` equally-distributed images are picked from `image_dir` (via
    `select_equally_distributed_images`) and the module-level `model` is run
    on that subset. If the selector returns nothing, every file in `image_dir`
    (sorted by path) is used instead. The predictions are saved to
    `target_dir/predictions.npz` and the scene is exported to
    `target_dir/Reconstruction.glb`.

    Args:
        target_dir: Existing directory that receives the output files.
        image_dir: Directory containing the input images.
        conf_thres, mask_black_bg, mask_white_bg, show_cam, mask_sky,
        prediction_mode: Forwarded unchanged to `predictions_to_glb`.
        frame_filter: Forwarded to `predictions_to_glb` as `filter_by_frames`;
            None is treated as "All".
        n_select: Number of images requested from the selector.

    Returns:
        The path of the exported GLB file on success. If `target_dir` is
        missing, empty, the string "None", or not a directory, the tuple
        `(None, <message>, None, None)` is returned instead and nothing is run.
    """
    # Check for None/empty before calling os.path.isdir, which raises
    # TypeError when handed None.
    if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
        return None, "No valid target directory found. Please upload first.", None, None

    start_time = time.time()
    gc.collect()
    torch.cuda.empty_cache()

    # Select equally distributed images
    selected_images = select_equally_distributed_images(image_dir, n_select)
    if selected_images:
        print(f"Selected {len(selected_images)} equally distributed images for reconstruction.")
    else:
        # fallback: use all images in the folder
        print("Selector returned no images, falling back to all images in the folder.")
        selected_images = sorted(glob.glob(os.path.join(image_dir, "*")))

    print("Running run_model on chosen images...")
    with torch.no_grad():
        predictions = run_model(target_dir, model, image_paths=selected_images)

    # Save predictions so the scene can be re-rendered later without
    # re-running the model
    prediction_save_path = os.path.join(target_dir, "predictions.npz")
    np.savez(prediction_save_path, **predictions)

    # Handle None frame_filter
    if frame_filter is None:
        frame_filter = "All"

    glbfile = os.path.join(target_dir, "Reconstruction.glb")

    # Convert predictions to GLB
    glbscene = predictions_to_glb(
        predictions,
        conf_thres=conf_thres,
        filter_by_frames=frame_filter,
        mask_black_bg=mask_black_bg,
        mask_white_bg=mask_white_bg,
        show_cam=show_cam,
        mask_sky=mask_sky,
        target_dir=target_dir,
        prediction_mode=prediction_mode,
    )
    glbscene.export(file_obj=glbfile)

    # Cleanup: drop the prediction arrays before returning
    del predictions
    gc.collect()
    torch.cuda.empty_cache()

    end_time = time.time()
    print(f"Total time: {end_time - start_time:.2f} seconds (including IO)")
    print(f"Reconstruction Success ({len(selected_images)} frames).")

    return glbfile

In [5]:
def update_visualization(
    target_dir, conf_thres, frame_filter, mask_black_bg, mask_white_bg, show_cam, mask_sky, prediction_mode, is_example
):
    """
    Rebuild the GLB scene from saved predictions using new display parameters.

    Loads `target_dir/predictions.npz`, converts it with `predictions_to_glb`
    and overwrites `target_dir/Reconstruction.glb`. The model is not re-run.

    Args:
        target_dir: Directory containing `predictions.npz`.
        conf_thres, mask_black_bg, mask_white_bg, show_cam, mask_sky,
        prediction_mode: Forwarded unchanged to `predictions_to_glb`.
        frame_filter: Forwarded to `predictions_to_glb` as `filter_by_frames`.
        is_example: When equal to the string "True", nothing is done.

    Returns:
        The path of the exported GLB file on success. Otherwise the tuple
        `(None, <message>)` when `is_example == "True"`, when `target_dir` is
        not a usable directory, or when `predictions.npz` does not exist.

    Raises:
        KeyError: If the saved file lacks one of the expected prediction keys.
    """

    # If it's an example click, skip as requested
    if is_example == "True":
        return None, "No reconstruction available. Please click the Reconstruct button first."

    if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
        return None, "No reconstruction available. Please click the Reconstruct button first."

    predictions_path = os.path.join(target_dir, "predictions.npz")
    if not os.path.exists(predictions_path):
        return None, f"No reconstruction available at {predictions_path}. Please run 'Reconstruct' first."

    key_list = [
        "pose_enc",
        "depth",
        "depth_conf",
        "world_points",
        "world_points_conf",
        "images",
        "extrinsic",
        "intrinsic",
        "world_points_from_depth",
    ]

    # Copy the arrays out inside the `with` block so the archive's file handle
    # is closed as soon as loading is done.
    with np.load(predictions_path) as loaded:
        predictions = {key: np.array(loaded[key]) for key in key_list}

    glbfile = os.path.join(target_dir, "Reconstruction.glb")

    glbscene = predictions_to_glb(
        predictions,
        conf_thres=conf_thres,
        filter_by_frames=frame_filter,
        mask_black_bg=mask_black_bg,
        mask_white_bg=mask_white_bg,
        show_cam=show_cam,
        mask_sky=mask_sky,
        target_dir=target_dir,
        prediction_mode=prediction_mode,
    )
    glbscene.export(file_obj=glbfile)

    return glbfile

## Performing Reconstruction

In [6]:
# Outputs are written to the parent directory; input photos are read from its
# "Photos" subfolder. The call is left as the cell's final expression so the
# returned GLB path is displayed.
target_dir = "../"
image_dir = os.path.join(target_dir, "Photos")
perform_reconstruction(
    target_dir=target_dir,
    image_dir=image_dir,
    n_select=10,
)

Selected 10 equally distributed images for reconstruction.
Running run_model on chosen images...
Processing images from ../
Found 10 images to process
Preprocessed images shape: torch.Size([10, 3, 392, 518])
Running inference...


  with torch.cuda.amp.autocast(enabled=False):


Converting pose encoding to extrinsic and intrinsic matrices...
Computing world points from depth map...
Building GLB scene
Using Pointmap Branch
GLB Scene built
Total time: 91.99 seconds (including IO)
Reconstruction Success (10 frames).


'../Reconstruction.glb'

## Showing the Reconstruction

In [7]:
# Re-render the saved predictions with a confidence threshold of 60 and the
# camera display switched off; arguments are named so each setting is explicit.
update_visualization(
    target_dir=target_dir,
    conf_thres=60,
    frame_filter="All",
    mask_black_bg=True,
    mask_white_bg=False,
    show_cam=False,
    mask_sky=False,
    prediction_mode="Predicted Pointmap",
    is_example="False",
)

Building GLB scene
Using Pointmap Branch
GLB Scene built


'../Reconstruction.glb'

## Getting Point Cloud as PLY

In [8]:
import trimesh
import numpy as np
import open3d as o3d

# Load the GLB file
scene = trimesh.load(os.path.join(target_dir, 'Reconstruction.glb'))
# Some GLB exports may use a different geometry key; try to find the first geometry
if hasattr(scene, 'geometry') and len(scene.geometry) > 0:
    # pick the first geometry
    point_cloud_trimesh = list(scene.geometry.values())[0]
else:
    # fallback if scene is a PointCloud directly
    point_cloud_trimesh = scene

points = point_cloud_trimesh.vertices
# Colors are optional: not every geometry object exposes a `colors` attribute
colors = getattr(point_cloud_trimesh, 'colors', None)

# Create Open3D point cloud
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
if colors is not None:
    try:
        # Keep only the first three channels and rescale by 255
        # (assumes 8-bit colour values, possibly with an alpha channel -- verify)
        pcd.colors = o3d.utility.Vector3dVector(colors[:, :3] / 255.0)
    except Exception as e:
        # Still best-effort (the cloud is exported without colours), but report
        # the problem instead of hiding it.
        print(f"Could not attach colors to the point cloud: {e}")

# Save as .ply
ply_path = os.path.join(target_dir, 'Reconstruction_PC.ply')
ok = o3d.io.write_point_cloud(ply_path, pcd)

# Print number of points and file path
num_points = len(points)
if ok:
    print(f"Saved point cloud with {num_points} points to: {ply_path}")
else:
    print(f"Failed to write point cloud to: {ply_path}")

# Visualize (may fail when no display backend is available, so only report it)
try:
    o3d.visualization.draw_geometries([pcd], mesh_show_back_face=True)
except Exception as e:
    print(f"Visualizer error: {e}")

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Saved point cloud with 812104 points to: ../Reconstruction_PC.ply


## Output

| Number of Images | Segmentation Used? | Number of Output Points | Reconstruction Time on CPU | Point Cloud Reconstruction Quality (1-10) |
| - | - | - | - | - |
| 20 | No | 1,624,353 | 535.09s | 7 |
| 10 | No | 812,204 | 122.73s | 8 |
| 8 | No | 649,760 | 92.79s | 7 |
| 4 | No | 324,876 | 43.04s | 7 |
| 3 | No | 243,649 | 33.01s | 6 |
| 20 | Yes | 1,412,505 | 457.17s | 3 | 
| 10 | Yes | 702,301 | 173.50s | 2 |  
| 8 | Yes | 562,245 | 130.34s | 2 | 
| 4 | Yes | 273,803 | 59.2s | 2 | 
| 3 | Yes | 208,322 | 42.70s | 3 |