# Depth Map Estimation with Depth-Anything

### Understanding Depth Maps

**Depth maps** are representations of the distance between the camera and objects in a scene. Each pixel in a depth map corresponds to the distance from the camera to that point in the image.

### Introducing DepthAnything v2

**DepthAnything** is a model that estimates depth from single RGB images using a teacher-student architecture with both labeled and unlabeled data.

**How Does It Work?**

                                          Labeled Data (1.5M images)
                                                  ↓
                                          Teacher Training 
                                                  ↓
                           Unlabeled Data → Pseudo-labeling → Combined Dataset
                            (62M images)                            ↓
                                                     Student Training with Augmentations
                                                                    ↓
                                                               Final Model

1. **Initial Training**
   - Teacher model learns from 1.5M labeled depth images

2. **Knowledge Transfer** 
   - Teacher labels 62M unlabeled images
   - Student learns from both real and pseudo labels

3. **Depth estimation**
   - Takes regular RGB image
   - Extracts visual features via encoder
   - Decoder converts features to depth values
   - Fine-tuned version for real-world depth measurements

# Implementation

## Loading the depth model

In [1]:
import sys
import torch
from pathlib import Path
import logging
import warnings
import cv2
from typing import Any, Optional
import numpy as np
import csv

warnings.filterwarnings("ignore")

# Get the working directory of this file
working_dir = Path().resolve()
root_dir = working_dir.parent
print(f"Project directory: {root_dir}")

# Add the DepthAnything directory to sys.path
depth_anything_path = root_dir / 'models/Depth-Anything-V2/metric_depth'
sys.path.append(str(depth_anything_path))
from depth_anything_v2.dpt import DepthAnythingV2

Project directory: /mnt/gsdata/users/kremer/DepthMap


xFormers not available
xFormers not available


In [7]:
def load_model(depth_anything_path: Path, encoder: str = 'vitl', dataset: str = 'hypersim', max_depth: int = 20):
    # Model configuration based on encoder
    model_configs = {
        'vits': {
            'encoder': 'vits',
            'features': 64,
            'out_channels': [48, 96, 192, 384],
        },
        'vitb': {
            'encoder': 'vitb',
            'features': 128,
            'out_channels': [96, 192, 384, 768],
        },
        'vitl': {
            'encoder': 'vitl',
            'features': 256,
            'out_channels': [256, 512, 1024, 1024],
        }
    }
    
    if encoder not in model_configs:
        raise ValueError(f"Unsupported encoder: {encoder}")
    
    # Initialize the model with the specified configuration
    model = DepthAnythingV2(**{**model_configs[encoder], 'max_depth': max_depth})
    
    # Load model checkpoint (model with pre-trained weights)
    checkpoint = depth_anything_path / 'checkpoints' / f'depth_anything_v2_metric_{dataset}_{encoder}.pth'
    if not checkpoint.exists():
        raise FileNotFoundError(f"Model checkpoint not found at {checkpoint}")
    model.load_state_dict(torch.load(checkpoint, map_location='cpu'))
    
    # Determine device and move the model to it
    device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
    model.to(device)
    model.eval()
    
    return model, device

# Load the depth model
model, device = load_model(depth_anything_path, encoder='vitl', dataset='hypersim', max_depth=100)

## Depth estimation on single image

In [3]:
def infer_depth(model: Any, image_path: Path) -> Optional[np.ndarray]:
    logging.info(f"Estimating depth for {image_path}")
    
    # Read the image from the given path
    raw_img = cv2.imread(str(image_path))
    if raw_img is None:
        logging.error(f"Failed to read image: {image_path}")
        return None
    
    # Convert BGR to RGB
    rgb_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
    
    # Infer depth using the model
    with torch.no_grad():
        depth = model.infer_image(rgb_img)
    
    return depth

In [4]:
def save_depth_map(output_folder: Path, image_path: Path, depth_map: np.ndarray) -> Path:
    # Create output paths and directories
    stem = image_path.stem
    depth_image_path = output_folder / "images" / f"{stem}_depth.jpg"
    depth_array_path = output_folder / "arrays" / f"{stem}_depth.npy"
    depth_image_path.parent.mkdir(parents=True, exist_ok=True)
    depth_array_path.parent.mkdir(parents=True, exist_ok=True)

    # Save visualization (jpeg requires integer values)
    normalized = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX)
    colored = cv2.applyColorMap(normalized.astype(np.uint8), cv2.COLORMAP_VIRIDIS)
    cv2.imwrite(str(depth_image_path), colored)
    
    # Save array (store as float32)
    np.save(depth_array_path, depth_map)
    
    return depth_image_path

In [9]:
def compute_statistics(depth_map: np.ndarray) -> tuple:
    quantile05 = np.percentile(depth_map, 5)
    quantile95 = np.percentile(depth_map, 95)
    mean = np.mean(depth_map)
    median = np.median(depth_map)
    return quantile05, quantile95, mean, median

def write_csv(csv_path: Path, data: list, headers: Optional[list] = None):
    # Append data to CSV
    file_exists = csv_path.exists()
    with csv_path.open('a', newline='') as f:
        writer = csv.writer(f)
        if headers and not file_exists:
            writer.writerow(headers)
        writer.writerow(data)

def process_images(model: Any, input_folder: Path, output_folder: Path, csv_path: Path):
    # Check if all required paths are provided
    if not input_folder or not output_folder or not csv_path:
        raise ValueError("All paths (input_folder, output_folder, csv_path) must be provided")
    
    # Check if input folder exists
    if not input_folder.exists():
        raise FileNotFoundError(f"Input folder does not exist: {input_folder}")
    
    if not input_folder.is_dir():
        raise NotADirectoryError(f"Input path is not a directory: {input_folder}")
    
    print(f"Processing images from {input_folder}")
    images = [p for p in Path(input_folder).rglob('*') if p.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff']]
    
    # Create output directory if it doesn't exist
    output_folder.mkdir(parents=True, exist_ok=True)
    
    headers = ['filename', 'Quantile05', 'Quantile95', 'Mean', 'Median']
    
    # Process each image
    for img_path in images:
        print("-" * 100)
        print(f"::: Estimating depth for {img_path} :::")
        depth = infer_depth(model, img_path)
        
        if depth is None:
            continue
        
        print(f"::: Computing statistics and saving depth map :::")
        q05, q95, mean, median = compute_statistics(depth)
        write_csv(csv_path, [img_path.name, q05, q95, mean, median], headers=headers)
        depth_image_path = save_depth_map(output_folder, img_path, depth)
        print(f"Processed {img_path.name} -> Saved Depth Map at {depth_image_path}")

# Running the model

In [10]:
# Define input and output directories (change as needed)
input_folder = root_dir / 'test/images/'
output_folder = root_dir / 'test/output/depth_map/'
csv_file_path = root_dir / 'test/output/depth_map/depth.csv'

# Process all images in the input directory
process_images(model, input_folder, output_folder, csv_file_path)

Processing images from /mnt/gsdata/users/kremer/DepthMap/test/images
----------------------------------------------------------------------------------------------------
::: Estimating depth for /mnt/gsdata/users/kremer/DepthMap/test/images/G5Bullet_56_2024-10-09_01_21_00_calibrated.jpg :::
::: Computing statistics and saving depth map :::
Processed G5Bullet_56_2024-10-09_01_21_00_calibrated.jpg -> Saved Depth Map at /mnt/gsdata/users/kremer/DepthMap/test/output/depth_map/images/G5Bullet_56_2024-10-09_01_21_00_calibrated_depth.jpg
----------------------------------------------------------------------------------------------------
::: Estimating depth for /mnt/gsdata/users/kremer/DepthMap/test/images/G5Bullet_55_2024-10-08_01_36_00_calibrated.jpg :::
::: Computing statistics and saving depth map :::
Processed G5Bullet_55_2024-10-08_01_36_00_calibrated.jpg -> Saved Depth Map at /mnt/gsdata/users/kremer/DepthMap/test/output/depth_map/images/G5Bullet_55_2024-10-08_01_36_00_calibrated_depth