In [1]:
# Set project root
import os
import sys

# Manually set the path to the project root
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
from transformers import DepthProImageProcessorFast, DepthProForDepthEstimation
from PIL import Image
import torch
import matplotlib.pyplot as plt

In [3]:
# Load an example image
image_path = os.path.join(project_root, 'assets', 'images', 'scene_12.jpg')
image = Image.open(image_path)

# Load processor and model
processor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to("cuda" if torch.cuda.is_available() else "cpu")

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [4]:
model.device

device(type='cpu')

In [5]:
# Prepare input
inputs = processor(images=image, return_tensors="pt", size=640).to(model.device)

In [None]:
# Inference
with torch.no_grad():
    outputs = model(**inputs)

ValueError: Image size 640x640 is too small to be scaled with scaled_images_ratios=[0.25, 0.5, 1] when patch_size=384.

: 

In [None]:
# Post-process to get depth map and camera parameters
post = processor.post_process_depth_estimation(outputs, target_sizes=[(image.height, image.width)])

In [None]:
depth_map = post[0]["predicted_depth"]
focal_length_px = post[0]["focal_length"]
field_of_view = post[0]["field_of_view"]  # optional

print(f"Depth map shape: {depth_map.shape}")
print(f"Estimated focal length (px): {focal_length_px}")
print(f"Estimated field of view (degrees): {field_of_view}")

In [None]:
# Display the image and the depth map
plt.figure(figsize=(12, 6))

# Original image
plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title("Original Image")
plt.axis("off")

# Depth map
plt.subplot(1, 2, 2)
plt.imshow(depth_map, cmap="plasma")
plt.title("Depth Map")
plt.axis("off")

plt.show()