In [1]:
from transformers import pipeline
import torch
from PIL import Image
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

%load_ext autoreload
%autoreload 2

## Depth-Anything from HuggingFace


In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Hugging Face model id used for the depth-estimation pipeline.
checkpoint = "depth-anything/Depth-Anything-V2-base-hf"
pipe = pipeline(task="depth-estimation", model=checkpoint, device=device)

In [None]:
i = 100

# Front-left camera frame number `i` from the first data-collection run.
image_path = os.path.expanduser(
    f"~/LunarAutonomyChallenge/output/data_collection_1/front_left/{i}.png"
)
image = Image.open(image_path)

# Run the depth-estimation pipeline on the frame.
predictions = pipe(image)

# Input frame on the left, predictions["depth"] on the right, with no gap between panels.
fig, axes = plt.subplots(1, 2, figsize=(20, 10), gridspec_kw={"wspace": 0, "hspace": 0})
axes[0].imshow(image, cmap="gray")
axes[1].imshow(predictions["depth"], cmap="gray")
for ax in axes:
    ax.set_axis_off()
fig.subplots_adjust(wspace=0, hspace=0)
plt.show()

## Apple Depth Pro


In [14]:
import depth_pro

In [None]:
# Load model and preprocessing transform
# (created on `device`, which is defined in the Depth-Anything setup cell).
model, transform = depth_pro.create_model_and_transforms(device=device)
model.eval()

# Load and preprocess an image.
# `image_path` comes from the Depth-Anything demo cell above; the second value
# returned by load_rgb is discarded.
image, _, f_px = depth_pro.load_rgb(image_path)
# NOTE: this rebinds `image` (previously the PIL image opened for Depth-Anything)
# to the transformed model input.
image = transform(image)

# Run inference.
prediction = model.infer(image, f_px=f_px)
depth = prediction["depth"]  # Depth in [m].
focallength_px = prediction["focallength_px"]  # Focal length in pixels.

In [None]:
# Display the Depth Pro depth map; .cpu() moves it to host memory before plotting.
plt.imshow(depth.cpu(), cmap="gray")

## Stereo

- Stereo baseline = 0.162 m
- All cameras horizontal FOV = 1.22 radians (70 degrees)

"The cameras are modelled as perfect pinhole cameras with square pixels, there is no lens distortion. Lens flare from the sun is modelled, this should be considered as a potential source of error in segmentation and feature detection. Each camera has the same field of view of 1.22 radians (70 degrees). The resolution is set by the agent upon initialization in the sensors() method. The maximum resolution allowed is 2448 x 2048 pixels, if a resolution higher than this is requested the resolution will be clipped to the maximum and a warning will be given on the command line."

From Discord:
"Effectively, there is no focal length because the simulator does not model a physical camera, it is modelled as a perfect pinhole camera. Normally, the focal length is given in mm and to relate a pixel coordinate to a line extending from the camera center into the world, you need to use the pixel dimensions in mm.

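The trick is to express the focal length in terms of pixels. Draw the camera geometry in a diagram, the focal length is the distance between the camera center and the image plane. You also know the width and height of the sensor in terms of pixels because you set this in your agent set up, you also know the FOV of the camera. Using triangular geometric relations you can express the focal length in terms of pixels."

Note: for an image of width $W$ pixels and horizontal FOV $\theta$, half the image width subtends half the FOV, so $f = \dfrac{W}{2\tan(\theta/2)}$ (in pixels). Because the pixels are square, the same $f$ applies along both image axes.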


In [2]:
# Image width and height in pixels — presumably the resolution requested by the agent; confirm.
W = 1280
H = 720

FOV = 1.22  # radians
BASELINE = 0.162  # meters

# Root folder of the recorded run, relative to this notebook.
data_path = Path("../../output/data_collection_1")

In [None]:
i = 40

# Both cameras of the front stereo pair are read at the same frame index `i`.
left_image_path = data_path / "front_left" / f"{i}.png"
right_image_path = data_path / "front_right" / f"{i}.png"
left_image = Image.open(left_image_path)
# Reuse the path computed above instead of rebuilding it, so the two cannot drift apart.
right_image = Image.open(right_image_path)

# Plot the left and right camera frames side by side
fig, axes = plt.subplots(1, 2, figsize=(20, 10), gridspec_kw={"wspace": 0, "hspace": 0})
axes[0].imshow(left_image, cmap="gray")
axes[1].imshow(right_image, cmap="gray")
for ax in axes:
    ax.axis("off")
plt.subplots_adjust(wspace=0, hspace=0)
plt.show()

### OpenCV


In [4]:
from lac.perception.depth import compute_stereo_depth

In [None]:
# Pinhole focal length in pixels from the horizontal field of view:
#     f = (W / 2) / tan(FOV / 2)
focal_length_x = W / (2 * np.tan(FOV / 2))
# The cameras have square pixels, so the vertical focal length equals the
# horizontal one. FOV is the *horizontal* field of view; combining it with the
# image height H (as before) understated f_y by a factor of H / W.
focal_length_y = focal_length_x

# Disparity and depth from the left/right pair; only the horizontal focal length
# and the stereo baseline are passed to the helper.
disparity, depth = compute_stereo_depth(
    np.array(left_image), np.array(right_image), BASELINE, focal_length_x, semi_global=False
)

In [None]:
# Sanity check: show the focal lengths (in pixels) derived from the image size and FOV.
print(focal_length_x, focal_length_y)

In [None]:
# Disparity map on the left, the depth computed from it on the right, with no gap between panels.
fig, axes = plt.subplots(1, 2, figsize=(20, 10), gridspec_kw={"wspace": 0, "hspace": 0})
axes[0].imshow(disparity, cmap="gray")
axes[1].imshow(depth, cmap="gray")
for ax in axes:
    ax.set_axis_off()
fig.subplots_adjust(wspace=0, hspace=0)
plt.show()

# Stereo with segmentation


# Heightmap reprojection


In [None]:
# Heightmap file to load, relative to this notebook.
heightmap_path = "../../data/heightmaps/Moon_Map_01_0_rep0.dat"
# NOTE(review): allow_pickle=True lets np.load unpickle object data, which can
# execute arbitrary code — only use this on files from a trusted source.
heightmap = np.load(heightmap_path, allow_pickle=True)
heightmap.shape  # shown as the cell output

In [None]:
# Start from a 4x4 identity matrix — presumably a homogeneous camera pose; confirm against later use.
camera_pose = np.eye(4)