<p align="center">
 <h2 align="center">📻 DepthFM: Fast Monocular Depth Estimation with Flow Matching</h2>
 <p align="center"> 
    Ming Gui<sup>*</sup> · Johannes S. Fischer<sup>*</sup> · Ulrich Prestel · Pingchuan Ma
 </p><p align="center"> 
    Dmytro Kotovenko · Olga Grebenkova · Stefan A. Baumann · Vincent Tao Hu · Björn Ommer
 </p>
 <p align="center"> 
    <b>CompVis Group, LMU Munich</b>
 </p>
  <p align="center"> <sup>*</sup> <i>equal contribution</i> </p>
</p>


In [32]:
import torch
import einops
import numpy as np
from PIL import Image
from PIL.Image import Resampling
import matplotlib.pyplot as plt
from depthfm import DepthFM
from pathlib import Path
import cv2


In [33]:
# Report whether PyTorch can see a CUDA device in this environment.
torch.cuda.is_available()

True

## Define generator

In [34]:
def generator(img_abs_path, suffixes=(".jpeg",)):
    """
    Yield the image files located directly inside a directory.

    Args:
        img_abs_path (`pathlib.Path`):
            Directory to scan. Sub-directories are not descended into.
        suffixes (`str` or iterable of `str`):
            File extensions to accept, each including the leading dot.
            Matching is case-insensitive, so the default also accepts `.JPEG`.

    Yields:
        `pathlib.Path`: One path per matching regular file, in sorted order.
    """
    if isinstance(suffixes, str):
        suffixes = (suffixes,)
    wanted = {suffix.lower() for suffix in suffixes}

    # iterdir() returns entries in arbitrary order; sort for reproducible runs.
    for entry in sorted(img_abs_path.iterdir()):
        # is_file() filters out directories that merely carry an image suffix.
        if entry.is_file() and entry.suffix.lower() in wanted:
            yield entry

## Load Image

In [35]:
def resize_max_res(
    img: Image.Image, max_edge_resolution: int, resample_method=Resampling.BILINEAR
) -> "tuple[Image.Image, tuple[int, int], tuple[int, int]]":
    """
    Rescale an image so its longer edge matches `max_edge_resolution`, keeping
    the aspect ratio, then snap both edges to a multiple of 64.

    Images whose longer edge is smaller than `max_edge_resolution` are scaled
    up. Snapping to a multiple of 64 may change the aspect ratio slightly.

    Args:
        img (`Image.Image`):
            Image to be resized.
        max_edge_resolution (`int`):
            Target length of the longer edge (pixel), before snapping.
        resample_method (`PIL.Image.Resampling`):
            Resampling method used to resize images.

    Returns:
        `tuple`: `(resized_img, (original_width, original_height),
        (new_width, new_height))`.
    """
    # NOTE(review): 64 is presumably the size granularity the model requires -- confirm.
    multiple = 64

    original_width, original_height = img.size
    scale_factor = min(
        max_edge_resolution / original_width, max_edge_resolution / original_height
    )

    new_width = int(original_width * scale_factor)
    new_height = int(original_height * scale_factor)

    # Snap to the nearest multiple, but never below one full multiple:
    # very elongated images would otherwise round their short edge down to 0.
    new_width = max(multiple, round(new_width / multiple) * multiple)
    new_height = max(multiple, round(new_height / multiple) * multiple)

    resized_img = img.resize((new_width, new_height), resample=resample_method)
    return resized_img, (original_width, original_height), (new_width, new_height)

In [36]:
def convert_to_tensor(im_fp: "str | Path", processing_res: int = 640) -> torch.Tensor:
    """
    Load an image file and turn it into a normalized, batched tensor.

    Args:
        im_fp (`str` or `pathlib.Path`):
            Path of the image file to open.
        processing_res (`int`):
            Maximum edge length passed to `resize_max_res`.

    Returns:
        `torch.Tensor`: float32 tensor laid out as `(1, c, h, w)` with values
        in the [-1, 1] range.
    """
    # The context manager releases the file handle once the pixels are decoded;
    # convert() returns an independent image that outlives the `with` block.
    with Image.open(im_fp) as src:
        im = src.convert('RGB')

    im, _, _ = resize_max_res(im, processing_res)

    # uint8 [0, 255] -> float [-1, 1], channels first, plus a leading batch axis
    x = np.array(im)
    x = einops.rearrange(x, 'h w c -> c h w')
    x = x / 127.5 - 1
    x = torch.tensor(x, dtype=torch.float32)[None]

    return x

## Inference

In [37]:
def infer_depth(
    model: DepthFM,
    x: torch.Tensor,
    num_steps: int = 4,
    ensemble_size: int = 12,
    device=None,
) -> torch.Tensor:
    """
    Run depth prediction for one input tensor.

    Args:
        model (`DepthFM`):
            Loaded model; it is moved to `device` before prediction.
        x (`torch.Tensor`):
            Input tensor; it is moved to `device` before prediction.
        num_steps (`int`):
            Forwarded to `model.predict_depth`.
        ensemble_size (`int`):
            Forwarded to `model.predict_depth`.
        device (`str` or `torch.device`, optional):
            Device to run on. Defaults to `'cuda:0'` when CUDA is available,
            otherwise `'cpu'`.

    Returns:
        `torch.Tensor`: The value returned by `model.predict_depth`.
    """
    if device is None:
        # NOTE(review): CPU fallback avoids a hard crash on machines without a
        # GPU; confirm the model actually supports CPU execution.
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    model = model.to(device)
    # Inference only: make sure no autograd graph is kept alive.
    with torch.no_grad():
        depth = model.predict_depth(
            x.to(device), num_steps=num_steps, ensemble_size=ensemble_size
        )
    return depth

## Normalize

In [38]:
def normalize(arr: np.ndarray) -> np.ndarray:
    """
    Min-max scale an array to the 0..255 range and cast it to uint8.

    Args:
        arr (`np.ndarray`):
            Numeric array with at least one element.

    Returns:
        `np.ndarray`: uint8 array of the same shape. The minimum maps to 0 and
        the maximum to 255. A constant input maps to all zeros.
    """
    arr = np.asarray(arr)
    min_val, max_val = np.min(arr), np.max(arr)
    value_range = max_val - min_val

    # A constant array would divide 0 by 0 and cast NaN to uint8; return a
    # well-defined all-zero result instead.
    if value_range == 0:
        return np.zeros(arr.shape, dtype=np.uint8)

    arr_normalized = (arr - min_val) / value_range  # Normalize to [0, 1]
    return (arr_normalized * 255).astype(np.uint8)  # Scale to [0, 255]

## Visualize Result

In [39]:
def visualize(depth: torch.Tensor):
    """Display a depth tensor as an image using the 'turbo' colour map."""
    # Drop singleton axes and bring the data to host memory for matplotlib.
    depth_map = depth.squeeze().cpu().numpy()
    plt.imshow(depth_map, cmap='turbo')
    plt.show()

## Apply color map & convert

In [40]:
def prep_for_save(arrd: "torch.Tensor | np.ndarray") -> Image.Image:
    """
    Turn a depth map into a colour-mapped RGB image ready to be saved.

    Args:
        arrd (`torch.Tensor` or `np.ndarray`):
            Depth map. Singleton axes are squeezed away before colouring.

    Returns:
        `Image.Image`: The depth map rendered with OpenCV's JET colour map.
    """
    # Use the argument itself rather than a notebook-level `depth` variable,
    # so the result never depends on leftover kernel state.
    if isinstance(arrd, torch.Tensor):
        arr = arrd.squeeze().numpy(force=True)
    else:
        arr = np.squeeze(np.asarray(arrd))

    arr_normalized = normalize(arr)
    colored = cv2.applyColorMap(arr_normalized, cv2.COLORMAP_JET)
    # OpenCV produces BGR channel order; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(colored, cv2.COLOR_BGR2RGB))

## Run

In [41]:
model = DepthFM('checkpoints/depthfm-v1.ckpt')  # load model
img_rel_path = 'assets/towers'
img_abs_path = Path.cwd() / img_rel_path

# Results are written to a sibling folder named "<input folder>_depth".
new_save_folder = img_abs_path.with_name(img_abs_path.name + '_depth')
new_save_folder.mkdir(parents=False, exist_ok=True)

for im_fp in generator(img_abs_path):
    x = convert_to_tensor(im_fp)  # convert image to tensor
    depth = infer_depth(model, x)  # infer image depths
    # visualize(depth)  # show visualization
    im = prep_for_save(depth)  # colour-map the depth predicted for this image

    # save
    # Build the name by concatenation: with_suffix() would cut a stem such as
    # "img.v2_depth" at its last dot and drop the "_depth" marker.
    new_save_name = Path(f"{im_fp.stem}_depth.jpeg")
    save_path = new_save_folder / new_save_name
    im.save(save_path)
