In [None]:
import torch
from pathlib import Path

from depth_anything_v2.dpt import DepthAnythingV2

import cv2
import numpy as np
import matplotlib.pyplot as plt

import utils

In [None]:
# Encoder variant used throughout the notebook; must be a key of `model_configs`.
model_select = "vitb"

# Keyword arguments passed to DepthAnythingV2 for each available encoder size.
model_configs = {
    'vits': {
        'encoder': 'vits',
        'features': 64,
        'out_channels': [48, 96, 192, 384],
    },
    'vitb': {
        'encoder': 'vitb',
        'features': 128,
        'out_channels': [96, 192, 384, 768],
    },
    'vitl': {
        'encoder': 'vitl',
        'features': 256,
        'out_channels': [256, 512, 1024, 1024],
    },
}

# Checkpoint file for the selected encoder, relative to the notebook directory.
weights_path = "weights/depth_anything_v2_" + model_select + ".pth"

In [None]:
# Build the network for the selected encoder and put it in evaluation mode.
encoder_config = model_configs[model_select]
model = DepthAnythingV2(**encoder_config).eval()

# Deserialize the checkpoint onto the CPU, then copy its tensors into the model.
# NOTE(review): torch.load unpickles the file, so only use checkpoints from a
# trusted source.
state_dict = torch.load(weights_path, map_location='cpu')
model.load_state_dict(state_dict)

In [None]:
# Sample photo used for every inference check in this notebook.
image_url = "https://images.pexels.com/photos/5740792/pexels-photo-5740792.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1"

# Fetch the picture with the project helper and convert the result to a NumPy array.
# NOTE(review): presumably the helper returns an RGB PIL image, so `image` ends up
# as an (H, W, 3) array — confirm against utils.download_image.
downloaded_image = utils.download_image(image_url)
image = np.array(downloaded_image)

# Check PyTorch Inference

In [None]:
def image_preprocess(image, input_size=518):
    """
    Convert an RGB image into a normalized, batched, channels-first tensor.

    Steps: scale pixel values by 1/255, resize to a square of
    ``input_size`` x ``input_size`` with bicubic interpolation, normalize each
    channel with a fixed mean/std, move channels first, add a batch axis and
    cast to float32.

    Input:
        image: RGB image as a numpy array of shape [Height, Width, 3].
        input_size: side length in pixels of the square model input.
            Defaults to 518, the resolution used everywhere else in this
            notebook (including the OpenVINO export shape).
            NOTE(review): the ViT encoder presumably needs a multiple of its
            patch size — confirm before using another value.
    Output:
        input_tensor, (h_o, w_o)
        input_tensor -> float32 array of shape [1, 3, input_size, input_size],
            ready to feed the model
        (h_o, w_o)   -> original height and width of the given image
    Raises:
        ValueError: if ``image`` is not a 3-dimensional array with 3 channels.
    """
    # Fail early with a readable message; without this check a grayscale or
    # RGBA image only fails later inside the mean/std broadcast.
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            f"expected an RGB image of shape (H, W, 3), got shape {image.shape}"
        )

    # save original shape so the prediction can be resized back later
    image_size = image.shape[:2]

    # normalize to [0, 1] (assumes 8-bit pixel values in 0..255)
    scaled = image / 255.0

    # resize to the square model resolution; cv2 takes dsize as (width, height)
    resized = cv2.resize(
        scaled,
        dsize=(input_size, input_size),
        interpolation=cv2.INTER_CUBIC,
    )

    # per-channel mean and std (the commonly used ImageNet statistics)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalized = (resized - mean) / std

    # turn it channels first: (h, w, c) -> (c, h, w)
    channels_first = np.transpose(normalized, (2, 0, 1))

    # add batch dimension: (c, h, w) -> (1, c, h, w)
    batched = np.expand_dims(channels_first, 0)

    # force dtype to float32
    input_tensor = batched.astype("float32")
    return input_tensor, image_size

def postprocess(model_output, image_size):
    """
    Turn the raw model output into a depth map at the original image size.

    Input:
        model_output: torch tensor returned by the model; only index 0 along
            its first axis is used (presumably the batch axis — confirm).
        image_size: (height, width) of the original image.
    Output:
        depth map as a numpy array resized to (height, width)
    """
    # Bring the prediction to host memory, drop autograd tracking, keep entry 0.
    prediction = model_output.cpu().detach().numpy()
    depth_map = prediction[0]

    # cv2.resize expects dsize as (width, height), the reverse of image_size.
    # NOTE(review): INTER_AREA is typically meant for shrinking; confirm it is
    # the intended choice when the original image is larger than the model input.
    height, width = image_size
    return cv2.resize(depth_map, dsize=(width, height), interpolation=cv2.INTER_AREA)

In [None]:
# Build the model input batch and keep the original (height, width) of the
# image so the prediction can be resized back afterwards.
(input_tensor, image_size) = image_preprocess(image)

In [None]:
# Inference only: disable autograd so no graph is built for the forward pass,
# and call the module itself rather than .forward() so that any registered
# hooks still run.
with torch.no_grad():
    depth_pred = model(torch.from_numpy(input_tensor))

# Convert the raw prediction to a numpy depth map at the original image size.
depth = postprocess(depth_pred, image_size)

In [None]:
# Show the predicted depth map without axis ticks or frame.
fig, ax = plt.subplots()
ax.imshow(depth)
ax.axis("off")
plt.show()

# Convert to OpenVINO IR

In [None]:
import openvino as ov

# The IR file reuses the checkpoint's file name with ".pth" swapped for ".xml"
# and is stored under models_ov/.
ir_file_name = Path(weights_path).name.replace(".pth", ".xml")
ov_model_path = Path("models_ov") / ir_file_name

# Convert only once: skip the export when the IR file is already on disk.
# Note that `ov_model` is only bound in this cell when the conversion runs.
if not ov_model_path.exists():
    ov_model = ov.convert_model(
        model,
        example_input=input_tensor,
        input=[1, 3, 518, 518],
    )
    ov.save_model(ov_model, ov_model_path)