In [3]:
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import torch
import numpy as np
from PIL import Image
import requests
from glob import glob

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf")
model.to("cuda") 


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


DepthAnythingForDepthEstimation(
  (backbone): Dinov2Backbone(
    (embeddings): Dinov2Embeddings(
      (patch_embeddings): Dinov2PatchEmbeddings(
        (projection): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): Dinov2Encoder(
      (layer): ModuleList(
        (0-23): 24 x Dinov2Layer(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attention): Dinov2SdpaAttention(
            (attention): Dinov2SdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): Dinov2SelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0

In [5]:
import os
from tqdm import tqdm

In [8]:
indoor_path = "/home/coder/low-power-cv-challenge-track3/data/eval/indoor"
label_indoor_path = "/home/coder/low-power-cv-challenge-track3/data/eval/indoor_labels"
for f in tqdm(glob(indoor_path+'**/*')):
    image = Image.open(f)
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {key: value.to("cuda") for key, value in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    post_processed_output = image_processor.post_process_depth_estimation(
    outputs,
    target_sizes=[(448,448)],
    )
    predicted_depth = post_processed_output[0]["predicted_depth"]
    depth = (predicted_depth - predicted_depth.min()) / (predicted_depth.max() - predicted_depth.min())
    torch.save(depth,f"{label_indoor_path}/{os.path.basename(f).split('.')[0]}.pt")
    



100%|██████████| 100/100 [00:07<00:00, 13.80it/s]


In [9]:
outdoor_path = "/home/coder/low-power-cv-challenge-track3/data/eval/outdoor"
label_outdoor_path = "/home/coder/low-power-cv-challenge-track3/data/eval/outdoor_labels"
for f in tqdm(glob(outdoor_path+'**/*')):
    image = Image.open(f)
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {key: value.to("cuda") for key, value in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    post_processed_output = image_processor.post_process_depth_estimation(
    outputs,
    target_sizes=[(448,448)],
    )
    predicted_depth = post_processed_output[0]["predicted_depth"]
    depth = (predicted_depth - predicted_depth.min()) / (predicted_depth.max() - predicted_depth.min())
    torch.save(depth,f"{label_outdoor_path}/{os.path.basename(f).split('.')[0]}.pt")
    



100%|██████████| 100/100 [00:07<00:00, 13.82it/s]
