In [1]:
import torch
import time
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
from dataset_loaders import NYUV2DataSet, DA2KDataSet
from loss import eval_depth, eval_accuracy


from transformers import HqqConfig
from transformers import AutoModelForDepthEstimation, AutoImageProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
da_data = DA2KDataSet("datasets/DA-2K")

In [3]:
example_image = da_data.__getitem__(0)

In [4]:
nyuv2_data = NYUV2DataSet(dataset_path="datasets/NYUV2",csv_name="nyu2_test.csv")

In [5]:
nyuv2_data.__len__()

654

In [6]:
example_images = nyuv2_data.__getitem__(2)

In [7]:
example_images[1].shape

torch.Size([1, 518, 518])

In [14]:
quant_config = HqqConfig(nbits=1)
depth_anything_checkpoint = "LiheYoung/depth-anything-small-hf"
depth_anything_processor = AutoImageProcessor.from_pretrained(depth_anything_checkpoint,
                                                               device_map='auto', 
                                                               quantization_config=quant_config)
depth_anything_model = AutoModelForDepthEstimation.from_pretrained(depth_anything_checkpoint, 
                                                                   device_map='auto', 
                                                                   quantization_config=quant_config)

In [15]:
def get_curr_time_ms():
    return int(time.time()*1000)

In [16]:
def get_depth_from_model(input_image, image_processor, depth_model):
    inputs = image_processor(input_image, return_tensors="pt")
   
    with torch.no_grad():
        if inputs["pixel_values"].device != depth_model.device:
            inputs["pixel_values"] = inputs["pixel_values"].to(depth_model.device)
        start_time = get_curr_time_ms()
        outputs = depth_model(**inputs)
        end_time = get_curr_time_ms() - start_time
        predicted_depth = outputs.predicted_depth
    return predicted_depth, end_time

### NYUV2 Data Testing

In [17]:
def collect_nyu2_results(nyu2_dataset, image_processor, depth_model):
    absrel_scores = []
    d1_scores = []
    average_inference_ms = []
    for i in range(nyu2_dataset.__len__()):
        input_image, depth_target = nyu2_dataset.__getitem__(i)
        
        input_image = input_image.to(depth_model.device)
        depth_target = depth_target.to(depth_model.device)
        
        predicted_depth, inference_ms = get_depth_from_model(input_image, image_processor, depth_model)
        average_inference_ms.append(inference_ms)

        valid_mask = [predicted_depth >= 0.0001]

        eval_results = eval_depth(predicted_depth[valid_mask], depth_target[valid_mask])
        absrel_scores.append(eval_results["abs_rel"])
        d1_scores.append(eval_results["d1"])

    return (sum(absrel_scores) / len(absrel_scores), sum(d1_scores) / len(d1_scores), sum(average_inference_ms) / len(average_inference_ms))

In [18]:
collect_nyu2_results(nyuv2_data, depth_anything_processor, depth_anything_model)

(0.9976809537009727, 0.0, 27.396024464831804)

In [None]:
inputs = depth_anything_processor(images=example_images[0], return_tensors="pt")
with torch.no_grad():
    outputs = depth_anything_model(**inputs)
    predicted_depth = outputs.predicted_depth

In [None]:
np.swapaxes(example_images[0].squeeze().cpu().numpy(),2,0).shape

In [None]:
predicted_depth.dtype

In [None]:
example_images[1].dtype

In [None]:
f, axarr = plt.subplots(1,3)
axarr[0].imshow(transforms.ToPILImage()(example_images[0]))
axarr[1].imshow(example_images[1].squeeze().cpu().numpy())
axarr[2].imshow(predicted_depth.squeeze().cpu().numpy())
plt.show()

In [None]:
eval_depth(predicted_depth, example_images[1])

### DA-2K Data Testing

In [None]:
def collect_da2K_results(da2k_dataset, image_processor, depth_model):
    closer_match = []
    average_inference_ms = []
    for i in range(da2k_dataset.__len__()):
        da2k_image_data = da2k_dataset.__getitem__(i)
        input_image = da2k_image_data["image"]
        resizer = transforms.Resize(size=input_image.size)
        predicted_depth, inference_ms = get_depth_from_model(input_image, image_processor, depth_model)
        predicted_depth = resizer(predicted_depth).squeeze().cpu()
        average_inference_ms.append(inference_ms)
        closer_match.append(eval_accuracy(predicted_depth, da2k_image_data["points"], da2k_image_data["closer_point"]))


    return (sum([1 if x is True else 0 for x in closer_match]) / len(closer_match), sum(average_inference_ms) / len(average_inference_ms))

In [None]:
collect_da2K_results(da_data, depth_anything_processor, depth_anything_model)

### Testing Different Model Quantizations and No Quantizations

In [None]:
def run_nyu_da2K_quantization_tests(da_data, nyuv2_data, model_weights, quant_configs):
    config_trials = {}
    csv_string = "quantization_state, abs_rel, d1, accuracy, nyu2_time, da2k_time, model_size \n"
    for config_name, config in quant_configs.items():
        torch._C._cuda_clearCublasWorkspaces()
        image_processor = AutoImageProcessor.from_pretrained(model_weights, quantization_config=config, device_map="auto")
        depth_model = AutoModelForDepthEstimation.from_pretrained(model_weights, quantization_config=config, device_map="auto")
        model_size = torch.cuda.memory_allocated() / 1024**2

        abs_rel, d1, nyu2_time = collect_nyu2_results(nyuv2_data, image_processor, depth_model)
        accuracy, da2k_time = collect_da2K_results(da_data, image_processor, depth_model)
        config_trials[config_name] = {"abs_rel":abs_rel, 
                                      "d1":d1, "accuracy":accuracy, 
                                      "nyu2_time":nyu2_time, 
                                      "da2k_time":da2k_time, 
                                      "model_size":model_size
                                    }

        csv_string += config_name + ", " +\
                      str(abs_rel) + ", " +\
                      str(d1) + ", " +\
                      str(accuracy) + ", " +\
                      str(nyu2_time) + ", " +\
                      str(da2k_time) + ", " +\
                      str(model_size) + "\n"
    
    return config_trials, csv_string

In [None]:
configs_to_run = {"no_quantization":None ,
                  "hqq_8bit":HqqConfig(weights="int8", group_size=64), 
                  "hqq_4bit":HqqConfig(weights="int4", group_size=64)
                  }

results_json, results_csv = run_nyu_da2K_quantization_tests(da_data=da_data, nyuv2_data=nyuv2_data, model_weights=depth_anything_checkpoint, quant_configs=configs_to_run)

In [None]:
print(results_json)
print(results_csv)