In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from transformers import pipeline

import pandas as pd
import os

from image_feature_extraction.utils import *

# Pipeline

In [None]:
# Depth Anything Metric
# Maximum depth to use for the "metric" depth estimation head: 20 should be
# used for indoor models and 80 for outdoor models.
# Trained on Kitti synthetic, LiDAR, max range 80 meters
# Build the Hugging Face depth-estimation pipeline around the metric outdoor
# checkpoint; process_image() further down feeds each image through it.
checkpoint = "depth-anything/Depth-Anything-V2-Metric-Outdoor-Large-hf"
pipe_metric = pipeline(task="depth-estimation", model=checkpoint)


# Alternative models:

# DPT Large Relative
# dpt-large performs _relative_ depth estimation, meaning that taking the mean
# shows whether the objects are, on average, closer to the closest or to the
# farthest object.
# > This model is trained on an immense amount of training data. 
# This is made possible by only leveraging the relative depths in the data 
# sources instead of absolute metric depths values. Many of the data sources do 
# not provide absolute depth to begin with (3D movies). Thus every prediction is 
# relative [inverse] depth scaled to the depth range of each image 
# (d_min -> 0, d_max -> 255). For the comparison to state of the art methods,
#  this depth range is taken from the ground truth images.

# checkpoint = "Intel/dpt-large"
# pipe = pipeline("depth-estimation", model=checkpoint)

# Depth Anything Relative
# > Depth-Anything-V2-Large-hf is still a relative (normalized) depth estimator,
# not a metric/absolute one. The model card tags it as "relative depth," and its
# zero-shot pipeline returns a per-image normalized depth map (e.g. values in
# an arbitrary scale) rather than real-world distances.

# checkpoint = "depth-anything/Depth-Anything-V2-Large-hf"
# pipe_depth_anything = pipeline("depth-estimation", model=checkpoint)

In [None]:
# Folder holding the input images; passed further down to
# do_for_each_jpeg_file_in_folder together with process_image.
folder_path = "./data/Most_extreme_pictures"

In [None]:
def process_image(file_path):
    """Estimate depth for one image, show diagnostic plots, return summary stats.

    The image is run through the module-level ``pipe_metric`` depth-estimation
    pipeline. Three figures are displayed: the input image, a histogram of the
    predicted depth values, and a heatmap of the predicted depth map.

    Args:
        file_path: Path of the image file to process.

    Returns:
        dict with the keys ``file_name`` (base name of ``file_path``),
        ``depth_metric_mean`` and ``depth_metric_sd`` (mean and standard
        deviation taken over every value of the predicted depth map).
    """
    result = {}
    result["file_name"] = os.path.basename(file_path)

    # Open the image in a context manager so the file handle is released as
    # soon as we are done; this function runs once per file in a folder loop
    # and would otherwise leave one open handle behind per image.
    with Image.open(file_path) as image:
        predictions = pipe_metric(image)

        # Plot while the file is still open: PIL loads pixel data lazily.
        fig, ax = plt.subplots()
        ax.imshow(image)
        plt.show()
        plt.close(fig)

    # detach()/cpu() keep the conversion working when the pipeline runs on an
    # accelerator or the tensor tracks gradients; a bare .numpy() raises there.
    predicted_depth = predictions["predicted_depth"].detach().cpu().numpy()
    # NOTE(review): depending on the transformers version the tensor may carry
    # a leading batch axis of size 1; squeezing leaves the statistics unchanged
    # and gives imshow the 2-D array it needs.
    predicted_depth = np.squeeze(predicted_depth)

    result['depth_metric_mean'] = np.mean(predicted_depth)
    result['depth_metric_sd'] = np.std(predicted_depth)

    # Explicit figure/axes objects, closed after display, so figures do not
    # pile up in memory while looping over many images.
    fig, ax = plt.subplots()
    ax.hist(predicted_depth.ravel())
    ax.set_title("Depth Histogram")
    ax.set_xlabel("Predicted depth")
    ax.set_ylabel("Pixel count")
    plt.show()
    plt.close(fig)

    fig, ax = plt.subplots()
    heatmap = ax.imshow(predicted_depth, cmap='coolwarm')
    ax.axis("off")
    ax.set_title("Depth Heatmap")
    fig.colorbar(heatmap, ax=ax)
    plt.show()
    plt.close(fig)

    return result

# Run process_image over the images in folder_path. The helper is not defined
# in this notebook; presumably it comes from the star import of
# image_feature_extraction.utils and invokes the callback once per JPEG file
# with that file's path — TODO confirm against the helper's source.
do_for_each_jpeg_file_in_folder(folder_path, process_image)