# Depth Detection Pipeline Notebook
This notebook is a development version of the original `depth_any_thing.py` script. It is organized into logical sections for easier experimentation and modification.

## 1. Import Required Libraries
Import all necessary libraries for the pipeline, including computer vision, deep learning, and utility modules.

In [1]:
import cv2
import torch
import numpy as np
import threading
from queue import Queue
from ultralytics import YOLO
from depth_anything_v2.dpt import DepthAnythingV2
from PIL import Image

xFormers not available
xFormers not available


## 2. Configuration & Hyperparameters
Define all configuration variables and hyperparameters for the pipeline, including model URLs, video source, and processing parameters.

In [2]:
# CONFIG & HYPERPARAMETERS
YOLO_MODEL_URL = "https://ai-public-videos.s3.us-east-2.amazonaws.com/weights/obb.pt"
VIDEO_URL = "https://ai-public-videos.s3.us-east-2.amazonaws.com/Raw+Videos/Navirox/sorted/accident_left_2.mp4"

# Performance & Display
SCALE_FACTOR = 0.2
INFERENCE_RES = (int(400 * SCALE_FACTOR), int(700 * SCALE_FACTOR))  
DISPLAY_WIDTH, DISPLAY_HEIGHT = 400, 700
FPS = 30  # Targeted output FPS

# Depth Calibration
ALPHA = 0.15         # Temporal smoothing (lower = smoother)
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Async Queues
input_queue = Queue(maxsize=5)
stop_event = threading.Event()

# State for temporal smoothing
depth_history = {}

# ML-DepthPro expects RGB float32 images in [0,1]

## 3. Async Video Reader
Define the asynchronous video reader function that reads frames from the video source and puts them into a queue for processing.

In [3]:
def frame_reader(url):
    cap = cv2.VideoCapture(url)
    if not cap.isOpened():
        print("[ERROR] Could not open video stream.")
        stop_event.set()
        return

    while not stop_event.is_set():
        ret, frame = cap.read()
        if not ret:
            stop_event.set()
            break
        
        # Pre-resize to model native resolution to save VRAM and transfer time
        resized = cv2.resize(frame, INFERENCE_RES)
        if not input_queue.full():
            input_queue.put(resized)
    cap.release()

## 4. Model Initialization
Load the YOLO and DepthAnythingV2 models, and move them to the appropriate device.

## 4b. ML-DepthPro Model Setup
Install and load the ML-DepthPro model for depth estimation.

In [4]:
# Install ML-DepthPro (if not already installed)
# !pip install git+https://github.com/apple/ml-depth-pro.git

# Import ML-DepthPro
import depth_pro

# Load ML-DepthPro model
model, transform = depth_pro.create_model_and_transforms()
model = model.to(DEVICE)
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


DepthPro(
  (encoder): DepthProEncoder(
    (patch_encoder): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): LayerScale()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp)

In [5]:
print(f"[INFO] Initializing Models on {DEVICE}...")

# Load YOLO
yolo_model = YOLO(YOLO_MODEL_URL)

# Load ML-DepthPro (ViT-B)
# Already loaded in the new cell above

[INFO] Initializing Models on cuda:0...
Found https://ai-public-videos.s3.us-east-2.amazonaws.com/weights/obb.pt locally at C:\Users\My PC\Documents\Depth-Detection-Task\weights\obb.pt


## 5. Main Inference & Rendering Pipeline
Define the main function that performs inference, processes detections, applies depth estimation, and renders the results.

In [6]:
def run_pipeline():
    # Minimal pipeline for debugging: just run DepthPro on a single frame and display
    print("[INFO] Minimal pipeline: waiting for a frame...")
    window_name = "DepthPro Debug"
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    # Wait for a frame
    while input_queue.empty() and not stop_event.is_set():
        cv2.waitKey(1)
    if stop_event.is_set():
        print("[INFO] Stopped before frame received.")
        return

    frame = input_queue.get()
    print("[INFO] Got a frame, running DepthPro...")
    try:
        with torch.no_grad():
            color_converted = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(color_converted)
            input_tensor = transform(pil_img).unsqueeze(0).to(DEVICE)
            prediction = model.infer(input_tensor)
            depth_map = prediction["depth"].squeeze().cpu().numpy()
    except Exception as e:
        print(f"DepthPro Error: {e}")
        return

    # Normalize and display depth map
    depth_vis = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-6)
    depth_colormap = cv2.applyColorMap((depth_vis * 255).astype(np.uint8), cv2.COLORMAP_PLASMA)
    cv2.imshow(window_name, depth_colormap)
    print("[INFO] Showing depth map. Press any key to exit window.")
    cv2.waitKey(0)
    cv2.destroyAllWindows()

## 6. Execution Entry Point
Start the async video reader thread and run the main pipeline. Handles graceful shutdown on interruption.

In [None]:
# Start the async reader
reader_thread = threading.Thread(target=frame_reader, args=(VIDEO_URL,), daemon=True)
reader_thread.start()

try:
    run_pipeline()
except KeyboardInterrupt:
    stop_event.set()
finally:
    stop_event.set()
    print("[INFO] Pipeline shut down successfully.")

[INFO] Minimal pipeline: waiting for a frame...
[INFO] Got a frame, running DepthPro...
