In [None]:
# Install the third-party packages this notebook needs.
# NOTE(review): versions are unpinned, so a re-run may pull newer releases — consider pinning.
# NOTE(review): cv2, matplotlib, numpy and torch are imported later but not listed here;
# presumably they arrive as dependencies of the packages below — confirm.
%pip install open3d Pillow ultralytics supervision


In [None]:
# Import necessary libraries
import open3d as o3d
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import os
from ultralytics import YOLO
import supervision as sv


In [None]:
# Configuration: every tunable value read by the later cells is defined here.
# UPDATE THESE PATHS FOR YOUR SETUP:
BAG_FILE_PATH = "/content/drive/MyDrive/data 1 (GEDUNG P)/20250503_165038.bag"  # Update this path
MODEL_PATH = "best.pt"  # Path to your YOLO v11 model

# Output settings
OUTPUT_VIDEO_PATH = "depth_detection_output.mp4"
FPS = 6  # Frame rate handed to the video writer
FRAME_WIDTH = 1280  # Frame size handed to the video writer
FRAME_HEIGHT = 720  # NOTE(review): presumably must equal the recorded colour frame size — confirm

# Processing settings
MAX_FRAMES = 100  # Set to None to process all frames (compared against the processing loop's frame counter)
CONSECUTIVE_EMPTY_LIMIT = 5  # Stop after this many empty colour frames in a row

# Performance optimization settings
DETECTION_INTERVAL = 3  # Run detection every N frames (use tracking for others)
YOLO_INPUT_SIZE = 640   # Smaller size = faster inference (320, 640, 1280)
USE_HALF_PRECISION = True  # Use FP16 for faster inference (if GPU available)
SKIP_SIMILAR_FRAMES = True  # Skip processing if frame hasn't changed much
FRAME_DIFF_THRESHOLD = 0.05  # Frames whose normalized difference (0-1) is below this are skipped

# Depth calculation settings
# Recognised methods: "center_region", "robust_center", "percentile";
# any other value falls back to a plain mean over the whole bounding box.
DEPTH_METHOD = "center_region"  # Use fastest method by default
CENTER_REGION_RATIO = 0.6  # Use center 60% of bounding box
DEPTH_OUTLIER_THRESHOLD = 2.0  # Standard deviations for outlier removal

# Label annotation settings
LABEL_FONT_SCALE = 0.8      # Font size (0.3-2.0, larger = bigger text)
LABEL_FONT_THICKNESS = 2    # Font thickness (1-3, higher = bolder)
LABEL_PADDING = 8           # Padding around text (5-15 pixels)

# Echo the key settings so the run log records what was used
print("Configuration loaded:")
print(f"BAG file: {BAG_FILE_PATH}")
print(f"Model: {MODEL_PATH}")
print(f"Output: {OUTPUT_VIDEO_PATH}")
print(f"Max frames: {MAX_FRAMES}")
print(f"Depth calculation method: {DEPTH_METHOD}")

# FourCC codec code for the output video; the VideoWriter itself is created in the processing cell
fourcc = cv2.VideoWriter_fourcc(*'mp4v')


In [None]:
# Load YOLO v11 model
print("Loading YOLO v11 model...")

# Fail early, with actionable guidance, when the weights file is missing
if not os.path.exists(MODEL_PATH):
    print(f"Error: Model file not found at {MODEL_PATH}")
    print("Please update the MODEL_PATH in the configuration cell")
    print("Make sure your 'best.pt' file is in the correct location")
    raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")

try:
    model = YOLO(MODEL_PATH)
    print(f"Model loaded successfully from {MODEL_PATH}")

    # Performance optimizations
    model.overrides['imgsz'] = YOLO_INPUT_SIZE  # Set input size for faster inference
    if USE_HALF_PRECISION:
        try:
            model.half()  # Use FP16 precision for faster inference
            print("Using half precision (FP16) for faster inference")
        except Exception as half_error:
            # Best-effort fallback. Catching Exception (not a bare except) keeps
            # Ctrl-C working and lets the actual reason be reported.
            # NOTE(review): ultralytics may re-select precision at predict time —
            # confirm FP16 is really in effect during inference.
            print("Half precision not available, using full precision")
            print(f"  Reason: {half_error}")

    # Warmup the model with a dummy image for better performance
    import torch
    dummy_img = torch.zeros((1, 3, YOLO_INPUT_SIZE, YOLO_INPUT_SIZE))
    if torch.cuda.is_available():
        model.to('cuda')
        dummy_img = dummy_img.cuda()
        print("Using GPU acceleration")

    _ = model(dummy_img, verbose=False)  # Warmup
    print("Model warmed up for optimal performance")

except Exception as e:
    print(f"Error loading YOLO model: {e}")
    print("Please check if the model file is valid and compatible with ultralytics")
    raise

# Initialize tracker and annotators
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()

# Configure label annotator with custom font size
# text_scale: Controls font size (default is 0.5, try 0.7-1.5 for larger text)
# text_thickness: Controls font thickness (default is 1)
# text_padding: Controls padding around text (default is 5)
label_annotator = sv.LabelAnnotator(
    text_scale=LABEL_FONT_SCALE,        # Font size from configuration
    text_thickness=LABEL_FONT_THICKNESS, # Font thickness from configuration
    text_padding=LABEL_PADDING,          # Padding from configuration
    text_color=sv.Color.WHITE            # Optional: set text color
)
print("Tracker and annotators initialized with custom font settings")


In [None]:
# Open BAG file
print(f"Opening BAG file: {BAG_FILE_PATH}")

# Check if file exists first so the user gets a clear message instead of a reader error
if not os.path.exists(BAG_FILE_PATH):
    print(f"Error: BAG file not found at {BAG_FILE_PATH}")
    print("Please update the BAG_FILE_PATH in the configuration cell")
    raise FileNotFoundError(f"BAG file not found: {BAG_FILE_PATH}")

try:
    bag_reader = o3d.t.io.RSBagReader()
    # open() can report failure through its return value instead of raising,
    # so the result is checked before claiming success.
    opened = bag_reader.open(BAG_FILE_PATH)
    if opened is False:
        raise RuntimeError(f"RSBagReader could not open: {BAG_FILE_PATH}")
    print("BAG file opened successfully")
except Exception as e:
    print(f"Error opening BAG file: {e}")
    print("Please check the file path and ensure the file is a valid BAG file")
    raise


In [None]:
# Optional: Help find your files (run this if you need to locate your BAG or model files)
print("Current working directory:", os.getcwd())

# One directory walk per file type, in the same order the headings are printed
for heading, extension in (
    ("\nLooking for BAG files (*.bag):", '.bag'),
    ("\nLooking for YOLO model files (*.pt):", '.pt'),
):
    print(heading)
    for folder, _subfolders, filenames in os.walk('.'):
        for filename in filenames:
            if filename.endswith(extension):
                print(f"  Found: {os.path.join(folder, filename)}")

print("\nIf you don't see your files, update the paths in the configuration cell above.")


In [None]:
def calculate_depth_center_region_fast(depth_array, bbox, center_ratio=0.6):
    """
    Mean depth over the central part of a bounding box.

    The box is shrunk symmetrically so that about ``center_ratio`` of each side
    remains, then clamped to the array bounds. Only samples strictly between
    0 and 10000 are averaged.

    Args:
        depth_array: array indexed as [row, column]
        bbox: sequence whose first four items are x1, y1, x2, y2
        center_ratio: fraction of the box width/height to keep

    Returns:
        Mean of the in-range samples, or 0.0 when the shrunken box is empty or
        contains no in-range sample.
    """
    left, top, right, bottom = (int(bbox[i]) for i in range(4))

    # Margin trimmed from each side of the box
    trim_x = int((right - left) * (1 - center_ratio) * 0.5)
    trim_y = int((bottom - top) * (1 - center_ratio) * 0.5)

    # Clamp the shrunken box to the array extent
    col_start = max(0, left + trim_x)
    row_start = max(0, top + trim_y)
    col_stop = min(depth_array.shape[1], right - trim_x)
    row_stop = min(depth_array.shape[0], bottom - trim_y)

    if col_stop <= col_start or row_stop <= row_start:
        return 0.0

    region = depth_array[row_start:row_stop, col_start:col_stop]
    in_range = region[(region > 0) & (region < 10000)]

    if in_range.size > 0:
        return np.mean(in_range)
    return 0.0


def calculate_depth_center_region(depth_array, bbox, center_ratio=0.6):
    """Compatibility alias: forwards unchanged to calculate_depth_center_region_fast."""
    depth = calculate_depth_center_region_fast(depth_array, bbox, center_ratio)
    return depth



def calculate_depth_robust_center(depth_array, bbox, center_ratio=0.6, outlier_threshold=2.0):
    """
    Mean depth of the box centre after discarding statistical outliers.

    The box is shrunk to its central ``center_ratio`` portion and clamped to the
    array. Samples strictly between 0 and 10000 are kept; those further than
    ``outlier_threshold`` standard deviations from their mean are then dropped.

    Returns:
        Mean of the surviving samples (the unfiltered mean if none survive),
        or 0.0 when the region is empty or has fewer than 5 in-range samples.
    """
    left, top, right, bottom = (int(value) for value in bbox)

    # Margin trimmed from each side of the box
    trim_x = int((right - left) * (1 - center_ratio) / 2)
    trim_y = int((bottom - top) * (1 - center_ratio) / 2)

    col_start = max(0, left + trim_x)
    row_start = max(0, top + trim_y)
    col_stop = min(depth_array.shape[1], right - trim_x)
    row_stop = min(depth_array.shape[0], bottom - trim_y)

    if col_stop <= col_start or row_stop <= row_start:
        return 0.0

    region = depth_array[row_start:row_stop, col_start:col_stop]
    samples = region[(region > 0) & (region < 10000)]

    # Too few samples for the statistics below to be meaningful
    if len(samples) < 5:
        return 0.0

    centre = np.mean(samples)
    spread = np.std(samples)

    # Keep only samples within the allowed number of standard deviations
    deviation = np.abs(samples - centre)
    inliers = samples[deviation <= outlier_threshold * spread]

    if len(inliers) > 0:
        return np.mean(inliers)
    return centre


def calculate_depth_percentile(depth_array, bbox, percentile=50):
    """
    Percentile of the in-range depth samples inside a bounding box.

    The box is clamped to the array bounds and samples strictly between 0 and
    10000 are kept. With the default ``percentile=50`` the result is the median.

    Returns:
        The requested percentile, or 0.0 when no in-range sample exists.
    """
    left, top, right, bottom = (int(value) for value in bbox)

    # Clamp the box to the array extent
    left = max(0, left)
    top = max(0, top)
    right = min(depth_array.shape[1], right)
    bottom = min(depth_array.shape[0], bottom)

    region = depth_array[top:bottom, left:right]
    samples = region[(region > 0) & (region < 10000)]

    if len(samples) == 0:
        return 0.0
    return np.percentile(samples, percentile)


def calculate_avg_depth_in_bbox(depth_array, bbox, method="robust_center"):
    """
    Dispatch a depth estimate for one bounding box to the chosen method.

    Args:
        depth_array: numpy array of depth values
        bbox: bounding box coordinates [x1, y1, x2, y2]
        method: one of
            - "center_region": mean over the centre portion of the box
            - "robust_center": centre portion with outlier removal
            - "percentile": median of the whole box
            Any other value uses a plain mean over the whole (clamped) box.

    Returns:
        Depth estimate in the same units as ``depth_array`` (the labelling code
        in this notebook treats them as millimetres), or 0.0 when no sample
        strictly between 0 and 10000 is available.
    """
    if method == "center_region":
        return calculate_depth_center_region(depth_array, bbox, CENTER_REGION_RATIO)

    if method == "robust_center":
        return calculate_depth_robust_center(
            depth_array, bbox, CENTER_REGION_RATIO, DEPTH_OUTLIER_THRESHOLD
        )

    if method == "percentile":
        # 50th percentile, i.e. the median
        return calculate_depth_percentile(depth_array, bbox, 50)

    # Unknown method: simple mean over the whole box, clamped to the array
    left, top, right, bottom = (int(value) for value in bbox)
    left = max(0, left)
    top = max(0, top)
    right = min(depth_array.shape[1], right)
    bottom = min(depth_array.shape[0], bottom)

    region = depth_array[top:bottom, left:right]
    samples = region[(region > 0) & (region < 10000)]

    if len(samples) > 0:
        return np.mean(samples)
    return 0.0


In [None]:
def calculate_frame_difference(frame1, frame2):
    """
    Normalized mean absolute difference between two frames.

    Both frames are resized to a quarter of ``frame1``'s width and height and,
    when ``frame1`` has a channel axis, converted to grayscale before comparing.

    Returns:
        Mean absolute pixel difference divided by 255, or 1.0 when either frame
        is None (treated as "completely different").
    """
    if frame1 is None or frame2 is None:
        return 1.0

    # Compare quarter-size thumbnails to keep the check cheap
    height, width = frame1.shape[:2]
    thumb_size = (width // 4, height // 4)
    thumb_a = cv2.resize(frame1, thumb_size)
    thumb_b = cv2.resize(frame2, thumb_size)

    # A channel axis on the first thumbnail means both get converted to grayscale
    if len(thumb_a.shape) == 3:
        thumb_a = cv2.cvtColor(thumb_a, cv2.COLOR_RGB2GRAY)
        thumb_b = cv2.cvtColor(thumb_b, cv2.COLOR_RGB2GRAY)

    # Float conversion avoids unsigned wrap-around in the subtraction
    delta = thumb_a.astype(np.float32) - thumb_b.astype(np.float32)
    return np.mean(np.abs(delta)) / 255.0


def process_frame_with_depth_optimized(color_frame, depth_frame, frame_idx, run_detection=True, previous_frame=None):
    """
    Detect/track objects in one frame and annotate them with a depth estimate.

    Args:
        color_frame: RGB color frame as numpy array (left unmodified)
        depth_frame: Depth frame as numpy array, indexed with the same pixel
            coordinates as the bounding boxes
            (NOTE(review): assumes depth is aligned to the colour image — confirm)
        frame_idx: Current frame index; detection also runs when it is a
            multiple of DETECTION_INTERVAL
        run_detection: Force YOLO detection on this frame
        previous_frame: Previous frame for the similarity check, or None to
            disable the check for this call

    Returns:
        (annotated_frame, should_skip): ``(None, True)`` when the frame is too
        similar to ``previous_frame``; otherwise the annotated copy and False.

    Side effects:
        Updates the module-level ``last_detections`` / ``last_results`` cache
        and advances the shared ``tracker``.
    """
    # Declared up front so every read and write below targets the module-level cache
    global last_detections, last_results

    # Skip similar frames if enabled
    if SKIP_SIMILAR_FRAMES and previous_frame is not None:
        frame_diff = calculate_frame_difference(color_frame, previous_frame)
        if frame_diff < FRAME_DIFF_THRESHOLD:
            return None, True  # Skip this frame

    # Detect when asked to, on interval frames, or when there is no cached
    # result to reuse (otherwise the tracker would be handed None).
    needs_detection = (
        run_detection
        or frame_idx % DETECTION_INTERVAL == 0
        or last_detections is None
        or last_results is None
    )
    if needs_detection:
        results = model(color_frame, imgsz=YOLO_INPUT_SIZE, verbose=False)[0]
        detections = sv.Detections.from_ultralytics(results)

        # Store results for tracking-only frames
        last_detections = detections
        last_results = results
    else:
        # Reuse the most recent detections for tracking only
        detections = last_detections
        results = last_results

    # Update tracker
    detections = tracker.update_with_detections(detections)

    # One depth estimate per tracked box
    depth_values = []
    for bbox in detections.xyxy:
        if DEPTH_METHOD == "center_region":
            avg_depth = calculate_depth_center_region_fast(depth_frame, bbox, CENTER_REGION_RATIO)
        else:
            avg_depth = calculate_avg_depth_in_bbox(depth_frame, bbox, method=DEPTH_METHOD)
        depth_values.append(avg_depth)

    # Build exactly one label per detection; the label annotator needs the
    # counts to match. Missing id arrays are replaced by None placeholders.
    box_count = len(detections.xyxy)
    class_ids = detections.class_id if detections.class_id is not None else [None] * box_count
    tracker_ids = detections.tracker_id if detections.tracker_id is not None else [None] * box_count
    class_names = getattr(results, 'names', None)

    labels = []
    for i in range(box_count):
        class_id = class_ids[i]
        tracker_id = tracker_ids[i]
        if class_id is not None and class_names is not None:
            class_name = class_names[class_id]
        else:
            class_name = "unknown"

        if depth_values[i] > 0:
            depth_m = depth_values[i] / 1000.0  # depth values are treated as millimetres
            label = f"#{tracker_id} {class_name} ({depth_m:.2f}m)"
        else:
            label = f"#{tracker_id} {class_name} (No depth)"

        labels.append(label)

    # Annotate a copy: the annotators draw directly on the array they are given,
    # and the caller still needs the clean frame for the similarity check.
    annotated_frame = box_annotator.annotate(color_frame.copy(), detections=detections)
    annotated_frame = label_annotator.annotate(annotated_frame, detections=detections, labels=labels)

    return annotated_frame, False


# Backward compatibility
def process_frame_with_depth(color_frame, depth_frame):
    """
    Legacy entry point kept for older callers.

    Runs the optimized pipeline with frame index 0, detection forced on and no
    previous frame, and returns only the annotated frame.
    """
    annotated_frame, _should_skip = process_frame_with_depth_optimized(
        color_frame,
        depth_frame,
        0,      # frame_idx
        True,   # run_detection
        None,   # previous_frame
    )
    return annotated_frame


# Module-level cache shared with process_frame_with_depth_optimized, which
# declares both names `global`: they hold the detections/results of the most
# recent YOLO run so that frames which skip detection can reuse them.
# Both stay None until the first detection has run; re-running this cell resets them.
last_detections = None
last_results = None


In [None]:
# Initialize video writer after BAG file verification
out = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, FPS, (FRAME_WIDTH, FRAME_HEIGHT))
if not out.isOpened():
    # Without this check a failed writer drops every frame without an error
    raise RuntimeError(f"Could not open video writer for {OUTPUT_VIDEO_PATH}")
print(f"Video writer initialized: {OUTPUT_VIDEO_PATH}")

# Process frames and create video
frame_count = 0        # frames written so far; also the index handed to the detector
frames_read = 0        # frames successfully pulled from the BAG file
empty_frame_count = 0  # consecutive empty colour frames
skipped_frames = 0     # frames dropped by the similarity check
processed_frames = 0   # frames annotated and written
previous_frame = None  # clean copy of the last written frame (similarity reference)

print("Starting optimized frame processing...")
print(f"Performance settings:")
print(f"- Detection interval: every {DETECTION_INTERVAL} frames")
print(f"- YOLO input size: {YOLO_INPUT_SIZE}")
print(f"- Skip similar frames: {SKIP_SIMILAR_FRAMES}")
print(f"- Depth method: {DEPTH_METHOD} (fastest)")

# try/finally guarantees the writer is released (and the file finalized) even
# if the loop is interrupted or raises.
try:
    while not bag_reader.is_eof():
        # Check frame limit before decoding another frame
        if MAX_FRAMES is not None and frame_count >= MAX_FRAMES:
            print(f"Reached maximum frame limit ({MAX_FRAMES}). Stopping.")
            break

        try:
            # Read next frame from BAG file
            im_rgbd = bag_reader.next_frame()
            current_frame_index = frame_count

        except RuntimeError as e:
            print(f"RuntimeError during next_frame (likely EOF): {e}")
            break

        frames_read += 1

        # Get color and depth data
        color_o3d_image = im_rgbd.color
        depth_o3d_image = im_rgbd.depth

        # Check for empty frames
        is_empty = False
        if hasattr(color_o3d_image, 'is_empty'):
            is_empty = color_o3d_image.is_empty()
        else:
            if np.asarray(color_o3d_image).size == 0:
                is_empty = True

        if is_empty:
            print(f"Frame {current_frame_index + 1}: Color data is empty. Skipping.")
            empty_frame_count += 1

            if empty_frame_count >= CONSECUTIVE_EMPTY_LIMIT:
                print(f"{CONSECUTIVE_EMPTY_LIMIT} consecutive empty frames detected. Stopping.")
                break
            else:
                continue
        else:
            empty_frame_count = 0

        # Convert to numpy arrays
        color_np = np.asarray(color_o3d_image)
        depth_np = np.asarray(depth_o3d_image)

        # Handle different color formats
        if color_np.ndim == 2:
            # Convert grayscale to RGB
            color_rgb = cv2.cvtColor(color_np, cv2.COLOR_GRAY2RGB)
        elif color_np.ndim == 3:
            # Assume it's already RGB
            color_rgb = color_np
        else:
            print(f"Frame {frame_count}: Unexpected image dimensions: {color_np.shape}. Skipping.")
            continue

        # Process frame with optimized detection and depth calculation
        try:
            # Snapshot the clean frame before processing: annotation may draw on
            # color_rgb in place, which would contaminate the similarity reference.
            clean_frame = color_rgb.copy() if SKIP_SIMILAR_FRAMES else None

            run_detection = (frame_count % DETECTION_INTERVAL == 0)
            annotated_frame, should_skip = process_frame_with_depth_optimized(
                color_rgb, depth_np, frame_count, run_detection, previous_frame
            )

            # Skip frame if it's too similar to previous
            if should_skip:
                skipped_frames += 1
                # frame_count does not advance while skipping, so the status
                # cadence is keyed on the skip counter instead.
                if skipped_frames % 30 == 0:
                    print(f"Frame {frame_count}: Skipped (similar to previous) - {skipped_frames} total skipped")
                continue

            # Update previous frame for similarity comparison
            if SKIP_SIMILAR_FRAMES:
                previous_frame = clean_frame

            # Convert RGB to BGR for OpenCV video writer
            frame_bgr = cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR)

            # The writer was opened for a fixed size; frames of a different
            # size can be dropped silently, so resize them to fit.
            if (frame_bgr.shape[1], frame_bgr.shape[0]) != (FRAME_WIDTH, FRAME_HEIGHT):
                frame_bgr = cv2.resize(frame_bgr, (FRAME_WIDTH, FRAME_HEIGHT))

            # Write frame to video
            out.write(frame_bgr)

            processed_frames += 1
            frame_count += 1

            # Optimized progress reporting
            if frame_count % 10 == 0:  # Print every 10 frames instead of every frame
                detection_status = "DETECTION" if run_detection else "TRACKING"
                print(f"Frame {frame_count}: {detection_status} - Processed: {processed_frames}, Skipped: {skipped_frames}")

            # Optional: Display first few frames for verification
            if processed_frames <= 3:
                plt.figure(figsize=(12, 8))
                plt.imshow(annotated_frame)
                plt.title(f"Frame {frame_count} - Optimized Object Detection with Depth")
                plt.axis('off')
                plt.show()

        except Exception as e:
            # Best-effort: report the failing frame and carry on with the next one
            print(f"Error processing frame {frame_count}: {e}")
            continue
finally:
    # Release video writer
    out.release()
    print("Video writer released")

# Note: BAG reader will be automatically closed when out of scope
# No explicit close() method call needed for Open3D RSBagReader

print(f"\nOptimized processing completed!")
print(f"Total frames read: {frames_read}")
print(f"Frames actually processed: {processed_frames}")
print(f"Frames skipped (similar): {skipped_frames}")
print(f"Processing efficiency: {processed_frames/max(frames_read, 1)*100:.1f}%")
print(f"Video saved as: {OUTPUT_VIDEO_PATH}")

# Calculate performance statistics
# Detection ran on written frames whose index was a multiple of DETECTION_INTERVAL,
# i.e. ceil(processed_frames / DETECTION_INTERVAL) of them.
detection_frames = processed_frames // DETECTION_INTERVAL + (1 if processed_frames % DETECTION_INTERVAL > 0 else 0)
tracking_frames = processed_frames - detection_frames
print(f"\nPerformance breakdown:")
print(f"- Detection frames: {detection_frames}")
print(f"- Tracking-only frames: {tracking_frames}")
print(f"- Speed improvement: ~{DETECTION_INTERVAL}x faster inference")


In [None]:
# Optional: Display video information
if not os.path.exists(OUTPUT_VIDEO_PATH):
    print("Error: Output video file was not created.")
else:
    # Size on disk converted from bytes to MB
    file_size = os.path.getsize(OUTPUT_VIDEO_PATH) / (1024 * 1024)
    print(f"\nOutput video details:")
    print(f"Path: {OUTPUT_VIDEO_PATH}")
    print(f"Size: {file_size:.2f} MB")
    # Duration follows from the frame counter and the writer's frame rate
    print(f"Estimated duration: {frame_count / FPS:.1f} seconds")


In [None]:
# Optional: Advanced depth analysis and visualization functions
# These functions can help you analyze and visualize depth calculation results

def visualize_depth_calculation(color_frame, depth_frame, bbox, method="robust_center"):
    """
    Plot how the depth calculation methods behave on one bounding box.

    Draws a 2x3 figure: colour image with the box, full depth image with the
    box, the cropped depth, the centre region (for the centre-based methods),
    a histogram of in-range depths, and a text panel comparing all methods.

    Args:
        color_frame: colour image to display
        depth_frame: depth array, indexed as [row, column]
        bbox: bounding box coordinates [x1, y1, x2, y2]
        method: "center_region" or "robust_center" enables the centre-region panel

    Returns:
        dict mapping each method name to its depth estimate (0.0 when the
        method failed).
    """
    import matplotlib.pyplot as plt

    # Clamp the box to the depth array so the crops below are valid
    x1, y1, x2, y2 = map(int, bbox)
    x1, y1 = max(0, x1), max(0, y1)
    x2 = min(depth_frame.shape[1], x2)
    y2 = min(depth_frame.shape[0], y2)

    # Get the different depth calculations
    methods = ["center_region", "robust_center", "percentile"]
    depths = {}
    for m in methods:
        try:
            depths[m] = calculate_avg_depth_in_bbox(depth_frame, bbox, method=m)
        except Exception as exc:
            # Keep the figure usable when one method fails, but say why
            print(f"Depth method '{m}' failed: {exc}")
            depths[m] = 0.0

    # Create visualization
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # Original color image with bounding box
    axes[0, 0].imshow(color_frame)
    # Draw bounding box manually to avoid matplotlib.patches dependency
    axes[0, 0].plot([x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1], 'r-', linewidth=2)
    axes[0, 0].set_title('Color Image with Bounding Box')
    axes[0, 0].axis('off')

    # Full depth image
    axes[0, 1].imshow(depth_frame, cmap='jet')
    # Draw bounding box manually to avoid matplotlib.patches dependency
    axes[0, 1].plot([x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1], 'r-', linewidth=2)
    axes[0, 1].set_title('Depth Image')
    axes[0, 1].axis('off')

    # Cropped depth region (an empty crop cannot be drawn, so it is skipped)
    bbox_depth = depth_frame[y1:y2, x1:x2]
    has_pixels = bbox_depth.size > 0
    if has_pixels:
        axes[0, 2].imshow(bbox_depth, cmap='jet')
    axes[0, 2].set_title('Bounding Box Depth')
    axes[0, 2].axis('off')

    # Center region visualization
    if has_pixels and (method == "center_region" or method == "robust_center"):
        height, width = bbox_depth.shape[:2]
        margin_x = int(width * (1 - CENTER_REGION_RATIO) / 2)
        margin_y = int(height * (1 - CENTER_REGION_RATIO) / 2)

        # Blank the border. The far edges use explicit start indices because
        # a negative-zero slice ([-0:]) would select, and blank, everything.
        center_region = bbox_depth.copy()
        center_region[:margin_y, :] = 0
        center_region[height - margin_y:, :] = 0
        center_region[:, :margin_x] = 0
        center_region[:, width - margin_x:] = 0

        axes[1, 0].imshow(center_region, cmap='jet')
        axes[1, 0].set_title(f'Center Region ({CENTER_REGION_RATIO*100:.0f}%)')
    # Hide the axes whether or not the panel was drawn
    axes[1, 0].axis('off')

    # Depth histogram
    valid_depth = bbox_depth[(bbox_depth > 0) & (bbox_depth < 10000)]
    if len(valid_depth) > 0:
        axes[1, 1].hist(valid_depth, bins=50, alpha=0.7)
        axes[1, 1].axvline(depths['robust_center'], color='red', linestyle='--',
                          label=f'Robust: {depths["robust_center"]:.0f}mm')
        axes[1, 1].axvline(depths['percentile'], color='green', linestyle='--',
                          label=f'Median: {depths["percentile"]:.0f}mm')
        axes[1, 1].set_title('Depth Distribution')
        axes[1, 1].set_xlabel('Depth (mm)')
        axes[1, 1].set_ylabel('Frequency')
        axes[1, 1].legend()

    # Results comparison (real newlines, so each method gets its own line)
    axes[1, 2].axis('off')
    results_text = "Depth Results (mm):\n\n"
    for method_name, depth_val in depths.items():
        results_text += f"{method_name}: {depth_val:.1f}\n"
    axes[1, 2].text(0.1, 0.5, results_text, fontsize=12,
                   transform=axes[1, 2].transAxes, verticalalignment='center')
    axes[1, 2].set_title('Method Comparison')

    plt.tight_layout()
    plt.show()

    return depths


def analyze_depth_consistency(depth_frame, detections, method="robust_center"):
    """
    Compare the depth calculation methods across every detection in a frame.

    Args:
        depth_frame: depth array passed through to calculate_avg_depth_in_bbox
        detections: object exposing ``xyxy``, an iterable of bounding boxes
        method: accepted for interface compatibility; not used, because every
            method is always evaluated

    Returns:
        list of dicts, one per detection, with keys ``detection_id``, ``bbox``
        and one entry per method name ("center_region", "robust_center",
        "percentile"). A method that raises is recorded as 0.0.
    """
    # Loop-invariant, so built once rather than per detection
    methods = ("center_region", "robust_center", "percentile")
    depth_stats = []

    for i, bbox in enumerate(detections.xyxy):
        depths = {}

        for m in methods:
            try:
                depths[m] = calculate_avg_depth_in_bbox(depth_frame, bbox, method=m)
            except Exception as exc:
                # Best-effort: record 0.0 but report the cause. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                print(f"Depth method '{m}' failed for detection {i}: {exc}")
                depths[m] = 0.0

        depth_stats.append({
            'detection_id': i,
            'bbox': bbox,
            **depths
        })

    return depth_stats


# Example usage (uncomment to test with your data):
# depth_stats = analyze_depth_consistency(depth_np, detections)
# print("Depth analysis for current frame:")
# for stat in depth_stats:
#     print(f"Detection {stat['detection_id']}: Robust={stat['robust_center']:.1f}mm, "
#           f"Median={stat['percentile']:.1f}mm, Center={stat['center_region']:.1f}mm")


In [None]:
# Additional performance monitoring and optimization tools

import time

def benchmark_processing_speed(color_frame, depth_frame, iterations=10):
    """
    Time the legacy and the optimized frame-processing entry points.

    Both loops go through the same module-level model and tracker, so the calls
    also advance tracker state.

    Args:
        color_frame: sample colour frame
        depth_frame: matching depth frame
        iterations: number of calls per method; must be positive

    Returns:
        (original_time, optimized_time, speedup): mean seconds per frame for
        each method and their ratio (0 when the optimized time is zero).

    Raises:
        ValueError: if ``iterations`` is not positive.
    """
    # Validate first: a non-positive count would otherwise divide by zero
    # after part of the report has already been printed.
    if iterations <= 0:
        raise ValueError("iterations must be a positive integer")

    print("🔥 Performance Benchmark Results:")
    print("=" * 50)

    # perf_counter is monotonic and high-resolution, which suits interval timing

    # Benchmark original method
    start_time = time.perf_counter()
    for _ in range(iterations):
        _ = process_frame_with_depth(color_frame, depth_frame)
    original_time = (time.perf_counter() - start_time) / iterations
    print(f"Original method: {original_time:.3f}s per frame")

    # Benchmark optimized method
    start_time = time.perf_counter()
    for i in range(iterations):
        run_detection = (i % DETECTION_INTERVAL == 0)
        _, _ = process_frame_with_depth_optimized(color_frame, depth_frame, i, run_detection, None)
    optimized_time = (time.perf_counter() - start_time) / iterations
    print(f"Optimized method: {optimized_time:.3f}s per frame")

    speedup = original_time / optimized_time if optimized_time > 0 else 0
    print(f"🚀 Speed improvement: {speedup:.1f}x faster!")
    print(f"💰 Time saved per frame: {(original_time - optimized_time)*1000:.1f}ms")

    return original_time, optimized_time, speedup


def estimate_processing_time(total_frames, fps=6, detection_interval=None, skip_similar_frames=None):
    """
    Print a rough estimate of the total processing time.

    The model is a fixed heuristic: 0.1 s base cost per frame, detection frames
    five times as expensive as tracking-only frames, and a 30% reduction when
    similar-frame skipping is on.

    Args:
        total_frames: number of frames to process (0 is allowed)
        fps: output video frame rate; must be positive
        detection_interval: detection runs every N frames; defaults to the
            DETECTION_INTERVAL setting
        skip_similar_frames: whether similar frames are skipped; defaults to
            the SKIP_SIMILAR_FRAMES setting

    Raises:
        ValueError: if ``total_frames`` is negative, or ``fps`` or the
            detection interval is not positive.
    """
    # Fall back to the notebook configuration when no override is given
    if detection_interval is None:
        detection_interval = DETECTION_INTERVAL
    if skip_similar_frames is None:
        skip_similar_frames = SKIP_SIMILAR_FRAMES

    # Validate before printing so a bad argument never yields a partial report
    if total_frames < 0:
        raise ValueError("total_frames must not be negative")
    if fps <= 0:
        raise ValueError("fps must be positive")
    if detection_interval <= 0:
        raise ValueError("detection_interval must be positive")

    # Estimate processing time per frame
    detection_ratio = 1.0 / detection_interval
    base_time_per_frame = 0.1  # Base estimate in seconds

    # Detection is ~5x slower than tracking
    avg_time_per_frame = base_time_per_frame * (detection_ratio * 5 + (1 - detection_ratio) * 1)

    # Account for frame skipping
    if skip_similar_frames:
        avg_time_per_frame *= 0.7  # Assume 30% frames skipped

    total_time_seconds = total_frames * avg_time_per_frame
    video_seconds = total_frames / fps

    # For an empty video the ratio is computed per frame, which is the same
    # quantity without dividing by a zero duration.
    if video_seconds > 0:
        realtime_ratio = total_time_seconds / video_seconds
    else:
        realtime_ratio = avg_time_per_frame * fps

    print(f"📊 Processing Time Estimate:")
    print(f"- Total frames: {total_frames}")
    print(f"- Estimated time per frame: {avg_time_per_frame:.3f}s")
    print(f"- Total estimated processing time: {total_time_seconds/60:.1f} minutes")
    print(f"- Output video duration: {video_seconds:.1f} seconds")
    print(f"- Processing ratio: {realtime_ratio:.1f}x real-time")


def get_gpu_info():
    """
    Report whether a CUDA GPU is visible to PyTorch and suggest an input size.

    Prints the device name and total memory when a GPU is available, a CPU
    notice otherwise, and a short message when PyTorch cannot be imported.
    """
    try:
        import torch

        if not torch.cuda.is_available():
            print("🖥️  Using CPU (no GPU detected)")
            print("⚠️  Recommended: Use YOLO_INPUT_SIZE=320-640 for faster processing")
        else:
            # Query device 0 only
            gpu_name = torch.cuda.get_device_name(0)
            device_props = torch.cuda.get_device_properties(0)
            gpu_memory = device_props.total_memory / 1e9
            print(f"🎮 GPU Available: {gpu_name}")
            print(f"💾 GPU Memory: {gpu_memory:.1f}GB")
            print("✅ Recommended: Use YOLO_INPUT_SIZE=640-1280")
    except ImportError:
        print("🔍 PyTorch not available for GPU detection")


# Performance monitoring functions
def monitor_memory_usage():
    """Print the resident set size of the current process in megabytes."""
    import psutil

    rss_bytes = psutil.Process().memory_info().rss
    memory_mb = rss_bytes / 1024 / 1024
    print(f"💾 Current memory usage: {memory_mb:.1f}MB")


# Example usage (uncomment to run benchmarks):
# print("System Information:")
# get_gpu_info()
# monitor_memory_usage()
# 
# # Estimate processing time for your video
# estimate_processing_time(MAX_FRAMES or 1000)
#
# # Benchmark performance (requires a sample frame)
# # benchmark_processing_speed(sample_color_frame, sample_depth_frame)
