# Perform Depth Estimation

In [1]:
from google.colab import drive
# Step 1: Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import cv2
import torch

# Function to generate depth maps
def process_depth_estimation(input_folder, output_folder, model, transform, device):
    """
    Generate depth maps for all frames in the input folder and save them in the output folder.

    Args:
        input_folder (str): Path to input frames.
        output_folder (str): Path to save depth maps.
        model: MiDaS model.
        transform: MiDaS preprocessing transformations.
        device: Torch device (cuda or cpu).
    """
    for root, dirs, files in os.walk(input_folder):
        for file in files:
            if file.endswith(".jpg") or file.endswith(".png"):
                frame_path = os.path.join(root, file)

                # Create subfolder structure in output directory
                relative_path = os.path.relpath(root, input_folder)
                output_subfolder = os.path.join(output_folder, relative_path)
                os.makedirs(output_subfolder, exist_ok=True)

                try:
                    # Read the image
                    frame = cv2.imread(frame_path)

                    # Validate image
                    if frame is None or frame.size == 0:
                        print(f"Skipping invalid or empty image: {frame_path}")
                        continue

                    print(f"Processing: {frame_path}")

                    # Convert frame to RGB
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    # Apply MiDaS transform
                    # DO NOT add `.unsqueeze(0)` as transform handles batching
                    input_tensor = transform(frame_rgb).to(device)
                    print(f"Input Tensor Shape: {input_tensor.shape}")  # Debug tensor shape

                    # Predict depth map
                    with torch.no_grad():
                        prediction = model(input_tensor)
                        depth_map = prediction.squeeze().cpu().numpy()

                    # Normalize and save depth map
                    depth_map_normalized = cv2.normalize(
                        depth_map, None, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U
                    )
                    depth_output_path = os.path.join(output_subfolder, f"depth_{file}")
                    cv2.imwrite(depth_output_path, depth_map_normalized)

                except Exception as e:
                    print(f"Error processing {frame_path}: {e}")

# Load MiDaS model
def load_midas_model():
    """
    Load MiDaS model and its transformations.
    """
    model_type = "DPT_Large"  # Options: "DPT_Large", "DPT_Hybrid", "MiDaS_small"
    model = torch.hub.load("intel-isl/MiDaS", model_type, trust_repo=True)
    model.eval()  # Set to evaluation mode

    # Load appropriate transformations
    transforms = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True)
    if model_type in ["DPT_Large", "DPT_Hybrid"]:
        transform = transforms.dpt_transform
    else:
        transform = transforms.small_transform

    return model, transform

# Initialize MiDaS model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas_model, midas_transform = load_midas_model()
midas_model.to(device)

# Generate depth maps for train, val, and test datasets
process_depth_estimation(
    input_folder="/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train",
    output_folder="/content/drive/MyDrive/TharuWorks/Depth_Splits/Negative_Extract/train",
    model=midas_model,
    transform=midas_transform,
    device=device,
)

process_depth_estimation(
    input_folder="/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val",
    output_folder="/content/drive/MyDrive/TharuWorks/Depth_Splits/Negative_Extract/val",
    model=midas_model,
    transform=midas_transform,
    device=device,
)

process_depth_estimation(
    input_folder="/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test",
    output_folder="/content/drive/MyDrive/TharuWorks/Depth_Splits/Negative_Extract/test",
    model=midas_model,
    transform=midas_transform,
    device=device,
)

Downloading: "https://github.com/intel-isl/MiDaS/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt" to /root/.cache/torch/hub/checkpoints/dpt_large_384.pt
100%|██████████| 1.28G/1.28G [00:04<00:00, 287MB/s]
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/frame_0.jpg
Input Tensor Shape: torch.Size([1, 3, 384, 672])
Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/frame_90.jpg
Input Tensor Shape: torch.Size([1, 3, 384, 672])
Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/frame_180.jpg
Input Tensor Shape: torch.Size([1, 3, 384, 672])
Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w1-33/frame_0.jpg
Input Tensor Shape: torch.Size([1, 3, 384, 672])
Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w1-33/frame_90.jpg
Input Tensor Shape: torch.Size([1, 3, 384, 672])
Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w1-33/frame_180.jpg
Input Tensor Shape: torch.Size([1, 3, 384, 672])
Processing: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w1-33/frame_270.jpg
Input Tensor Shape: torch.Size([1, 3, 38

# Prepare Data for Roboflow Annotation

In [3]:
import json
import shutil
import os

def prepare_roboflow_input_with_metadata_and_logs(input_frames_folder, depth_maps_folder, output_folder):
    """
    Combine frames, depth maps, and metadata logs into a Roboflow-compatible folder structure.

    Args:
        input_frames_folder (str): Path to the folder containing input frames.
        depth_maps_folder (str): Path to the folder containing depth maps.
        output_folder (str): Path to save the combined dataset for Roboflow.
    """
    os.makedirs(output_folder, exist_ok=True)

    for root, _, files in os.walk(input_frames_folder):
        for file in files:
            if file.endswith(".jpg") or file.endswith(".png"):
                frame_path = os.path.join(root, file)

                # Compute the relative path and match with depth map
                relative_path = os.path.relpath(root, input_frames_folder)
                depth_map_path = os.path.join(depth_maps_folder, relative_path, f"depth_{file}")
                metadata_file_path = os.path.join(root, "metadata_log.json")

                # Ensure output subfolder exists
                output_subfolder = os.path.join(output_folder, relative_path)
                os.makedirs(output_subfolder, exist_ok=True)

                if os.path.exists(depth_map_path):
                    # Copy frame and depth map to the output directory
                    frame_output_path = os.path.join(output_subfolder, file)
                    depth_output_path = os.path.join(output_subfolder, f"depth_{file}")

                    shutil.copy(frame_path, frame_output_path)
                    shutil.copy(depth_map_path, depth_output_path)
                    print(f"Copied frame and depth map: {file} to {output_subfolder}")
                else:
                    print(f"Depth map not found for: {frame_path}")

                # Copy metadata file if it exists
                if os.path.exists(metadata_file_path):
                    metadata_output_path = os.path.join(output_subfolder, "metadata_log.json")
                    shutil.copy(metadata_file_path, metadata_output_path)
                    print(f"Copied metadata log: {metadata_file_path} to {output_subfolder}")
                else:
                    print(f"Metadata log not found in: {root}")

    print(f"All files and metadata logs have been organized in: {output_folder}")

In [4]:
# Prepare Roboflow input for train, val, and test
prepare_roboflow_input_with_metadata_and_logs(
    input_frames_folder="/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train",
    depth_maps_folder="/content/drive/MyDrive/TharuWorks/Depth_Splits/Negative_Extract/train",
    output_folder="/content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train"
)

prepare_roboflow_input_with_metadata_and_logs(
    input_frames_folder="/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val",
    depth_maps_folder="/content/drive/MyDrive/TharuWorks/Depth_Splits/Negative_Extract/val",
    output_folder="/content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/val"
)

prepare_roboflow_input_with_metadata_and_logs(
    input_frames_folder="/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test",
    depth_maps_folder="/content/drive/MyDrive/TharuWorks/Depth_Splits/Negative_Extract/test",
    output_folder="/content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/test"
)

Copied frame and depth map: frame_0.jpg to /content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train/w2_19
Copied metadata log: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/metadata_log.json to /content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train/w2_19
Copied frame and depth map: frame_90.jpg to /content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train/w2_19
Copied metadata log: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/metadata_log.json to /content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train/w2_19
Copied frame and depth map: frame_180.jpg to /content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train/w2_19
Copied metadata log: /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/metadata_log.json to /content/drive/MyDrive/TharuWorks/Roboflow_Input/Negative_Extract/train/w2_19
Copied frame and depth map: frame_0.jpg to /content/drive/MyDriv