In [1]:
from google.colab import drive
drive.mount("/content/gdrive/", force_remount=True)
import os

os.chdir("/content/gdrive/MyDrive/bundletrack_process_depth")
!nvidia-smi

Mounted at /content/gdrive/
Thu Jul  6 21:15:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------

In [2]:
# One-time setup on a fresh runtime (uncomment to run):
# !pip install moviepy
# !pip install timm

In [55]:
# Convert the video into individual PNG frames: frames/frame_<index>.png
from moviepy.editor import VideoFileClip
import cv2
from tqdm import tqdm

video_path = "4016_toss1.mov"
frames_folder = "frames/"

# Create the destination folder if it doesn't exist (no-op when it already does).
os.makedirs(frames_folder, exist_ok=True)

# Load the video clip; it is closed in the `finally` below so the underlying
# reader is released even if writing a frame fails part-way through.
clip = VideoFileClip(video_path)
try:
    # Iterate through each frame in the video
    for i, frame in tqdm(enumerate(clip.iter_frames())):
        # Construct the frame filename
        frame_filename = os.path.join(frames_folder, f"frame_{i}.png")

        # moviepy yields frames in RGB channel order while cv2.imwrite expects BGR,
        # so swap the channels of colour frames before saving.
        if frame.ndim == 3:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Save the frame as a PNG image; imwrite signals failure by returning False
        # rather than raising, so check it explicitly.
        if not cv2.imwrite(frame_filename, frame):
            raise IOError(f"Could not write frame to {frame_filename}")
finally:
    clip.close()



1619it [04:56,  5.47it/s]


In [56]:
import torch
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import numpy as np
import torch.nn.functional as F

In [43]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# Fetch the pretrained MiDaS depth estimator from torch.hub, move it to the
# selected device and switch it to evaluation mode.
model_type = 'DPT_Large'
depth_estimator = torch.hub.load("intel-isl/MiDaS", model_type).to(device).eval()

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


In [None]:
# Estimate a depth map for every extracted frame and save it as a 16-bit PNG
# named depths/<zero-padded frame id>.png.

fig = plt.figure(figsize=(6, 4))  # only relevant to the optional preview at the end of the loop

# Preprocessing applied to each frame: to tensor -> centre crop to 1080x1440 -> resize to 480x640.
# NOTE(review): no mean/std normalisation is applied here. MiDaS publishes its own
# input transforms through torch.hub; confirm that bypassing them is intentional.
image_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.CenterCrop(size=(1080, 1440)),
     transforms.Resize(size=(480, 640))]
)

source_folder = "frames/"
destination_folder = "depths/"

# Image.save() does not create missing directories, so make sure the output folder exists.
os.makedirs(destination_folder, exist_ok=True)

# Largest value representable in the uint16 depth images written below.
UINT16_MAX = float(np.iinfo(np.uint16).max)


def _frame_id(filename):
    """Return the digits of a "frame_<id>.<ext>" filename, or None if it has no numeric id."""
    pieces = filename.split("_")
    if len(pieces) < 2:
        return None
    candidate = pieces[1].split(".")[0]
    return candidate if candidate.isdigit() else None


# Load all frames in the dataset, skipping entries without a numeric frame id
# (e.g. stray files in the folder), in ascending frame order.
image_paths = sorted(
    (name for name in os.listdir(source_folder) if _frame_id(name) is not None),
    key=lambda name: int(_frame_id(name)),
)

with torch.no_grad():
    for image_path in tqdm(image_paths):
        load_path = os.path.join(source_folder, image_path)

        # Convert the PIL image to a PyTorch tensor. The context manager closes the
        # file handle, and convert("RGB") guarantees three channels.
        with Image.open(load_path) as frame:
            image = image_transform(frame.convert("RGB")).to(device)
        image = torch.unsqueeze(image, dim=0)  # add the batch dimension

        # Estimate the depth map for this frame.
        # Presumably one (H, W) map per batch item, i.e. (1, 480, 640) here -- TODO confirm.
        image_depth = depth_estimator(image)

        # Scale the prediction so that its value range spans 1000 units.
        # NOTE(review): the minimum is not subtracted, so this is a rescale rather than
        # a 0-1000 min-max normalisation; MiDaS also documents its output as relative
        # inverse depth. Confirm both against what the downstream consumer expects.
        depth_range = torch.max(image_depth) - torch.min(image_depth)
        # Guard against a completely flat prediction (zero range -> division by zero).
        depth_range = depth_range.clamp_min(torch.finfo(image_depth.dtype).eps)
        image_depth = 1000 * image_depth / depth_range

        # Keep values inside the uint16 range so the cast below is well defined.
        image_depth = torch.clamp(image_depth, min=0.0, max=UINT16_MAX)

        # Save the depth map as a 16-bit image.
        image_depth = image_depth[0].cpu().numpy().astype(np.uint16)
        image_depth = Image.fromarray(image_depth)

        frame_id = _frame_id(image_path)
        save_name = frame_id.zfill(4) + '.png'
        save_path = os.path.join(destination_folder, save_name)
        image_depth.save(save_path)

        # # show depth maps
        # plt.imshow(image[0].permute(1, 2, 0).cpu())
        # plt.show()
        # plt.imshow(image_depth)
        # plt.show()
        # break



 52%|█████▏    | 836/1619 [06:08<05:53,  2.22it/s]