In [1]:
import argparse
import numpy as np
import os
import torch
import tifffile as tiff

from video_depth_anything.video_depth import VideoDepthAnything
from utils.dc_utils import read_video_frames, save_video
from torchinfo import summary
from torchvision.transforms import Compose
from video_depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
import cv2
from thop import profile
import torch.profiler as profiler
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from einops import rearrange
import imageio
from natsort import natsorted
import subprocess
import warnings
import re
from utils.dc_utils import read_video_frames

from video_depth_anything.video_depth import INFER_LEN

In [None]:
# Configuration and path discovery: collect the ground-truth depth files,
# the per-method depth predictions and the matching RGB videos in root_dir.
root_dir = '/export/data/ffeiden/data/vkitti_videos/'
cam = 'Camera_0'
device = 'cuda:3'
encoder = 'vits' # Alternative 'vitl'
generate = True
scene = ['Scene01']
FPS = 25


vis_methods = ['VDA_s_vits', 'VDA_vits', 'DepthAny2_raw', 'DepthAny2']

all_data = os.listdir(root_dir)

# Sort Data
methods = ['DepthAny', 'DepthAny2', 'PrimeDepth']
path_dic = {}

# 'gt' is iterated in addition to the prediction methods: without it the
# ground-truth branch below never runs and path_dic['gt'] (needed to derive
# the RGB video names) does not exist. `methods` itself is left unchanged.
for key in ['gt', *methods]:
    if key == 'gt':
        # Ground-truth files are matched by the suffix '<cam>_gt.tiff'.
        path_dic[key] = natsorted([p for p in all_data if cam+'_'+key+'.tiff' in p])
    else:
        # Each method contributes two lists: files ending in '<cam>.tiff'
        # and files ending in '<cam>_raw.tiff'.
        path_dic[key] = natsorted([p for p in all_data if key+'_' in p and cam+'.tiff' in p])
        path_dic[key+'_raw'] = natsorted([p for p in all_data if key+'_' in p and cam+'_raw.tiff' in p])

# RGB video names are derived from the ground-truth names by swapping the suffix.
path_dic['rgb'] = [p.replace('_gt.tiff', '.mp4') for p in path_dic['gt']]



In [None]:

if generate:
    # Generate DepthAnythingVideo predictions with subprocess and save them in root_dir.
    for video_path in path_dic['rgb']:
        # Arguments shared by both run.py invocations for this video.
        base_cmd = ["python", "run.py", "--device", device,
                    "--input_video", os.path.join(root_dir, video_path),
                    "--output_dir", root_dir,
                    "--save_tiff",
                    "--save_vis",
                    "--encoder", encoder]

        # run.py is called twice per video: first with --process_single_image,
        # then without it (same order as before).
        for extra_args in (["--process_single_image"], []):
            result = subprocess.run(base_cmd + extra_args)
            # A failed run would otherwise go unnoticed and only surface later
            # as missing prediction files; report it but keep processing.
            if result.returncode != 0:
                warnings.warn(
                    f'run.py exited with code {result.returncode} for {video_path} '
                    f'(extra args: {extra_args})',
                    UserWarning,
                )

In [3]:
# Update Paths: re-scan root_dir and register the VideoDepthAny prediction files.
all_data = os.listdir(root_dir)

# Predictions whose file name contains 'Single_VideoDepthAny_'.
single_key = f'VDA_s_{encoder}'
single_paths = natsorted([p for p in all_data if 'Single_VideoDepthAny_' in p and '_depths.tiff' in p])
if single_paths:
    path_dic[single_key] = single_paths
else:
    warnings.warn('No VideoDepthAny with single image processing found, removed from list', UserWarning)
    # Drop a stale entry from an earlier run so later cells do not iterate it.
    path_dic.pop(single_key, None)

# Predictions whose file name contains 'VideoDepthAny_' but not 'Single_'.
video_key = f'VDA_{encoder}'
video_paths = natsorted([p for p in all_data if 'VideoDepthAny_' in p and '_depths.tiff' in p and 'Single_' not in p])
if video_paths:
    path_dic[video_key] = video_paths
else:
    # This message previously repeated the single-image text, which pointed
    # at the wrong set of files.
    warnings.warn('No VideoDepthAny without single image processing found, removed from list', UserWarning)
    path_dic.pop(video_key, None)

In [10]:
# Load data: read every file listed in path_dic into data_dic, keyed as
# '<path_dic key>_<SceneNN>'.
data_dic = {}
# Compiled once; extracts the 'SceneNN' token from a file name.
scene_pattern = re.compile(r"(Scene\d+)")
for key in tqdm(path_dic):
    for p in path_dic[key]:
        scene_match = scene_pattern.search(p)
        if scene_match is None:
            # Without a scene token there is no key to store the data under.
            warnings.warn(f'No scene name found in {p}, file skipped', UserWarning)
            continue
        # Named scene_name (not `scene`) so the module-level `scene` list
        # from the configuration cell is not overwritten by a string.
        scene_name = scene_match.group(1)
        file_path = os.path.join(root_dir, p)
        if key != 'rgb':
            data_dic[key+f'_{scene_name}'] = tiff.imread(file_path)
        else:
            # read_video_frames returns a pair; only the first element is kept.
            data_dic[key+f'_{scene_name}'], _video_info = read_video_frames(file_path, process_length=-1, target_fps=-1, max_res=-1)

100%|██████████| 10/10 [00:13<00:00,  1.30s/it]


In [None]:
# Visualise Data
def visualise_data(data_dic, methods, scene_idx, root='.'):
    """Prepare the comparison figure and the video writer for one scene.

    Work in progress: the figure and the writer are set up, but no frames
    are drawn or appended yet (see the TODOs below).

    Args:
        data_dic: Mapping that must contain 'rgb_<scene>' and 'gt_<scene>'
            for the selected scene.
        methods: List of method names; used for the output file name and
            for the number of figure columns (len(methods) + 1).
        scene_idx: Index into the module-level `scene` list.
        root: Directory the output video is written to.

    Raises:
        KeyError: If the RGB or ground-truth entry of the scene is missing.

    Relies on the module-level names `scene` (list of scene names), `FPS`,
    `Loss_min` and `Loss_max`.
    """
    scene_name = scene[scene_idx]

    # output_name: join the method names so the file name reads
    # '<scene>_<m1>_<m2>_..._Vis.mp4' instead of embedding the Python list
    # repr (brackets, quotes, commas) in the file name.
    vis_name = f"{scene_name}_{'_'.join(methods)}_Vis.mp4"

    # data of RGB and GT
    rgb_vid = data_dic[f'rgb_{scene_name}']
    gt_depth_vid = data_dic[f'gt_{scene_name}']  # not used yet; kept for the TODO drawing code

    # Video info
    # assumes rgb_vid is an array laid out as (frames, height, width, ...) — TODO confirm
    frame_count = len(rgb_vid)
    height = rgb_vid.shape[1]
    width = rgb_vid.shape[2]

    # Set up figure
    fig, axs = plt.subplots(nrows=3, ncols=len(methods)+1)

    # Prepare plot
    # NOTE(review): column index 2 is hard-coded, so this needs at least two
    # methods (three columns) — confirm the intended layout.
    Loss_axs = axs[0, 2]
    Loss_axs.set_xlim(0, frame_count)
    # NOTE(review): Loss_min / Loss_max are not defined in this function;
    # they must exist at module level before the call — confirm where they are set.
    Loss_axs.set_ylim( Loss_min, Loss_max)
    # TODO: Setup for every Method
    loss_line, = Loss_axs.plot([], [], label='')

    # Setup video writer variables
    writer = None
    try:
        for t in tqdm(range(len(rgb_vid))):
            # Implement all images
            if t==0:
                # Opened on the first frame only, so no file is created for an empty video.
                writer = imageio.get_writer(os.path.join(root, vis_name), fps=FPS, macro_block_size=1, codec='libx264', ffmpeg_params=['-crf', '18'])
    finally:
        # Always release the writer, also when an iteration raises.
        if writer is not None:
            writer.close()


[array([[[80.00008  , 80.00008  , 80.00008  , ...,  6.53     ,
           6.519999 ,  6.5100007],
         [80.00008  , 80.00008  , 80.00008  , ...,  6.53     ,
           6.519999 ,  6.5100007],
         [80.00008  , 80.00008  , 80.00008  , ...,  6.54     ,
           6.519999 ,  6.5100007],
         ...,
         [ 8.49     ,  8.48     ,  8.48     , ...,  3.99     ,
           3.99     ,  3.99     ],
         [ 8.429999 ,  8.42     ,  8.42     , ...,  3.98     ,
           3.98     ,  3.98     ],
         [ 8.369999 ,  8.36     ,  8.36     , ...,  3.98     ,
           3.98     ,  3.98     ]],
 
        [[80.00008  , 80.00008  , 80.00008  , ...,  6.55     ,
           6.54     ,  6.53     ],
         [80.00008  , 80.00008  , 80.00008  , ...,  6.55     ,
           6.54     ,  6.53     ],
         [80.00008  , 80.00008  , 80.00008  , ...,  6.55     ,
           6.54     ,  6.53     ],
         ...,
         [ 8.6      ,  8.59     ,  8.59     , ...,  2.91     ,
           2.91     ,  2