# Import
Use dipe kernel

In [1]:
import os, time, cv2, torch

import numpy as np
from torch.utils.data import DataLoader
from PIL import Image 

import datasets, networks

from utils.utils import readlines
from utils.kitti_utils import export_gt_depths
from utils.evaluation_utils import *
from layers import disp_to_depth

# Set Parameters

Linux commands examples:

python evaluate_kitti.py --data_path /home/leying/Documents/data/kitti_dataset --load_weights_folder /home/leying/Documents/DiPE/models/pt_models/dipe_eigen --eval_mono --eval_split eigen --png

python evaluate_kitti.py --data_path /home/leying/Documents/data --load_weights_folder /home/leying/Documents/DiPE/models/pt_models/dipe_bench --dataset kitti_depth --eval_mono --eval_split benchmark --png

In [2]:
# Use DiPE checkpoints
# load_weights_folder = os.path.join(os.getcwd(), "models", "pt_models", "dipe_bench")
load_weights_folder = os.path.join(os.getcwd(), "models", "pt_models", "dipe_eigen")
# Use monodepth2 checkpoints
# load_weights_folder = "/home/leying/Documents/monodepth2/models/mono_640x192"
# load_weights_folder = "/home/leying/Documents/monodepth2/models/mono+stereo_640x192"
# load_weights_folder = "/home/leying/Documents/monodepth2/models/stereo_640x192"
assert os.path.isdir(load_weights_folder)

# eval_split = "benchmark"
eval_split = "eigen"
# data_path = "/home/leying/Documents/data"
data_path = "/home/leying/Documents/data/kitti_dataset"

num_workers = 8 # Number of dataloader workers
num_layers = 18 # Number of resnet layers, choices=[18, 34, 50, 101, 152]
num_scales = 4

debug = False
splits_dir = os.path.join(os.getcwd(), "splits")
if debug:
    # For single image processing
#     filenames = ["kitti_depth/val_selection_cropped 2011_09_26_drive_0002_sync 0000000005_image_02"] # benchmark
    filenames = ["2011_09_26/2011_09_26_drive_0002_sync 0000000069 l"] # eigen
else:
    # For large data processing
#     filenames = readlines(os.path.join(splits_dir, eval_split, "val_selection_files.txt")) # benchmark
    filenames = readlines(os.path.join(splits_dir, eval_split, "test_files.txt")) # eigen


min_depth = 0.1
max_depth = 100.0
pred_depth_scale_factor = 1 # If set multiplies predictions by this number

# Use DiPE checkpoints
# save_dir = os.path.join(os.getcwd(), "results", "benchmark")
save_dir = os.path.join(os.getcwd(), "results", "eigen")
# Use monodepth2 checkpoints
# save_dir = "/home/leying/Documents/monodepth2/assets/kitti_depth_val_selection_mono_640x192"
# save_dir = "/home/leying/Documents/monodepth2/assets/kitti_depth_val_selection_mono+stereo_640x192"
# save_dir = "/home/leying/Documents/monodepth2/assets/kitti_depth_val_selection_stereo_640x192"
# save_dir = "/home/leying/Documents/monodepth2/assets/drive_test_mono_640x192"
# save_dir = "/home/leying/Documents/monodepth2/assets/drive_test_mono+stereo_640x192"
# save_dir = "/home/leying/Documents/monodepth2/assets/drive_test_stereo_640x192"

disable_median_scaling = False

# Other Variables

In [3]:
MIN_DEPTH = 1e-3
MAX_DEPTH = 80

cv2.setNumThreads(0)  # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)

datasets_dict = {"eigen": datasets.KITTIRAWDataset,
                 "eigen_benchmark": datasets.KITTIRAWDataset,
                 "benchmark": datasets.KITTIDepthTestDataset,
                 "odom_09": datasets.KITTIOdomDataset,
                 "odom_10": datasets.KITTIOdomDataset}

device = torch.device("cuda")

# Evaluation

In [4]:
# Load checkpoints
assert os.path.isdir(load_weights_folder), "Cannot find a folder at {}".format(load_weights_folder)
print("-> Loading weights from {}".format(load_weights_folder))

depth_encoder_path = os.path.join(load_weights_folder, "encoder.pth")
depth_decoder_path = os.path.join(load_weights_folder, "depth.pth")

encoder_dict = torch.load(depth_encoder_path)

depth_encoder = networks.ResnetEncoder(num_layers, False)
depth_decoder = networks.DepthDecoder(depth_encoder.num_ch_enc)

model_dict = depth_encoder.state_dict()
depth_encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
depth_decoder.load_state_dict(torch.load(depth_decoder_path))

depth_encoder.to(device);
depth_encoder.eval();
depth_decoder.to(device);
depth_decoder.eval();

-> Loading weights from /home/leying/Documents/DiPE/models/pt_models/dipe_eigen


In [5]:
# Load image
dataset = datasets_dict[eval_split](data_path, filenames,
                                        encoder_dict['height'], encoder_dict['width'],
                                        [0], 4, is_train=False, img_ext='.png')

dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=num_workers,
                        pin_memory=True, drop_last=False)

In [6]:
# Inference
print("-> Computing predictions with size {}x{}".format(encoder_dict['width'], encoder_dict['height']))

pred_disps = []
times = []
with torch.no_grad():
    for data in dataloader:
        input_color = data[("color", 0, 0)].to(device)
        
        start_time = time.time()
        output = depth_decoder(depth_encoder(input_color))
        pred_disp, _ = disp_to_depth(output[("disp", 0, 0)], min_depth, max_depth)
        end_time = time.time() - start_time
        if debug:
            print("--- %s seconds for one image ---" % (end_time))
        
        pred_disp = pred_disp.cpu()[:, 0].numpy()
        
        pred_disps.append(pred_disp)
        times.append(end_time)
        
pred_disps = np.concatenate(pred_disps)
mean_time = np.array(times).mean(0)
print("### AVG: %s seconds/image for %s image(s) ###" % (mean_time, len(pred_disps)))

-> Computing predictions with size 640x192
### AVG: 0.015598042444749311 seconds/image for 697 image(s) ###


In [7]:
# QA
gt_path = os.path.join(splits_dir, eval_split, "gt_depths.npz")
if not os.path.exists(gt_path):
    print("-> GT not found, generating...")
    gt_depths = export_gt_depths(data_path, eval_split)
else:
    print("-> GT found, loading...")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]
    
print("-> Evaluating")

errors = []
ratios = []

for i in range(pred_disps.shape[0]):

    gt_depth = gt_depths[i]
    gt_height, gt_width = gt_depth.shape[:2]

    pred_disp = pred_disps[i]
    pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
    pred_depth = 1 / pred_disp

    if eval_split == "eigen" or eval_split == "eigen_benchmark":
        mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

        crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                         0.03594771 * gt_width,  0.96405229 * gt_width]).astype(np.int32)
        crop_mask = np.zeros(mask.shape)
        crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
        mask = np.logical_and(mask, crop_mask)

    else:
        mask = gt_depth > 0

    pred_depth = pred_depth[mask]
    gt_depth = gt_depth[mask]

    pred_depth *= pred_depth_scale_factor
    if not disable_median_scaling:
        ratio = np.median(gt_depth) / np.median(pred_depth)
        ratios.append(ratio)
        pred_depth *= ratio

    pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
    pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

    errors.append(compute_errors(gt_depth, pred_depth))

scale_factor = pred_depth_scale_factor
if not disable_median_scaling:
    ratios = np.array(ratios)
    med = np.median(ratios)
    print(" Scaling ratios | med: {} | std: {}".format(med, np.std(ratios / med)))
    scale_factor *= med

mean_errors = np.array(errors).mean(0)

print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
print(("|{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "|")
print("\n-> Done!")

-> GT not found, generating...
Exporting ground truth depths for eigen


  gt_depths = np.array(gt_depths)


Saving to eigen
-> Evaluating
 Scaling ratios | med: 29.247262954711914 | std: 0.086000956594944

   abs_rel |   sq_rel |     rmse | rmse_log |       a1 |       a2 |       a3 | 
|   0.112  |   0.877  |   4.797  |   0.190  |   0.880  |   0.960  |   0.981  |

-> Done!


In [8]:
# Save image
# Set scale_factor if QA is skipped
# if not disable_median_scaling:
#     # Scaling ratios for kitti benchmark | med: 24.854228973388672 | std: 0.06617851555347443
#     # Scaling ratios for eigen | med: 29.51048469543457 | std: 0.07228389382362366
#     med = 24.854228973388672
#     scale_factor = pred_depth_scale_factor * med
# else:
#     scale_factor = pred_depth_scale_factor
    
for idx in range(len(pred_disps)):
    disp_resized = cv2.resize(pred_disps[idx], dsize=(1216, 352))
    depth = scale_factor / disp_resized
    depth = np.clip(depth, 0, 80)
    depth = np.uint16(depth * 256)
    save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
    cv2.imwrite(save_path, depth)