In [28]:
import torch
import os
import numpy as np
import util
import data_util
import matplotlib.pyplot as plt
from glob import glob
from scipy.io import loadmat
import cv2
from skimage.measure import compare_psnr
from skimage.measure import compare_ssim

from deep_voxels import DeepVoxels
from projection import ProjectionHelper

In [29]:
# Inference in this notebook runs on the GPU.
device = torch.device('cuda')

# Checkpoint and dataset locations. The defaults are the original hard-coded
# paths; set DEEPVOXELS_MODEL_PATH / DEEPVOXELS_DATA_PATH in the environment to
# run the notebook on another machine without editing this cell.
model_path = os.environ.get(
    'DEEPVOXELS_MODEL_PATH',
    os.path.join('/home/max/cache/logs/', 'model-epoch_36_iter_24000.pth'))
# The former single-argument os.path.join() was a no-op, so the default is the
# plain string (trailing slash preserved).
data_path = os.environ.get('DEEPVOXELS_DATA_PATH', '/home/max/depth_infer_dataset/')

In [30]:
# Resolutions shared by the rest of the notebook.
img_dim = 256                # side length used for the RGB loader and the model
grid_dim = 32                # number of voxels along each side of the cubic grid
proj_image_dims = [64, 64]   # resolution of the lifting / projection images

# Pull the grid barycenter, scene scale and near plane out of the dataset's
# intrinsics file; the first and last returned values are not needed here.
intrinsics_file = os.path.join(data_path, 'intrinsics.txt')
_, grid_barycenter, scale, near_plane, _ = util.parse_intrinsics(
    intrinsics_file, trgt_sidelength=proj_image_dims[0])

# A near plane of exactly 0.0 is replaced by sqrt(3)/2.
# NOTE(review): presumably 0.0 means "not specified" and sqrt(3)/2 is half the
# space diagonal of a unit cube - confirm against util.parse_intrinsics.
near_plane = np.sqrt(3)/2 if near_plane == 0.0 else near_plane

In [31]:
# The first value returned by parse_intrinsics serves as both the lifting and
# the projection intrinsic.
parsed_intrinsics = util.parse_intrinsics(os.path.join(data_path, 'intrinsics.txt'),
                                          trgt_sidelength=proj_image_dims[0])
lift_intrinsic = parsed_intrinsics[0]
proj_intrinsic = lift_intrinsic

# Scale and world coordinates of the voxel grid: a float32 4x4 identity
# transform on `device` whose translation column is set to the grid barycenter.
voxel_size = (1. / grid_dim) * 1.1 * scale
grid_origin = torch.eye(4, dtype=torch.float32, device=device)
grid_origin[:3, 3] = grid_barycenter

# Minimum and maximum depth used for rejecting voxels outside of the camera frustum.
depth_min = 0.
depth_max = grid_dim * voxel_size + near_plane
grid_dims = [grid_dim] * 3

# Resolution of the canonical viewing volume along the depth dimension,
# expressed in number of voxels (1.5x the grid side length, rounded up).
frustrum_depth = int(np.ceil(grid_dim * 1.5))

In [32]:
# DeepVoxels network. Lifting and frustum images share the projection image
# resolution; the occlusion network is enabled.
deepvoxels_kwargs = dict(
    img_sidelength=img_dim,
    lifting_img_dims=proj_image_dims,
    frustrum_img_dims=proj_image_dims,
    grid_dims=grid_dims,
    num_grid_feats=64,
    nf0=64,
    use_occlusion_net=True)
model = DeepVoxels(**deepvoxels_kwargs)
model.to(device)


# Projection module: the same intrinsic and image resolution are used for
# lifting and for projection.
projection_kwargs = dict(
    lifting_intrinsic=lift_intrinsic,
    projection_intrinsic=proj_intrinsic,
    lifting_image_dims=proj_image_dims,
    projection_image_dims=proj_image_dims,
    grid_dims=grid_dims,
    voxel_size=voxel_size,
    near_plane=near_plane,
    depth_min=depth_min,
    depth_max=depth_max,
    frustrum_depth=frustrum_depth,
    device=device)
projection = ProjectionHelper(**projection_kwargs)

OcclusionNet(
  (occlusion_prep): Sequential(
    (0): Conv3dSame(
      (net): Sequential(
        (0): ReplicationPad3d((1, 1, 1, 1, 1, 1))
        (1): Conv3d(65, 4, kernel_size=(3, 3, 3), stride=(1, 1, 1), bias=False)
      )
    )
    (1): BatchNorm3d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (occlusion_net): Unet3d(
    (in_layer): Sequential(
      (0): Conv3dSame(
        (net): Sequential(
          (0): ReplicationPad3d((1, 1, 1, 1, 1, 1))
          (1): Conv3d(4, 4, kernel_size=(3, 3, 3), stride=(1, 1, 1), bias=False)
        )
      )
      (1): BatchNorm3d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2, inplace)
    )
    (unet_block): UnetSkipConnectionBlock3d(
      (model): Sequential(
        (0): DownBlock3D(
          (net): Sequential(
            (0): ReplicationPad3d((1, 1, 1, 1, 1, 1))
            (1): Conv3d(4, 8, kernel_size=(4, 4, 4), stride=(2, 2,

In [33]:
# Restore the checkpoint stored at model_path into the model.
# NOTE(review): util.custom_load is project code not visible here - presumably
# it loads the saved weights; confirm how it treats missing/unexpected keys.
util.custom_load(model, model_path)
# Switch the network to evaluation mode. This is the cell's last expression, so
# its return value (the module) is what the notebook displays below.
model.eval()

DeepVoxels(
  (feature_extractor): Sequential(
    (0): DownsamplingNet(
      (downs): Sequential(
        (0): DownBlock(
          (net): Sequential(
            (0): ReflectionPad2d((1, 1, 1, 1))
            (1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
            (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): LeakyReLU(negative_slope=0.2, inplace)
            (4): ReflectionPad2d((1, 1, 1, 1))
            (5): Conv2d(64, 64, kernel_size=(4, 4), stride=(2, 2), bias=False)
            (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (7): LeakyReLU(negative_slope=0.2, inplace)
          )
        )
        (1): DownBlock(
          (net): Sequential(
            (0): ReflectionPad2d((1, 1, 1, 1))
            (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
            (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [34]:
# Collect the RGB frames, camera poses and ground-truth depth maps of the
# dataset. Each list is sorted by file name so that entry i of every list
# refers to the same sample.
rgb_list = sorted(glob(os.path.join(data_path, 'rgb', '*.png')))
pose_list = sorted(glob(os.path.join(data_path, 'pose', '*.mat')))
depth_list = sorted(glob(os.path.join(data_path, 'depth', '*.png')))

# The evaluation loop indexes all three lists with the same i, so differing
# lengths would either raise an IndexError midway through the run or silently
# pair an image with the wrong pose/depth. Fail early with a clear message.
if not (len(rgb_list) == len(pose_list) == len(depth_list)):
    raise ValueError(
        'Dataset file counts differ: {} rgb, {} pose, {} depth files under {}'.format(
            len(rgb_list), len(pose_list), len(depth_list), data_path))

In [35]:
def _normalize_unit_range(values):
    """Min-max normalize an array to [0, 1], returned as float64.

    A constant input has zero range; it is mapped to all zeros instead of
    dividing by zero and propagating NaNs into the PSNR list.
    """
    values = np.asarray(values, dtype=np.float64)
    lowest, highest = np.amin(values), np.amax(values)
    if highest == lowest:
        return np.zeros_like(values)
    return (values - lowest) / (highest - lowest)


def _load_true_depth(path, size):
    """Load a ground-truth depth image and return it negated, resized to `size` and normalized to [0, 1].

    Raises:
        IOError: if OpenCV cannot read the file (cv2.imread returns None).
    """
    raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if raw is None:
        raise IOError('Could not read depth image: {}'.format(path))
    # Convert to float BEFORE negating: on an unsigned integer image `-raw`
    # wraps around (0 stays 0, x becomes 2**bits - x), which corrupts the
    # ordering of the depth values.
    negated = -raw.astype(np.float32)
    negated = cv2.resize(negated, tuple(size), interpolation=cv2.INTER_AREA)
    return _normalize_unit_range(negated)


# Every 20th predicted depth map is written here as a 16-bit PNG. cv2.imwrite
# fails silently when the directory is missing, so create it up front.
depth_out_dir = '/home/max/monocular'
os.makedirs(depth_out_dir, exist_ok=True)

# Number of 'pretest' passes that lift the single input view into the grid.
num_pretest = 1

# Per-image PSNR between normalized predicted and ground-truth depth.
psnr = list()
for i, rgb_path in enumerate(rgb_list):
    # Input view: keep the first three channels, scale to [-0.5, 0.5], channels first.
    img = data_util.load_img(rgb_path, square_crop=True, downsampling_order=1, target_size=[img_dim, img_dim])
    img = img[:, :, :3].astype(np.float32) / 255. - 0.5
    img = torch.from_numpy(img.transpose(2, 0, 1))

    # The stored 'extrinsic' matrix is inverted before being used as the
    # camera-to-world pose.
    pose = loadmat(pose_list[i])['extrinsic']
    pose = np.linalg.inv(pose).astype(np.float32)
    pose = torch.from_numpy(pose)
    # The same view is used for lifting and as the render target.
    trgt_pose = pose.squeeze().to(device)

    with torch.no_grad():
        # Start every sample from an empty feature grid, allocated on `device`
        # rather than via a hard-coded .cuda().
        model.representation = torch.zeros(
            (1, model.n_grid_feats, model.grid_dims[0], model.grid_dims[1], model.grid_dims[2]),
            device=device)

        for _ in range(num_pretest):
            backproj_mapping = [projection.comp_lifting_idcs(camera_to_world=trgt_pose,
                                                             grid2world=grid_origin)]
            inpt_rgbs = [img.unsqueeze(0).to(device)]

            lift_volume_idcs, lift_img_coords = list(zip(*backproj_mapping))

            model(inpt_rgbs, 'pretest', None, None, lift_volume_idcs, lift_img_coords, None)

        # compute projection mapping
        proj_ind_3d, proj_ind_2d = projection.compute_proj_idcs(trgt_pose, grid_origin)

        # Run through model
        _, depth_maps = model(None, 'test', [proj_ind_3d], [proj_ind_2d], None, None, None)

        # Negated, min-max normalized prediction (same convention as the ground truth).
        depth_img = -depth_maps[0].squeeze(0).cpu().detach().numpy()
        depth_img = _normalize_unit_range(depth_img.transpose(1, 2, 0)).squeeze()

    true_depth = _load_true_depth(depth_list[i], proj_image_dims)

    # Both images are normalized to [0, 1], so state the data range explicitly
    # instead of letting it be inferred from the input dtypes.
    psnr.append(compare_psnr(true_depth, depth_img, data_range=1.0))

    if i % 20 == 0:
        print(i)
        # Store the normalized prediction as a 16-bit PNG under the ground-truth file name.
        depth_uint16 = (depth_img * (2**16 - 1)).round().astype(np.uint16)
        cv2.imwrite(os.path.join(depth_out_dir, os.path.basename(depth_list[i])), depth_uint16)

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
1020
1040
1060
1080
1100
1120
1140
1160
1180
1200
1220
1240
1260
1280
1300
1320
1340
1360
1380
1400
1420
1440
1460
1480
1500
1520
1540
1560
1580
1600
1620
1640
1660
1680
1700
1720
1740
1760
1780
1800
1820
1840
1860
1880
1900
1920
1940
1960
1980
2000
2020
2040
2060
2080
2100
2120
2140
2160
2180
2200
2220
2240
2260
2280
2300
2320
2340
2360
2380
2400
2420
2440
2460
2480
2500
2520
2540
2560
2580


In [23]:
# Persist the per-image PSNR values collected by the evaluation loop
# (np.save appends the '.npy' extension to the given name).
psnr_out_path = '/home/max/saved_data/monocular_psnr'
# Create the target directory first so that a finished evaluation run is not
# lost to a missing folder (np.save raises if the directory does not exist).
os.makedirs(os.path.dirname(psnr_out_path), exist_ok=True)
np.save(psnr_out_path, np.array(psnr))