In [22]:
import torch
import os
import numpy as np
import util
import data_util
import matplotlib.pyplot as plt
from glob import glob
from scipy.io import loadmat


from deep_voxels import DeepVoxels
from projection import ProjectionHelper

In [23]:
# All tensors and the model are placed on the CUDA device.
device = torch.device('cuda')

# Checkpoint to evaluate and the directory holding the "pretest" observations
# (its intrinsics.txt, rgb/ and pose/ entries are read further down).
model_path = os.path.join('/home/max/cache/logs/', 'model-epoch_36_iter_24000.pth')
data_path = os.path.join('/home/max/pretest/')


def _sorted_matches(directory, pattern):
    """Return every path in `directory` matching the glob `pattern`, sorted."""
    return sorted(glob(os.path.join(directory, pattern)))


infer_rgb_dir = '/home/max/depth_infer_dataset/rgb'
infer_pose_dir = '/home/max/depth_infer_dataset/pose'

# Inference samples whose path does not contain the substring 'car'.
# NOTE(review): this is a substring test on the whole path, whereas the car
# lists below use the filename prefix pattern 'car*' - confirm that the two
# selections are meant to be exact complements.
all_test_rgbs = [path for path in _sorted_matches(infer_rgb_dir, '*.png') if 'car' not in path]
all_test_poses = [path for path in _sorted_matches(infer_pose_dir, '*.mat') if 'car' not in path]

# Inference samples whose filename starts with 'car'.
car_test_rgbs = _sorted_matches(infer_rgb_dir, 'car*.png')
car_test_poses = _sorted_matches(infer_pose_dir, 'car*.mat')

# Images and poses stored under the pretest directory.
this_test_rgbs = _sorted_matches('/home/max/pretest/rgb', '*.png')
this_test_poses = _sorted_matches('/home/max/pretest/pose', '*.mat')

In [24]:
# Side length the input images are resized to, number of voxels per grid
# axis, and the image resolution used when parsing the intrinsics.
img_dim = 256
grid_dim = 32
proj_image_dims = [64, 64]

# parse_intrinsics returns five values; only the grid barycenter, the scale
# and the near plane are needed in this cell.
intrinsics_file = os.path.join(data_path, 'intrinsics.txt')
_, grid_barycenter, scale, near_plane, _ = util.parse_intrinsics(
    intrinsics_file, trgt_sidelength=proj_image_dims[0])

# A near plane of exactly 0 is replaced by sqrt(3)/2 (numerically half the
# space diagonal of a unit cube - presumably the reason for this value; confirm).
near_plane = np.sqrt(3)/2 if near_plane == 0.0 else near_plane

In [25]:
# The first value returned by parse_intrinsics is used as the intrinsic matrix
# for both lifting and projection.
intrinsics_path = os.path.join(data_path, 'intrinsics.txt')
lift_intrinsic = util.parse_intrinsics(intrinsics_path, trgt_sidelength=proj_image_dims[0])[0]
proj_intrinsic = lift_intrinsic

# Scale and world coordinates of the voxel grid: the voxel edge length is the
# scene scale divided over the grid resolution, enlarged by a factor of 1.1.
voxel_size = (1. / grid_dim) * 1.1 * scale

# 4x4 float32 identity on the compute device whose translation column is set
# to the grid barycenter.
grid_origin = torch.eye(4, dtype=torch.float32, device=device)
grid_origin[:3, 3] = grid_barycenter

# Minimum and maximum depth used for rejecting voxels outside of the camera frustum.
depth_min = 0.
depth_max = grid_dim * voxel_size + near_plane
grid_dims = [grid_dim] * 3

# Resolution of the canonical viewing volume along the depth axis, in voxels.
frustrum_depth = int(np.ceil(1.5 * grid_dim))

In [26]:
# Network configuration: lifting and frustum images share one resolution.
model_kwargs = dict(
    lifting_img_dims=proj_image_dims,
    frustrum_img_dims=proj_image_dims,
    grid_dims=grid_dims,
    use_occlusion_net=True,
    num_grid_feats=64,
    nf0=64,
    img_sidelength=img_dim,
)
model = DeepVoxels(**model_kwargs)
model.to(device)

# Projection module: configured with the same intrinsics, image resolution,
# grid dimensions and depth range that were set up in the cells above.
projection_kwargs = dict(
    projection_intrinsic=proj_intrinsic,
    lifting_intrinsic=lift_intrinsic,
    depth_min=depth_min,
    depth_max=depth_max,
    projection_image_dims=proj_image_dims,
    lifting_image_dims=proj_image_dims,
    grid_dims=grid_dims,
    voxel_size=voxel_size,
    device=device,
    frustrum_depth=frustrum_depth,
    near_plane=near_plane,
)
projection = ProjectionHelper(**projection_kwargs)

OcclusionNet(
  (occlusion_prep): Sequential(
    (0): Conv3dSame(
      (net): Sequential(
        (0): ReplicationPad3d((1, 1, 1, 1, 1, 1))
        (1): Conv3d(65, 4, kernel_size=(3, 3, 3), stride=(1, 1, 1), bias=False)
      )
    )
    (1): BatchNorm3d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (occlusion_net): Unet3d(
    (in_layer): Sequential(
      (0): Conv3dSame(
        (net): Sequential(
          (0): ReplicationPad3d((1, 1, 1, 1, 1, 1))
          (1): Conv3d(4, 4, kernel_size=(3, 3, 3), stride=(1, 1, 1), bias=False)
        )
      )
      (1): BatchNorm3d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2, inplace)
    )
    (unet_block): UnetSkipConnectionBlock3d(
      (model): Sequential(
        (0): DownBlock3D(
          (net): Sequential(
            (0): ReplicationPad3d((1, 1, 1, 1, 1, 1))
            (1): Conv3d(4, 8, kernel_size=(4, 4, 4), stride=(2, 2,

In [27]:
# Populate `model` from the checkpoint file at `model_path` using the project
# helper (presumably restores the trained weights - confirm in util.custom_load).
util.custom_load(model, model_path)
# Switch the network to evaluation mode. As the last expression of the cell,
# the value returned by eval() is what the notebook displays below.
model.eval()

DeepVoxels(
  (feature_extractor): Sequential(
    (0): DownsamplingNet(
      (downs): Sequential(
        (0): DownBlock(
          (net): Sequential(
            (0): ReflectionPad2d((1, 1, 1, 1))
            (1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
            (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): LeakyReLU(negative_slope=0.2, inplace)
            (4): ReflectionPad2d((1, 1, 1, 1))
            (5): Conv2d(64, 64, kernel_size=(4, 4), stride=(2, 2), bias=False)
            (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (7): LeakyReLU(negative_slope=0.2, inplace)
          )
        )
        (1): DownBlock(
          (net): Sequential(
            (0): ReflectionPad2d((1, 1, 1, 1))
            (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
            (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [28]:
# Sorted image and pose files of the pretest scene; both lists are later
# indexed with the same position, so entry j of one is paired with entry j
# of the other.
pretest_rgb_list, pretest_pose_list = (
    sorted(glob(os.path.join(data_path, subdir, pattern)))
    for subdir, pattern in (('rgb', '*.png'), ('pose', '*.mat'))
)

In [29]:
# Number of pretest observations fed to the model before each test sample.
NUM_PRETEST_VIEWS = 8


def load_view(rgb_path, pose_path):
    """Load one observation and compute its lifting indices.

    Args:
        rgb_path: Path of the RGB image, read with ``data_util.load_img`` and
            resized to ``img_dim`` x ``img_dim``.
        pose_path: Path of a ``.mat`` file holding the camera matrix under the
            key ``'extrinsic'``.

    Returns:
        A pair ``(image, mapping)``. ``image`` is a float32 tensor on
        ``device`` with channels first and a leading axis of size 1.
        ``mapping`` is the value returned by ``projection.comp_lifting_idcs``
        for this camera; callers unpack it into volume indices and image
        coordinates.
    """
    image = data_util.load_img(rgb_path, square_crop=True, downsampling_order=1,
                               target_size=[img_dim, img_dim])
    # Keep the first three channels, divide by 255 and shift by -0.5, then
    # reorder the axes from (row, column, channel) to (channel, row, column).
    image = image[:, :, :3].astype(np.float32) / 255. - 0.5
    image = torch.from_numpy(image.transpose(2, 0, 1))

    # The stored extrinsic is inverted before being used as the
    # camera-to-world transform.
    cam2world = np.linalg.inv(loadmat(pose_path)['extrinsic']).astype(np.float32)
    cam2world = torch.from_numpy(cam2world)

    mapping = projection.comp_lifting_idcs(camera_to_world=cam2world.squeeze().to(device),
                                           grid2world=grid_origin)
    return image.unsqueeze(0).to(device), mapping


# Load the pretest observations once; they are reused for every test sample.
inpt_rgbs = list()
backproj_mapping = list()
for j in range(NUM_PRETEST_VIEWS):
    pretest_rgb, pretest_mapping = load_view(pretest_rgb_list[j], pretest_pose_list[j])
    inpt_rgbs.append(pretest_rgb)
    backproj_mapping.append(pretest_mapping)

# Loop-invariant: split the per-view mappings into the two argument tuples
# expected by the model a single time instead of on every iteration.
pretest_volume_idcs, pretest_img_coords = zip(*backproj_mapping)

# Images and poses are paired purely by sorted position; zip() would silently
# drop the surplus entries of the longer list, so fail loudly instead.
if len(all_test_rgbs) != len(all_test_poses):
    raise ValueError('Found %d test images but %d test poses; the lists must pair up one-to-one.'
                     % (len(all_test_rgbs), len(all_test_poses)))

values = list()
for rgb, pose in zip(all_test_rgbs, all_test_poses):
    test_rgb, test_mapping = load_view(rgb, pose)
    test_volume_idcs, test_img_coords = zip(*[test_mapping])

    with torch.no_grad():
        # Start every sample from an all-zero representation (created directly
        # on `device`) and run the pretest observations through the model.
        model.representation = torch.zeros(
            (1, model.n_grid_feats, model.grid_dims[0], model.grid_dims[1], model.grid_dims[2]),
            device=device)
        model(inpt_rgbs, 'pretest', None, None, pretest_volume_idcs, pretest_img_coords, None)

        # Feed the single test observation and collect the three tensors
        # returned when return_gates=True.
        update, old_state, final = model([test_rgb], 'pretest', None, None,
                                         test_volume_idcs, test_img_coords, None,
                                         return_gates=True)

    # Score of this sample: mean absolute value of update * (old_state - final).
    value = np.abs((update * (old_state - final)).cpu().numpy()).mean()
    values.append(value)

In [30]:
# Persist the per-sample scores. The target directory is created first so a
# missing folder cannot discard the results of the evaluation loop; np.save
# appends the '.npy' extension to the file name 'all'.
save_dir = '/home/max/saved_data'
os.makedirs(save_dir, exist_ok=True)
np.save(os.path.join(save_dir, 'all'), np.array(values))

In [33]:
# Average score over all evaluated test samples (displayed as the cell output).
np.mean(values)

0.11557131