In [1]:
import torch
import cv2
import time
import matplotlib.pyplot as plt
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.offline import iplot

from lib.utils.utils import process_model_dict
from lib.utils.visualizer import Visualizer
from lib.utils.video_iterator import VideoIterator
from lib.utils.inference_helper import InferenceHelper
from lib.models.unet_adaptive_bins_legacy import UnetAdaptiveBins

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Depth range shared by the depth network and the inference helper.
MIN_DEPTH, MAX_DEPTH = 1e-3, 10
# Target frame size handed to cv2.resize and VideoIterator in later cells.
FRAME_SIZE = (640, 480)

# Depth model: build the adaptive-bins network, then load the converted checkpoint.
model_depth = UnetAdaptiveBins.build(
    n_bins=256,
    min_val=MIN_DEPTH,
    max_val=MAX_DEPTH,
)
pretrained_ckpt = process_model_dict('weights/AdaBins_nyu.pt')
model_depth.load_state_dict(pretrained_ckpt)

# Segmentation model: pretrained DeepLabV3 (ResNet-50) from the pinned torchvision hub tag.
model_seg = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'deeplabv3_resnet50',
    pretrained=True,
)

# Bundle both models behind one predict() entry point.
# NOTE(review): neither model is switched to eval() in this cell — confirm
# that InferenceHelper does so before running inference.
infer = InferenceHelper(
    model_depth,
    model_seg,
    MIN_DEPTH,
    MAX_DEPTH,
    device=DEVICE,
)

Loading base model ()...

Using cache found in /home/bharath/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master


Done.
Removing last two layers (global_pool & classifier).
Building Encoder-Decoder model..Done.


Using cache found in /home/bharath/.cache/torch/hub/pytorch_vision_v0.10.0


In [4]:
# Still image used for the first prediction. The path is relative to the
# notebook's working directory, like the weights and video paths used in the
# other cells, so the notebook is not tied to one user's home directory.
IMAGE_PATH = 'data/test/image_5.jpg'

img_raw = cv2.imread(IMAGE_PATH)
if img_raw is None:
    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising; fail here with a clear message instead of inside cv2.resize.
    raise FileNotFoundError(f'Could not read image: {IMAGE_PATH}')
img = cv2.resize(img_raw, FRAME_SIZE)

# Run the inference helper on the resized frame.
bin_center, pred_depth, pred_seg = infer.predict(img)

In [5]:
# Plotly 3D scene camera: the point looked at, the eye position, the
# projection type and the up vector.
camera = dict(
    center=dict(x=0, y=0, z=0),
    eye=dict(
        x=-0.05060184209509067,
        y=-0.8272060402743945,
        z=-2.0001673981220023,
    ),
    projection=dict(type='perspective'),
    up=dict(x=0, y=0, z=1),
)

In [6]:
# Depth map rendered as a 3D surface; the surface is coloured by the first
# channel of the input image through the viridis colour scale.
data = go.Surface(
    z=pred_depth.squeeze(),
    colorscale='viridis',
    surfacecolor=img[:, :, 0],
)

# FigureWidget (rather than a plain Figure) so later cells can update the
# trace in place.
fig = go.FigureWidget(data)
fig.update_layout(width=1000, height=800, scene_camera=camera)
fig

FigureWidget({
    'data': [{'colorscale': [[0.0, '#440154'], [0.1111111111111111, '#482878'],
               …

In [10]:
# Fraction of video frames run through the model: one frame in every twenty.
SAMPLING_RATE = 1 / 20

In [23]:
# Stream a video through the depth model and update the existing surface
# figure in place with a smoothed depth map.
INITIAL_DEPTH = 1.7   # constant depth map the running average starts from
UPDATE_PAUSE_S = 5    # seconds to wait after each surface update

# Frame stride derived from SAMPLING_RATE. Clamp to at least 1: a rate above 1
# would otherwise give a stride of 0 and make `i % skip_frames` raise
# ZeroDivisionError.
skip_frames = max(1, int(1 / SAMPLING_RATE))
vid_iter = VideoIterator('data/test/test3.mkv', FRAME_SIZE)

# FRAME_SIZE is (width, height), while the arrays are (height, width), so
# derive the shape from it instead of repeating the numbers.
frame_width, frame_height = FRAME_SIZE
pred_prev = np.full((frame_height, frame_width), INITIAL_DEPTH)

for i, frame in enumerate(vid_iter):
    if i % skip_frames != 0:
        continue

    _, pred_depth, pred_seg = infer.predict(frame, is_depth=True, is_seg=False)

    # Running average: each new prediction is blended 50/50 with the history.
    pred_prev = (pred_prev + pred_depth.squeeze()) / 2

    # Apply both trace changes as one widget update so the surface never
    # shows the new depth together with the previous frame's colours.
    with fig.batch_update():
        fig.data[0].z = pred_prev
        fig.data[0].surfacecolor = frame[:, :, 0]

    time.sleep(UPDATE_PAUSE_S)