In [1]:
import torch
import cv2
import time
import matplotlib.pyplot as plt
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.offline import iplot

from lib.utils.utils import process_model_dict
from lib.utils.visualizer import Visualizer
from lib.utils.video_iterator import VideoIterator
from lib.utils.inference_helper import InferenceHelper
from lib.models.unet_adaptive_bins_legacy import UnetAdaptiveBins

In [2]:
# Run on the GPU when torch reports CUDA as usable, otherwise on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at  /opt/conda/conda-bld/pytorch_1640811806235/work/c10/cuda/CUDAFunctions.cpp:112.)



In [3]:
# Depth range handed to both the depth model and the inference helper.
MIN_DEPTH = 1e-3
MAX_DEPTH = 10
# Target frame size; passed as `dsize` to cv2.resize, i.e. (width, height).
FRAME_SIZE = (640, 480)

# Settings for the two networks, named here so they are easy to tune.
N_BINS = 256
DEPTH_WEIGHTS_PATH = 'weights/AdaBins_nyu.pt'
SEG_HUB_REPO = 'pytorch/vision:v0.10.0'
SEG_HUB_MODEL = 'deeplabv3_resnet50'

# Depth network: build the architecture, then load the checkpoint into it.
model_depth = UnetAdaptiveBins.build(
    n_bins=N_BINS,
    min_val=MIN_DEPTH,
    max_val=MAX_DEPTH,
)
pretrained_ckpt = process_model_dict(DEPTH_WEIGHTS_PATH)
model_depth.load_state_dict(pretrained_ckpt)

# Segmentation network: fetched (or read from the local hub cache) with
# pretrained weights.
model_seg = torch.hub.load(SEG_HUB_REPO, SEG_HUB_MODEL, pretrained=True)

# Single entry point that runs both models on DEVICE.
infer = InferenceHelper(
    model_depth,
    model_seg,
    MIN_DEPTH,
    MAX_DEPTH,
    device=DEVICE,
)

Loading base model ()...

Using cache found in /home/bharath/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master


Done.
Removing last two layers (global_pool & classifier).
Building Encoder-Decoder model..Done.


Using cache found in /home/bharath/.cache/torch/hub/pytorch_vision_v0.10.0


In [4]:
# Load one still image, run both models on it and build the visualizer that
# the later video loop keeps updating.
# NOTE(review): this is a machine-specific absolute path; the other assets in
# this notebook are addressed relative to the working directory. Consider
# moving this one under a configurable data directory as well.
TEST_IMAGE_PATH = '/home/bharath/code/Pros/Multi-Exit-Dense-Networks/data/test/image_5.jpg'

img = cv2.imread(TEST_IMAGE_PATH)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside resize.
    raise FileNotFoundError(f'Could not read image: {TEST_IMAGE_PATH}')
img = cv2.resize(img, FRAME_SIZE)

bin_center, pred_depth, pred_seg = infer.predict(img)
# The first two arguments are presumably the figure width/height in pixels
# -- TODO confirm against Visualizer.
depth_vis = Visualizer(1200, 1000, img, pred_depth, pred_seg)

# Last expression of the cell: renders the figure widget.
depth_vis.fig

FigureWidget({
    'data': [{'type': 'image',
              'uid': 'c97f81ad-b246-4330-b50a-5372d6f6252d',
   …

In [5]:
# Fraction of video frames that get processed: the loop uses every
# int(1 / SAMPLING_RATE)-th frame, i.e. one frame in 20 here.
SAMPLING_RATE = 1 / 20

# Each sampling cycle runs this many depth predictions in a row ...
DEPTH_SAMPLING_WEIGHT = 2
# ... followed by this many segmentation predictions, then starts over.
SEG_SAMPLING_WEIGHT = 1

In [14]:
# Walk through the video, process every `skip_frames`-th frame and alternate
# between depth and segmentation inference: DEPTH_SAMPLING_WEIGHT depth
# predictions, then SEG_SAMPLING_WEIGHT segmentation predictions (at least
# one), then the cycle restarts.
if SAMPLING_RATE <= 0:
    raise ValueError(f'SAMPLING_RATE must be positive, got {SAMPLING_RATE}')

# Clamp to 1: for rates above 1.0 int(1 / rate) is 0, which would make the
# modulo below raise ZeroDivisionError. A stride of 1 means "every frame".
skip_frames = max(1, int(1 / SAMPLING_RATE))
vid_iter = VideoIterator('data/test/test3.mkv', FRAME_SIZE)

# Pause after each processed frame.
# NOTE(review): presumably this gives the figure widget time to redraw and
# the viewer time to look at it -- confirm that 5 s is intended.
FRAME_PAUSE_SECONDS = 5

# Position inside the current cycle.
depth_samp_count = 0
seg_samp_count = 0

for i, frame in enumerate(vid_iter):
    if i % skip_frames != 0:
        continue

    depth_vis.update_img(frame)

    # Depth comes first in each cycle; once its quota is used up the
    # remaining sampled frames of the cycle go to segmentation.
    run_depth = depth_samp_count < DEPTH_SAMPLING_WEIGHT
    _, pred_depth, pred_seg = infer.predict(
        frame, is_depth=run_depth, is_seg=not run_depth
    )

    if run_depth:
        depth_samp_count += 1
        depth_vis.update_depth(pred_depth.squeeze(), frame)
    else:
        depth_vis.update_seg(pred_seg.squeeze())
        seg_samp_count += 1
        if seg_samp_count >= SEG_SAMPLING_WEIGHT:
            # Segmentation quota reached: start a new cycle.
            depth_samp_count = 0
            seg_samp_count = 0

    time.sleep(FRAME_PAUSE_SECONDS)