-
Notifications
You must be signed in to change notification settings - Fork 0
/
depth_sensing.py
85 lines (62 loc) · 2.4 KB
/
depth_sensing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""01. Predict depth from a single image with pre-trained Monodepth2 models
===========================================================================
This is a quick demo of using the GluonCV Monodepth2 model for KITTI on real-world images;
this script feeds the model frames captured live from a webcam.
Please follow the `installation guide <../../index.html#installation>`__
to install MXNet and GluonCV if you have not done so yet.
"""
import numpy as np
import mxnet as mx
from mxnet.gluon.data.vision import transforms
import gluoncv
import argparse
import time
import PIL.Image as pil
import numpy as np
import mxnet as mx
from mxnet.gluon.data.vision import transforms
import gluoncv
import cv2
import matplotlib as mpl
import matplotlib.cm as cm
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import scipy.misc
from PIL import Image
# Run inference on the CPU.
ctx = mx.cpu(0)

# Video stream selection. Only "Webcam" is handled by this script.
video_source = "Webcam"

# From CAMERA
if video_source == "Webcam":
    cap = cv2.VideoCapture(0)
else:
    # Without this, `cap` would be undefined and the script would fail
    # later with an unrelated-looking NameError.
    raise ValueError("Unsupported video_source: %r" % (video_source,))

if not cap.isOpened():
    raise RuntimeError("Could not open video source %r" % (video_source,))

# Grab one frame up front to learn the capture resolution; the predicted
# disparity map is later resized back to this size for display.
ok, frame = cap.read()
if not ok or frame is None:
    cap.release()
    raise RuntimeError("Could not read an initial frame from %r" % (video_source,))
original_height, original_width = frame.shape[:2]

# Resolution the frames are resized to before being fed to the network.
# NOTE(review): the model name below ends in 640x192; the alternative
# 192 x 640 feed size left by the original author is kept for reference.
feed_height = 96
feed_width = 320
# feed_height = 192
# feed_width = 640

# Other model names noted by the original author:
#   monodepth2_resnet18_kitti_stereo_640x192
#   monodepth2_resnet18_posenet_kitti_mono_640x192
model = gluoncv.model_zoo.get_model(
    'monodepth2_resnet18_kitti_mono_640x192',
    pretrained_base=False, ctx=ctx, pretrained=True)
# Main loop: read a camera frame, predict its disparity map, colour-map it
# and show it stacked above the camera frame until Escape is pressed.

# The tensor transform does not depend on the frame, so build it once.
to_tensor = transforms.ToTensor()

try:
    while True:
        # Read a new frame
        ok, frame = cap.read()
        if not ok or frame is None:
            # No frame was delivered; stop instead of handing None to
            # cv2.cvtColor below.
            break

        # OpenCV frames are BGR; convert to RGB before building the PIL
        # image that is resized and fed to the network.
        raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(raw_img)
        img = img.resize((feed_width, feed_height), pil.LANCZOS)
        img = to_tensor(mx.nd.array(img)).expand_dims(0).as_in_context(context=ctx)

        outputs = model.predict(img)
        disp = outputs[("disp", 0)]

        # Resize the prediction to the size of the initially captured frame
        # so it can be concatenated with the camera frame.
        disp_resized = mx.nd.contrib.BilinearResize2D(
            disp, height=int(original_height), width=int(original_width))
        disp_resized_np = disp_resized.squeeze().as_in_context(mx.cpu()).asnumpy()

        # Normalise between the minimum and the 95th percentile so a few
        # very large values do not flatten the rest of the colour range.
        vmax = np.percentile(disp_resized_np, 95)
        normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
        mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
        colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)

        # The colour-mapped image is RGB (first three RGBA channels), while
        # `frame` is BGR and cv2.imshow interprets its input as BGR. Convert
        # so the depth colours are not shown with red and blue swapped.
        depth_frame = cv2.cvtColor(colormapped_im, cv2.COLOR_RGB2BGR)

        # Depth visualisation on top, camera frame underneath.
        output = np.concatenate((depth_frame, frame), axis=0)
        cv2.imshow('frame', output)

        k = cv2.waitKey(1)
        if k == 27:  # If escape was pressed exit
            break
finally:
    # Always free the camera and close the window, including on errors.
    cap.release()
    cv2.destroyAllWindows()