In [31]:
import torch
import torchvision.models as models
import cv2
from torchvision.io.image import read_image
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor

#defining mobinet v3 model
model = models.detection.fasterrcnn_resnet50_fpn(weights=True)
model_lite = models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(weights=True)
model2 = models.detection.ssdlite320_mobilenet_v3_large(pretrained=True)
model_shuffle = models.shufflenet_v2_x0_5(pretrained=True)

#evaluating the model
model.eval()
model_lite.eval()
model2.eval()
model_shuffle.eval()

ShuffleNetV2(
  (conv1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (stage2): Sequential(
    (0): InvertedResidual(
      (branch1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): ReLU(inplace=True)
      )
      (branch2): Sequential(
        (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_

In [32]:
import pyzed.sl as sl
import cv2
import math
zed = sl.Camera()

In [33]:
# Set configuration parameters
init_params = sl.InitParameters()
init_params.camera_resolution = sl.RESOLUTION.HD1080
init_params.camera_fps = 30

# Open the camera
err = zed.open(init_params)
if err != sl.ERROR_CODE.SUCCESS:
    print("Error {}, exit program".format(err)) # Display the error
    exit()

In [34]:
# cap = cv2.VideoCapture(0)
# while(True):
#     #Predictions
#     ret, frame = cap.read()    
#     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     #Convert the frame into PIL image
#     image = Image.fromarray(rgb)
#     preprocess = pil_to_tensor(image)
#     preprocess = preprocess.unsqueeze(dim=0)
#     input_image = preprocess/255.0
#     predictions = model_lite(input_image)
#     x1 = int(predictions[0]['boxes'][0][0].item())
#     y1 = int(predictions[0]['boxes'][0][1].item())
#     x2 = int(predictions[0]['boxes'][0][2].item())
#     y2 = int(predictions[0]['boxes'][0][3].item())
#     cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#     cv2.imshow("Out", frame)
#     print(predictions[0]['labels'])
#     if(cv2.waitKey(1)==27):
#         cv2.destroyAllWindows()
#         cap.release()
#         break

In [35]:
while(True):
    # Create an RGBA sl.Mat object
    image_zed = sl.Mat(zed.get_camera_information().camera_configuration.resolution.width, zed.get_camera_information().camera_configuration.resolution.height, sl.MAT_TYPE.U8_C4)
    # Retrieve data in a numpy array with get_data()
    image_ocv = image_zed.get_data()
    if zed.grab() == sl.ERROR_CODE.SUCCESS :
        # Retrieve the left image in sl.Mat
        zed.retrieve_image(image_zed, sl.VIEW.LEFT)
        # Use get_data() to get the numpy array
        image_ocv = image_zed.get_data()
        # Display the left image from the numpy array
    rgb = cv2.cvtColor(image_ocv, cv2.COLOR_BGR2RGB)
    
    
    #Convert the frame into PIL image
    image = Image.fromarray(rgb)
    preprocess = pil_to_tensor(image)
    preprocess = preprocess.unsqueeze(dim=0)
    input_image = preprocess/255.0
    predictions = model_lite(input_image)
    max_score = -1
    required_label = 1
    min_box = None
    #get the max confidence of the required label
    for i in range(len(predictions)):
        if predictions[i]['labels'][0].item() == required_label:
            if predictions[i]['scores'][0].item()>max_score:
                min_box = predictions[i]['boxes'][0].tolist()

    
    x = None
    y = None
    if min_box is not None:
        x1 = int(min_box[0])
        y1 = int(min_box[1])
        x2 = int(min_box[2])
        y2 = int(min_box[3])
        x = int(x1 + (x2-x1)/2)
        y = int(y1 + (y2-y1)/2)
        cv2.rectangle(image_ocv, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.circle(image_ocv, (x, y), 2, (0, 123, 222), 5)


    
    cv2.imshow("Out", image_ocv)

    if (x is not None and y is not None):
        depth_zed = sl.Mat(zed.get_camera_information().camera_configuration.resolution.width, zed.get_camera_information().camera_configuration.resolution.height, sl.MAT_TYPE.F32_C1)
        if zed.grab() == sl.ERROR_CODE.SUCCESS :
            # Retrieve depth data (32-bit)
            zed.retrieve_measure(depth_zed, sl.MEASURE.DEPTH)
            # Load depth data into a numpy array
            depth_ocv = depth_zed.get_data()
            # Print the depth value at the center of the image
            # print(depth_ocv[y][x])
            point_cloud = sl.Mat()
            zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA)
            point3D = point_cloud.get_value(x, y)
            x_dist = point3D[1][0]
            y_dist = point3D[1][1]
            z_dist = point3D[1][2]
            color = point3D[1][3]
            print("X = ", x_dist, "\nY = ", y_dist, "\nZ = ", z_dist)
            rad = math.sqrt(x_dist**2 + y_dist**2 + z_dist**2)
            print("Distance = ", rad)



    if(cv2.waitKey(1)==27):
        cv2.destroyAllWindows()
        zed.close()
        break

X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan
X =  nan 
Y =  nan 
Z =  nan
Distance =  nan


In [36]:
zed.close()