# Human Following - Coursework-part B

In [1]:
import tensorrt as trt
from tensorrt_model import TRTModel
from ssd_tensorrt import load_plugins, parse_boxes,TRT_INPUT_NAME, TRT_OUTPUT_NAME
import ctypes
import numpy as np
import cv2
import os
import ctypes
    
# Per-channel normalisation constants used by bgr8_to_ssd_input below.
# Both evaluate to 127.5 for each of the three channels, so subtracting
# `mean` and dividing by `stdev` maps 0 -> -1.0 and 255 -> +1.0.
mean = 255.0 * np.array([0.5, 0.5, 0.5])
stdev = 255.0 * np.array([0.5, 0.5, 0.5])

def bgr8_to_ssd_input(camera_value):
    """Turn a BGR camera frame into the normalised float32 batch fed to the detector.

    Steps: BGR -> RGB colour conversion, move the last (channel) axis to the
    front, cast to float32, normalise each channel with the module-level
    ``mean`` / ``stdev`` arrays, and prepend a batch axis of length 1.

    Args:
        camera_value: image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2RGB`` (presumably an 8-bit HxWx3 frame, as the
            function name suggests; confirm against the caller).

    Returns:
        float32 array with one extra leading axis and the channel axis second.
    """
    rgb = cv2.cvtColor(camera_value, cv2.COLOR_BGR2RGB)
    channels_first = np.transpose(rgb, (2, 0, 1)).astype(np.float32)
    # In-place arithmetic keeps the result float32 even though mean/stdev are float64.
    channels_first -= mean.reshape(-1, 1, 1)
    channels_first /= stdev.reshape(-1, 1, 1)
    return channels_first[np.newaxis]

class ObjectDetector(object):
    """Callable wrapper that runs a TensorRT engine and parses its box outputs."""

    def __init__(self, engine_path, preprocess_fn=bgr8_to_ssd_input):
        """Load the plugins and build the TRTModel for ``engine_path``.

        Args:
            engine_path: path handed to ``TRTModel`` as the engine to load.
            preprocess_fn: callable applied to the inputs of ``execute``
                before they are passed to the model.
        """
        trt_logger = trt.Logger()
        # Plugins are registered before the engine is wrapped by TRTModel.
        trt.init_libnvinfer_plugins(trt_logger, '')
        load_plugins()

        output_names = [TRT_OUTPUT_NAME, TRT_OUTPUT_NAME + '_1']
        self.trt_model = TRTModel(
            engine_path,
            input_names=[TRT_INPUT_NAME],
            output_names=output_names,
        )
        self.preprocess_fn = preprocess_fn

    def execute(self, *inputs):
        """Preprocess ``inputs``, run the engine and return ``parse_boxes`` of its outputs."""
        network_input = self.preprocess_fn(*inputs)
        raw_outputs = self.trt_model(network_input)
        return parse_boxes(raw_outputs)

    def __call__(self, *inputs):
        """Calling the detector is the same as calling ``execute``."""
        return self.execute(*inputs)

# Module-level detector instance, used by the frame-processing callback.
# The engine file name is resolved relative to the working directory.
model = ObjectDetector('ssd_mobilenet_v2_coco.engine')

In [2]:
#use traitlets and widgets to display the image in Jupyter Notebook
import traitlets
from traitlets.config.configurable import SingletonConfigurable

#use opencv to convert the depth image to a colour-mapped image for display
import cv2
import numpy as np

#using realsense to capture the color and depth image
import pyrealsense2 as rs

#a background thread is used so that images are captured in real time
import threading
# Nearest obstacle reading shared between threads: written by
# Camera._capture_frames and read by human_followingss.
# Holds 0 until the capture thread has processed its first frameset.
distance = 0
class Camera(SingletonConfigurable):
    """RealSense colour + depth capture that publishes frames through traitlets.

    A worker thread started by start() repeatedly reads framesets from the
    pipeline. For every complete frameset it updates, in this order:

    * the module-level ``distance``: the smallest non-zero value left in the
      masked depth image, or the 2000 sentinel when nothing else qualifies;
    * ``warning_flag``: 1 when that value is below 400, otherwise 0;
    * ``depth_value``: JET colour-mapped rendering of the masked depth image;
    * ``color_value``: the colour frame as a numpy array.

    ``color_value`` is assigned last on purpose: traitlets calls observers
    synchronously inside the assignment, so an observer of ``color_value``
    sees the obstacle data that belongs to the same frameset.

    Depth thresholds are compared against the raw depth values; the comments
    in this class treat them as millimetres.
    """

    #changes of this value are captured by traitlets
    color_value = traitlets.Any()

    def __init__(self):
        """Configure both streams, start the pipeline and grab one initial frameset."""
        super(Camera, self).__init__()

        self.warning_flag=0
        #configure the color and depth sensor
        self.pipeline = rs.pipeline()
        self.configuration = rs.config()

        #set resolution for the color camera
        self.color_width = 640
        self.color_height = 480
        self.color_fps = 30
        self.configuration.enable_stream(rs.stream.color, self.color_width, self.color_height, rs.format.bgr8, self.color_fps)

        #set resolution for the depth camera
        self.depth_width = 640
        self.depth_height = 480
        self.depth_fps = 30
        self.configuration.enable_stream(rs.stream.depth, self.depth_width, self.depth_height, rs.format.z16, self.depth_fps)

        #flag to control the thread
        self.thread_runnning_flag = False

        #start the RGBD sensor
        self.pipeline.start(self.configuration)
        self.pipeline_started = True
        frames = self.pipeline.wait_for_frames()

        #capture the first color image
        color_frame = frames.get_color_frame()
        image = np.asanyarray(color_frame.get_data())
        self.color_value = image

        #capture the first depth image
        depth_frame = frames.get_depth_frame()
        depth_image = np.asanyarray(depth_frame.get_data())
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        self.depth_value = depth_colormap

    def _capture_frames(self):
        """Worker loop: read framesets until stop() clears thread_runnning_flag."""
        global distance
        while self.thread_runnning_flag: #continue until the thread_runnning_flag is set to be False
            frames = self.pipeline.wait_for_frames() #receive data from RGBD sensor

            color_frame = frames.get_color_frame() #get the color image
            depth_frame = frames.get_depth_frame() #get the depth image
            #skip framesets that are missing either stream (empty frames are falsy)
            #instead of failing in get_data() and silently ending the thread
            if not color_frame or not depth_frame:
                continue

            image = np.asanyarray(color_frame.get_data()) #convert color image to numpy array
            depth_image = np.asanyarray(depth_frame.get_data()) #convert depth data to numpy array

            #we only consider the central area of the vision sensor
            #(rows 190-289, columns 160-479 of the 640x480 depth image)
            depth_image[:190,:]=0
            depth_image[290:,:]=0
            depth_image[:,:160]=0
            depth_image[:,480:]=0

            #For object avoidance, we don't consider distances lower than 100mm or bigger than 1000mm
            depth_image[depth_image<100]=0
            depth_image[depth_image>1000]=0

            #If every value in the depth image is 0, depth_image[depth_image!=0] is empty and .min() fails;
            #a sentinel pixel guarantees at least one non-zero value
            depth_image[0,0]=2000

            #one reduction feeds both the shared distance and the warning flag
            nearest = depth_image[depth_image!=0].min()
            distance = nearest
            self.warning_flag = 1 if nearest < 400 else 0

            depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
            self.depth_value = depth_colormap #assign the color BGR image to the depth value

            #publish the color image last: observers of color_value run inside this
            #assignment and must see distance / warning_flag from this same frameset
            self.color_value = image

    def start(self): #start the data capture thread
        """Start the capture thread if it is not already running."""
        if not self.thread_runnning_flag: #only process if no thread is running yet
            self.thread_runnning_flag=True #flag to control the operation of the _capture_frames function
            self.thread = threading.Thread(target=self._capture_frames) #link thread with the function
            self.thread.start() #start the thread

    def stop(self): #stop the data capture thread
        """Ask the capture thread to exit and wait for it.

        NOTE(review): the RealSense pipeline started in __init__ is left
        running; nothing in this class calls self.pipeline.stop().
        """
        if self.thread_runnning_flag:
            self.thread_runnning_flag = False #exit the while loop in the _capture_frames
            self.thread.join() #wait the exiting of the thread

def bgr8_to_jpeg(value):
    """Encode an image array as JPEG and return the encoded data as ``bytes`` for display."""
    # cv2.imencode returns a (status, buffer) pair; only the buffer is used.
    _status, encoded = cv2.imencode('.jpg', value)
    return bytes(encoded)

#create the camera object through SingletonConfigurable's instance() accessor;
#Camera.__init__ starts the RealSense pipeline and grabs a first frameset
camera = Camera.instance()
camera.start() # start the background thread that keeps capturing frames

In [3]:
def human_followingss(matching_detections, image):
    """Steer the robot towards the largest detection, or dodge a nearby obstacle.

    Relies on module-level names defined in other notebook cells: ``width``,
    ``height``, ``robot``, ``camera``, ``time`` and the shared ``distance``.

    With at least one detection:
      * every detection's box is drawn on ``image`` (modified in place);
      * the box with the largest pixel area is split at the vertical centre
        line (``width / 2``) into a left and a right area;
      * the robot stops when ``camera.warning_flag`` is 1; otherwise it turns
        towards a side whose area is more than three times the other side's,
        and drives straight when neither side dominates.

    With no detection:
      * when ``distance`` is strictly between 300 and 500, a fixed
        right / forward / forward-left manoeuvre is run, blocking for about
        3.2 seconds in total;
      * otherwise no command is sent.

    Args:
        matching_detections: sequence of dicts, each with a ``'bbox'`` entry
            of four values that are scaled by ``width`` / ``height``
            (presumably normalised x1, y1, x2, y2; confirm against
            ``parse_boxes``).
        image: frame the boxes are drawn on with ``cv2.rectangle``.
    """
    if not matching_detections:
        # NOTE(review): outside the 300-500 window nothing is commanded, so the
        # robot keeps executing its previous motion command. The original
        # comment said the robot should stop when no human is detected, but
        # that check sat in the branch that only runs when a detection exists.
        # Confirm the intended behaviour before adding a robot.stop() here.
        if 300 < distance < 500:
            robot.right(0.7)
            time.sleep(0.5)
            robot.forward(0.6)
            time.sleep(0.4)
            robot.forward_left(0.7)
            time.sleep(2.3)
        return

    center_x = width / 2

    # Track the detection with the largest box: (area, startx, starty, endx, endy)
    largest = None
    for det in matching_detections:
        bbox = det['bbox']
        startx = int(width * bbox[0])
        starty = int(height * bbox[1])
        endx = int(width * bbox[2])
        endy = int(height * bbox[3])

        # Draw the rectangle around the detected human
        cv2.rectangle(image, (startx, starty), (endx, endy), (255, 0, 0), 2)

        area = abs(endx - startx) * abs(endy - starty)
        if largest is None or area > largest[0]:
            largest = (area, startx, starty, endx, endy)

    # At least one detection exists here, so `largest` is always set.
    area, startx, starty, endx, endy = largest
    box_height = abs(endy - starty)

    # Split the box area at the centre line. The comparisons are inclusive so a
    # box whose edge lies exactly on the centre counts as fully on one side
    # instead of falling through with both areas left at 0.
    if endx <= center_x:
        leftarea, rightarea = area, 0
    elif startx >= center_x:
        leftarea, rightarea = 0, area
    else:
        leftarea = box_height * abs(center_x - startx)
        rightarea = box_height * abs(endx - center_x)

    if camera.warning_flag == 1:
        # Something is closer than the camera's warning threshold
        robot.stop()
    elif rightarea > leftarea * 3:
        robot.forward_right(0.7)  # Moving the robot forward as well as right
        time.sleep(0.2)           # Hold the turn for about 0.2 seconds
    elif leftarea > rightarea * 3:
        robot.forward_left(0.7)   # Moving the robot forward as well as left
        time.sleep(0.2)           # Hold the turn for about 0.2 seconds
    else:
        robot.forward(0.7)
            

        

In [4]:
import ipywidgets.widgets as widgets
from IPython.display import display, HTML
from RobotClass import Robot
import math
import time

# Frame size in pixels; human_followingss uses these to scale bounding boxes.
# They match the 640x480 colour stream configured in Camera.
width = 640
height = 480

# Widget showing the annotated frame, plus an integer box selecting which
# detection label is tracked (default 1).
image_widget = widgets.Image(format='jpeg', width=300, height=300)
label_widget = widgets.IntText(value=1, description='tracked label')
display(widgets.VBox([widgets.HBox([image_widget,]),label_widget]))

#initialize the Robot class
robot = Robot()



def processing(change):
    """Observer for ``camera.color_value``: detect, steer the robot, refresh the widget.

    Args:
        change: traitlets change dict; only ``change['new']`` (the new
            colour frame) is used.
    """
    frame = change['new']

    # The detector is given a 300x300 copy; the original frame is kept for
    # drawing and display.
    detector_input = cv2.resize(frame, (300, 300))
    detections = model(detector_input)

    # Keep only the first result's detections whose label matches the widget value.
    matching_detections = []
    for det in detections[0]:
        if det['label'] == int(label_widget.value):
            matching_detections.append(det)

    human_followingss(matching_detections, frame)

    image_widget.value = bgr8_to_jpeg(frame)
    
#camera.observe registers `processing` as an observer of the color_value trait, so `processing` is called each time color_value changes.
camera.observe(processing, names='color_value')

VBox(children=(HBox(children=(Image(value=b'', format='jpeg', height='300', width='300'),)), IntText(value=1, …

In [5]:
# Shutdown sequence: detach the observers first so no further motion commands
# are issued from the callback, then stop the capture thread, and finally stop
# the robot after a one-second pause.
camera.unobserve_all()
camera.stop()
time.sleep(1.0)
robot.stop()