# Image Based Visual Servoing



### Import COCO labels and camera calibration data

In [1]:
import math
import pickle

import cv2

# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load .dat files that come from a trusted source.

# Load COCO labels (indexed by the detector's numeric class id)
filename = "coco_labels.dat"
with open(filename, 'rb') as filehandler:
    # The context manager closes the file even if unpickling fails.
    COCO_labels = pickle.load(filehandler)

# Load camera calibration data for undistort
filename = "calibration.dat"
with open(filename, 'rb') as filehandler:
    camera_cal = pickle.load(filehandler)

# Camera matrix and distortion coefficients, later passed to undistort()
mtx = camera_cal['mtx']
dist = camera_cal['dist']


### Load pre-trained SSD model

In [2]:
from jetbot import ObjectDetector

# Build the object detector from the SSD MobileNet v2 COCO engine file.
# The path is relative, so it depends on where the notebook is run from.
model = ObjectDetector('../Notebooks/object_following/ssd_mobilenet_v2_coco.engine')

### Set up camera for object detection
* Camera is set up for 300x300 pixel video
* Use undistort() to undistort camera image prior to object detection

In [3]:
from jetbot import Camera

# Request 300x300 frames; the display widget and the pixel scaling of
# bounding boxes further down use the same 300x300 size.
camera = Camera.instance(width=300, height=300)

In [4]:
def undistort(img, mtx, dist, crop=False):
    """Return an undistorted version of ``img``.

    Args:
        img: Image array; its height and width are read from ``img.shape[:2]``.
        mtx: Camera matrix handed to OpenCV.
        dist: Distortion coefficients handed to OpenCV.
        crop: If true, cut the result down to the region of interest reported
            by ``cv2.getOptimalNewCameraMatrix``. Defaults to False, which
            keeps the output the same size as the input.

    Returns:
        The undistorted image (cropped to the ROI when ``crop`` is set).
    """
    h, w = img.shape[:2]

    # Free scaling parameter 1 asks OpenCV for a new camera matrix that keeps
    # every source pixel; the accompanying ROI is only applied when cropping.
    newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))

    # undistort
    dst = cv2.undistort(img, mtx, dist, None, newcameramtx)

    # crop the image
    if crop:
        # Separate names so the ROI size does not shadow the image size.
        x, y, roi_w, roi_h = roi
        dst = dst[y:y + roi_h, x:x + roi_w]

    return dst

In [11]:
from IPython.display import display
import ipywidgets.widgets as widgets
from jetbot import bgr8_to_jpeg

def detection_center(detection):
    """Return the (x, y) midpoint of a detection's bounding box, offset by -0.5.

    Entries 0 and 2 of ``detection['bbox']`` are averaged for x, entries 1 and
    3 for y, and 0.5 is subtracted from each average. Assuming the bbox is
    normalized to the 0..1 range (TODO confirm against the detector), the
    result is the offset of the box center from the image center.
    """
    box = detection['bbox']
    x_mid = (box[0] + box[2]) / 2.0
    y_mid = (box[1] + box[3]) / 2.0
    return (x_mid - 0.5, y_mid - 0.5)
    
def norm(vec):
    """Computes the length of the 2D vector

    Args:
        vec: Indexable with numeric entries at positions 0 and 1.

    Returns:
        The Euclidean length sqrt(vec[0]**2 + vec[1]**2) as a float.
    """
    # math.hypot needs only the standard library, so this no longer depends on
    # a numpy import that the notebook never performs.
    return math.hypot(vec[0], vec[1])

def closest_detection(detections):
    """Finds the detection closest to the image center

    Args:
        detections: Iterable of detection dicts, each accepted by
            detection_center().

    Returns:
        The detection whose center offset (as returned by detection_center())
        has the smallest norm, or None when ``detections`` is empty. If several
        detections are equally close, the earliest one is returned.
    """
    # min() returns the first of several equal minima; default=None covers
    # the empty case without a special branch.
    return min(
        detections,
        key=lambda det: norm(detection_center(det)),
        default=None,
    )



In [14]:
# JPEG image widget that later shows the annotated camera frame
image_widget = widgets.Image(format='jpeg', width=300, height=300)

display(widgets.HBox([image_widget]))

# The widget holds its size as strings ('300'), so convert to int before
# using the values to scale bounding boxes to pixel coordinates.
width = int(image_widget.width)
height = int(image_widget.height)

HBox(children=(Image(value=b'', format='jpeg', height='300', width='300'),))

In [69]:
# Undistort the current camera frame, then run the detector on the result
undistort_image = undistort(camera.value, mtx, dist)
detections = model(undistort_image)

# Only the first entry of the result is used here — presumably one list of
# detections per input image; TODO confirm against ObjectDetector.
items = detections[0]
for item in items:
    # Print class id, label text, confidence and the bbox as reported by the model
    print(item['label'],COCO_labels[item['label']], item['confidence'], item['bbox'])

300 300
72 laptop 0.8606036901473999 [0.2503645420074463, 0.2957329750061035, 0.4554458260536194, 0.4502370357513428]


In [70]:
# Class id of the object to highlight.
# NOTE(review): the detection printed by the previous cell has id 72, so with
# target = 62 nothing is highlighted for that frame — confirm the intended id.
target = 62

# draw all detections on image
for det in detections[0]:
    label = COCO_labels[det['label']]
    bbox = det['bbox']
    # Scale the bbox by the widget size to get pixel corner points.
    bbox_pixel = [(int(width * bbox[0]), int(height * bbox[1])),
                  (int(width * bbox[2]), int(height * bbox[3]))]
    cv2.rectangle(undistort_image, bbox_pixel[0], bbox_pixel[1], (255, 0, 0), 1)

    print(label,bbox_pixel)

# The selection below does not depend on the loop variable, so it runs once
# after the loop. This also defines matching_detections when nothing at all
# was detected.

# select detections that match selected class label
matching_detections = [d for d in detections[0] if d['label'] == target]

# get detection closest to center of field of view and draw it
det = closest_detection(matching_detections)

if det is not None:
    bbox = det['bbox']
    # Thicker (0, 255, 0) outline, drawn last so it sits on top of the
    # thin outlines drawn in the loop.
    cv2.rectangle(
        undistort_image,
        (int(width * bbox[0]), int(height * bbox[1])),
        (int(width * bbox[2]), int(height * bbox[3])),
        (0, 255, 0),
        2,
    )

# Note: the rectangles are drawn into undistort_image itself, so re-running
# this cell without re-running the detection cell draws on top of old boxes.
image_widget.value = bgr8_to_jpeg(undistort_image)


laptop [(75, 88), (136, 135)]


### Feature Points for Objects

From (0,0,0) to (1,0,0): 

* the TV will act as the feature point from Start (0,0,0) to Midpoint (0.5,0,0).
* the Stool will act as the feature point from Midpoint (0.5,0,0) to Target (1,0,0).

In [68]:
# Hand-recorded bounding boxes as pairs of pixel corner points, in the same
# [(x, y), (x, y)] layout that the drawing cell prints.
# start_bbox_TV equals the box printed by the drawing cell above (reported
# there with the label 'laptop').
# NOTE(review): the other three boxes were presumably captured the same way at
# the midpoint and target poses — confirm.
start_bbox_TV = [(75, 88), (136, 135)]
midpoint_bbox_TV = [(52, 63), (141, 125)]
midpoint_bbox_stool = [(148, 125), (181, 190)]
target_bbox_stool = [(153, 105), (202, 187)]

In [74]:
def fp_delta(current, desired):
    """Return the per-axis difference ``desired - current`` of two 2D points.

    Args:
        current: Pair (x, y) for the current feature point.
        desired: Pair (x, y) for the desired feature point.

    Returns:
        Tuple (dx, dy) with dx = desired x - current x and
        dy = desired y - current y.
    """
    x_now, y_now = current
    x_goal, y_goal = desired
    delta_x = x_goal - x_now
    delta_y = y_goal - y_now
    return (delta_x, delta_y)


In [76]:
# Split each TV bounding box into its two corner feature points
TV_fp1, TV_fp2 = start_bbox_TV
desired_TV_fp1, desired_TV_fp2 = midpoint_bbox_TV

# Feature-point error (desired - current) for both corners.
# NOTE(review): with the bbox values defined above this evaluates to
# ((-23, -25), (5, -10)); the recorded output differs, so it is presumably
# stale from an earlier run — re-run the notebook top to bottom to confirm.
error_TV = (fp_delta(TV_fp1,desired_TV_fp1), fp_delta(TV_fp2,desired_TV_fp2))
print (error_TV)

((-23, -26), (4, -10))
