# YOLOv2 on PYNQ-Z1 and Movidius NCS: HDMI example
To run this notebook, you need to connect the PYNQ-Z1 HDMI input to a video source and connect the HDMI output to a monitor. See this guide to [set up your PYNQ-Z1 for the Movidius NCS](http://www.fpgadeveloper.com/2018/04/setting-up-the-pynq-z1-for-the-intel-movidius-neural-compute-stick.html).
### Load required packages and the base overlay

In [1]:
import cv2
import numpy as np
import time
from pynq.overlays.base import BaseOverlay
from pynq.lib.video import *

import sys
sys.path.insert(0, '../detectionExample')

from ObjectWrapper import *
from Visualize import *

# Load the base overlay from the "base.bit" bitstream; the resulting `base`
# object is what the rest of the notebook uses to reach the HDMI interfaces
# (via base.video).
base = BaseOverlay("base.bit")

### Configure the HDMI input and output
We configure both interfaces for RGB because our YOLO network needs RGB frames.

In [2]:
# Grab handles to the HDMI input and output exposed by the base overlay
hdmi_in, hdmi_out = base.video.hdmi_in, base.video.hdmi_out

# The input is configured first so that its detected mode can be reused
# for the output: both sides then share one resolution and the RGB format.
hdmi_in.configure(PIXEL_RGB)
hdmi_out.configure(hdmi_in.mode, PIXEL_RGB)

# Bring both interfaces up (input first, then output). The value returned by
# the final call is what the notebook displays as this cell's output.
hdmi_in.start()
hdmi_out.start()

<contextlib._GeneratorContextManager at 0x2945ee30>

### Open the Movidius NCS

In [3]:
# Open the Movidius NCS device.
# '../graph' is the path handed to ObjectWrapper -- presumably the compiled
# network graph file to load onto the stick (TODO confirm against
# ObjectWrapper in ../detectionExample).
detector = ObjectWrapper('../graph')

### Calculate subframe indices to cut out of the input frame for the YOLO network

In [4]:
# YOLOv2 input frame dimensions as reported by the detector.
# This notebook treats dim[0] as the width and dim[1] as the height.
dim = detector.dim

# The input frame width and height
frame_in_w = hdmi_in.mode.width
frame_in_h = hdmi_in.mode.height

# The frame width and height required by the YOLO network
yolo_width = dim[0]
yolo_height = dim[1]

# The subframe indices to cut out for the YOLO network (the middle of the input frame).
# The far edges are computed as near edge + network size so that the crop is
# exactly yolo_width x yolo_height even when the leftover margin is odd
# (mirroring the margin instead would make the crop one pixel too large).
# NOTE: this assumes the HDMI frame is at least as large as the network input;
# for a smaller frame the top/left indices become negative.
frame_top = (frame_in_h - yolo_height) // 2
frame_bot = frame_top + yolo_height
frame_left = (frame_in_w - yolo_width) // 2
frame_right = frame_left + yolo_width

# Letterbox geometry for the full-frame test: (neww, newh) is the size of the
# input frame after scaling it to fit inside the network input while keeping
# its aspect ratio. The axis that limits the fit fills the network input.
if frame_in_h/frame_in_w > yolo_height/yolo_width:
    # Input is relatively taller than the network input: height is the limit
    neww = int(frame_in_w * yolo_height / frame_in_h)
    newh = yolo_height
else:
    # Input is relatively wider (or same ratio): width is the limit
    newh = int(frame_in_h * yolo_width / frame_in_w)
    neww = yolo_width

# Offsets that centre the scaled frame inside the network input
offx = (yolo_width - neww) // 2
offy = (yolo_height - newh) // 2

# X offset of the image WRT the original image size
offx_orig = int(offx*frame_in_w/neww)
# Y offset of the image WRT the original image size
offy_orig = int(offy*frame_in_h/newh)
# Horizontal scale factor (fraction of the network width the image occupies)
xscale = neww/yolo_width
# Vertical scale factor (fraction of the network height the image occupies)
yscale = newh/yolo_height

print('offx_orig:',offx_orig,'offy_orig:',offy_orig,'xscale:',xscale,'yscale:',yscale)

offx_orig: 0 offy_orig: 280 xscale: 1.0 yscale: 0.5625


### HDMI video pass-through (without YOLO)

In [5]:
n_frames = 2000

# Row/column ranges of the centre crop, shared by the source and destination
crop_rows = slice(frame_top, frame_bot)
crop_cols = slice(frame_left, frame_right)

start_time = time.time()

for _ in range(n_frames):
    # Read one input frame and obtain a fresh output frame
    frame = hdmi_in.readframe()
    frame_out = hdmi_out.newframe()
    # Only the centre crop is copied across; the rest of the output frame is
    # left exactly as newframe() returned it
    frame_out[crop_rows, crop_cols, :] = frame[crop_rows, crop_cols, :]
    hdmi_out.writeframe(frame_out)

end_time = time.time()

# Wall-clock duration of the loop and the resulting frame rate
elapsed = end_time - start_time
print('Runtime:',elapsed,'FPS:',n_frames/elapsed)

Runtime: 33.403541564941406 FPS: 59.87389080022265


### HDMI video with YOLO

In [6]:
n_frames = 200

# Row/column ranges of the centre crop that is fed to the network and then
# written back to the same place in the output frame
crop_rows = slice(frame_top, frame_bot)
crop_cols = slice(frame_left, frame_right)

start_time = time.time()

for _ in range(n_frames):
    # Get a frame from HDMI input
    frame = hdmi_in.readframe()

    # Cut out the network-sized sub-frame for the NCS and divide all RGB
    # values by 255 (the copy leaves the HDMI frame itself untouched)
    small_frame = frame[crop_rows, crop_cols, :]
    ncs_frame = small_frame.copy()/255.0

    # Send the frame to the NCS. The crop already matches the network input,
    # so the offsets are zero and both scale factors are 1.0
    results = detector.DetectFromPreparedImage(ncs_frame, dim[0], dim[1], 0, 0, 1.0, 1.0)

    # Interpret results and draw the boxes on the image
    img_res = Visualize(small_frame, results)

    # Copy labelled image into the matching region of a new output frame
    frame_out = hdmi_out.newframe()
    frame_out[crop_rows, crop_cols, :] = img_res[:, :, :]
    hdmi_out.writeframe(frame_out)

end_time = time.time()

# Wall-clock duration of the loop and the resulting frame rate
elapsed = end_time - start_time
print('Runtime:',elapsed,'FPS:',n_frames/elapsed)

Runtime: 63.896953105926514 FPS: 3.130039701086307


### HDMI video with YOLO full frame
In this test we resize the whole input frame to fit the input size required by the YOLO network (`detector.dim`), keeping its aspect ratio and padding the rest of the network input with a constant value. The resize operation is performed in software, so it has a significant drag on the runtime and FPS. In the future, I'll try moving the resize operation to the FPGA.

In [7]:
n_frames = 200

# Initialize the frame to be sent to the network, pre-filled with 0.5 so the
# border around the letterboxed image has a constant value.
# NumPy image arrays are indexed (row, column) = (height, width), and this
# notebook treats dim[0] as the width and dim[1] as the height, so the buffer
# shape is (dim[1], dim[0], 3). This matches the [offy..., offx...] indexing
# used below, including for non-square network inputs.
ncs_frame = np.full((dim[1], dim[0], 3), 0.5)

start_time = time.time()

for _ in range(n_frames):
    # Get a frame from HDMI input
    frame = hdmi_in.readframe()

    # Scale the whole frame down to the letterboxed size (cv2 takes dsize as
    # (width, height)) and place it, divided by 255, in the centre of the
    # network input. The division already allocates a new array, so no
    # explicit copy of the resized frame is needed.
    small_frame = cv2.resize(frame, dsize=(neww, newh), interpolation=cv2.INTER_CUBIC)
    ncs_frame[offy:offy+newh, offx:offx+neww, :] = small_frame / 255.0

    # Send the frame to the NCS, passing the original frame size plus the
    # letterbox offsets and scale factors so that the results can be related
    # back to the full input frame
    results = detector.DetectFromPreparedImage(ncs_frame, frame_in_w, frame_in_h, offx_orig, offy_orig, xscale, yscale)

    # Interpret results and draw the boxes on the full-size input frame
    img_res = Visualize(frame, results)

    # Copy labelled image into the output frame
    frame_out = hdmi_out.newframe()
    frame_out[:, :, :] = img_res[:, :, :]
    hdmi_out.writeframe(frame_out)

end_time = time.time()

# Wall-clock duration of the loop and the resulting frame rate
elapsed = end_time - start_time
print('Runtime:',elapsed,'FPS:',n_frames/elapsed)

Runtime: 168.42586183547974 FPS: 1.1874660923235307


### Close the Movidius NCS

In [8]:
# Drop the notebook's reference to the detector object.
# NOTE(review): presumably ObjectWrapper releases the NCS device when the
# object is destroyed -- confirm in ../detectionExample/ObjectWrapper.
del detector

### Close the HDMI interfaces

In [9]:
# Stop the HDMI output first, then the HDMI input
hdmi_out.stop()
hdmi_in.stop()

# Remove both handles from the notebook namespace
del hdmi_in
del hdmi_out