# Attempt to load YOLOv5 onto PYNQ-Z2(FPGA)
The goal is to load YOLOv5 by Ultralytics onto the PYNQ-Z2 FPGA. The normal method would be to pip install the Ultralytics library, but dependency clashes with the PYNQ framework prevent this.

Instead we will attempt to use the OpenCV Deep Neural Network (DNN) library that is already included in the PYNQ framework to run YOLOv5. This approach was already tested and confirmed to work with YOLOv3; now we will try it with YOLOv5. This method runs a custom-trained model that is exported to a 16-bit floating-point ONNX format in an attempt to accelerate the process.

### Step 1. Get to the trained weights (Optional)
Run this step if you have just pulled this repository with git. First, check your current directory.

In [None]:
# Command to check if in right directory
!ls

If you are not in the weights folder, then run the following command

In [6]:
%cd ../
%cd weights

c:\Users\perez\OneDrive\Documents\GitHub\Yolo
c:\Users\perez\OneDrive\Documents\GitHub\Yolo\weights


Now we should be ready to run the rest of the program. If not, just make sure you have navigated to the folder where you downloaded the weights.

### Step 2. Set up libraries we need and functions
For this part we just need to set up our variables and libraries

We start with the libraries we need, such as numpy, opencv, and pynq.

In [None]:
import cv2 # to use the webcam
# Additional libraries needed
import numpy as np
import time
# Since we are using the PYNQ board we need the bitstream to access the parts of the board
from pynq.overlays.base import BaseOverlay
from pynq.lib.video import *

Now for the functions that make this whole project work.

In [7]:
def predict(image, net, input_size=(64, 64)):
    """Run a single forward pass of the network on one image.

    Args:
        image: Image as a NumPy array (frames from cv2.VideoCapture are BGR,
            which is why swapRB=True is passed below).
        net: Network object providing setInput() and forward(), such as the
            one returned by model().
        input_size: (width, height) the image is resized to before inference.
            Defaults to (64, 64), the size this notebook's model uses. It must
            match the size wrap_detection() uses to rescale the boxes.

    Returns:
        The raw output of net.forward().
    """
    # Scale pixels to [0, 1], resize to the network input and swap the
    # R and B channels; crop=False resizes without centre-cropping.
    blob = cv2.dnn.blobFromImage(
        image, 1 / 255.0, input_size, swapRB=True, crop=False
    )
    net.setInput(blob)
    return net.forward()

def model(path):
    """Load an ONNX network with OpenCV's DNN module and run it on the CPU.

    Args:
        path: Path to the exported .onnx weights file.

    Returns:
        The network object returned by cv2.dnn.readNetFromONNX, configured
        with the default backend and the CPU target.
    """
    net = cv2.dnn.readNetFromONNX(path)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
    # setPreferableTarget expects a DNN_TARGET_* constant. Passing a
    # DNN_BACKEND_* constant here only works by accident because both
    # DNN_BACKEND_DEFAULT and DNN_TARGET_CPU have the value 0.
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    return net

def format_yolov5(frame):
    """Pad a 3-channel frame with black pixels so that it becomes square.

    The frame is copied into the top-left corner of a new uint8 canvas whose
    side equals the longer edge of the frame; the rest of the canvas is zero.

    Args:
        frame: Image array of shape (height, width, channels).

    Returns:
        A new (side, side, 3) uint8 array containing the frame.
    """
    height, width, _channels = frame.shape
    side = max(width, height)
    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[:height, :width] = frame
    return canvas

def wrap_detection(input_image, output_data, input_size=64):
    """Convert raw network output rows into NMS-filtered detections.

    Args:
        input_image: The image that was passed to predict(); only its shape
            is read, to scale boxes back to image pixels.
        output_data: 2-D array with one candidate per row, laid out as
            [center_x, center_y, width, height, confidence, class scores...].
            Coordinates are presumably in network-input pixels, since they
            are rescaled by image size / input_size below.
        input_size: Side length of the square network input used by
            predict(). Defaults to 64.

    Returns:
        A tuple (class_ids, confidences, boxes) of parallel lists holding the
        detections kept by non-maximum suppression. class_ids are ints,
        confidences are floats and each box is a NumPy array
        [left, top, width, height] in input_image pixels.
    """
    # NumPy image shape is (rows, cols, channels): height comes first.
    image_height, image_width = input_image.shape[:2]

    x_factor = image_width / input_size
    y_factor = image_height / input_size

    class_ids = []
    confidences = []
    boxes = []

    # Discard low-confidence rows in one vectorised step so the Python loop
    # only visits the few candidates that can actually become detections.
    output_data = np.asarray(output_data)
    candidates = output_data[output_data[:, 4] >= 0.4]

    for row in candidates:
        classes_scores = row[5:]
        class_id = int(np.argmax(classes_scores))
        if classes_scores[class_id] > .25:
            confidences.append(float(row[4]))
            class_ids.append(class_id)

            x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
            # Convert centre/size to a top-left anchored box in image pixels.
            left = int((x - 0.5 * w) * x_factor)
            top = int((y - 0.5 * h) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)
            boxes.append(np.array([left, top, width, height]))

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)

    # Depending on the OpenCV release the indices come back as a flat array,
    # an Nx1 array, or an empty tuple when nothing survives; flatten so that
    # every element is a plain integer index.
    keep = np.asarray(indexes, dtype=int).ravel()

    result_class_ids = [class_ids[i] for i in keep]
    result_confidences = [confidences[i] for i in keep]
    result_boxes = [boxes[i] for i in keep]

    return result_class_ids, result_confidences, result_boxes


After this we need to do some setup for the FPGA specifically

In [None]:
# Load the PYNQ base overlay (bitstream) so the board's video hardware can be
# reached from Python.
base = BaseOverlay("base.bit")

# HDMI output mode: 640x480. NOTE(review): the third argument (24) is
# presumably bits per pixel - confirm against the pynq VideoMode docs.
Mode = VideoMode(640,480,24)
hdmi_out = base.video.hdmi_out
hdmi_out.configure(Mode,PIXEL_BGR)
hdmi_out.start()

# monitor (output) frame buffer size
# NOTE(review): these two values are not read by any code visible in this
# notebook, and they differ from the 640x480 mode configured above.
frame_out_w = 1920
frame_out_h = 1080
# camera (input) configuration
frame_in_w = 640
frame_in_h = 480

import os
# Ask OpenCV to silence its log output.
# NOTE(review): cv2 is imported in an earlier cell; confirm the variable is
# still honoured when it is set after the import.
os.environ["OPENCV_LOG_LEVEL"]="SILENT"
# initialize camera from OpenCV

# Open video device 0 and request the input resolution defined above.
videoIn = cv2.VideoCapture(0)
videoIn.set(cv2.CAP_PROP_FRAME_WIDTH, frame_in_w);
videoIn.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_in_h);


# Report whether the capture device could be opened.
print("Capture device is open: " + str(videoIn.isOpened()))

If this all worked, we should see that the capture device is open; otherwise, check your webcam connection. Next comes one more function for writing to the HDMI output, and finally some additional variables for the whole setup.

In [None]:
def PYNQ_Display(frame_vga):
    """Copy the top-left 480x640 region of a frame to the HDMI output.

    Args:
        frame_vga: Image array indexed as [row, column, channel].
    """
    rows = slice(0, 480)
    cols = slice(0, 640)
    # Grab a fresh output buffer, fill its visible region and hand it back.
    out_buffer = hdmi_out.newframe()
    out_buffer[rows, cols, :] = frame_vga[rows, cols, :]
    hdmi_out.writeframe(out_buffer)

In [8]:
# Box colours, picked per detection by class id modulo the list length.
colors = [
    (255, 255, 0),
    (0, 255, 0),
    (0, 255, 255),
    (255, 0, 0),
]

# Load the exported ONNX weights.
net = model("YOLOv5_three_gestures_int8_12.onnx")

# Bookkeeping for the FPS estimate; fps stays at -1 until first measured.
start = time.time_ns()
frame_count = 0
total_frames = 0
fps = -1

# Class names, one per line, indexed by class id.
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f]

NameError: name 'cv2' is not defined

## Step 3. Run YOLOv5

If all has worked well, then all we have to do is run the yolo code and it should work

In [None]:
# Capture -> detect -> annotate -> display, until the stream ends or a key
# press is reported by OpenCV.
while True:

    ret, frame = videoIn.read()
    # read() reports failure through `ret`; also guard against a missing
    # frame so the code below never sees None.
    if not ret or frame is None:
        print("End of stream")
        break

    # Pad the frame to a square and run the network on it.
    inputImage = format_yolov5(frame)
    outs = predict(inputImage, net)

    class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])

    frame_count += 1
    total_frames += 1

    # Draw each detection: outline, filled label background, class name.
    for (classid, confidence, box) in zip(class_ids, confidences, boxes):
        color = colors[int(classid) % len(colors)]
        cv2.rectangle(frame, box, color, 2)
        cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
        cv2.putText(frame, classes[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,0,0))

    # Refresh the FPS estimate every 30 frames (time_ns() is in nanoseconds).
    if frame_count >= 30:
        end = time.time_ns()
        fps = 1000000000 * frame_count / (end - start)
        frame_count = 0
        start = time.time_ns()

    # The FPS overlay is only drawn once a measurement exists.
    if fps > 0:
        fps_label = "FPS: %.2f" % fps
        cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Show every frame, including those captured before the first FPS
    # measurement, so the HDMI output is live from the start.
    PYNQ_Display(frame)

    print("Total frames: " + str(total_frames))
    if cv2.waitKey(1) > -1:
        print("finished by user")
        break

In [None]:
# Release the capture device opened with cv2.VideoCapture.
videoIn.release()
# Close any OpenCV windows (none are created by the cells visible here).
cv2.destroyAllWindows()
# Stop the HDMI output and drop the reference to it.
hdmi_out.stop()
del hdmi_out