# Démonstration du modèle TensorRT en temps réel

Dans ce carnet, nous allons étudier comment faire suivre au Jetbot un objet spécifique. Nous allons utiliser un modèle SSD-MobileNet V2 pré-entraîné sur le jeu de données COCO, qui est composé de 90 classes d'objets. Parmi ces objets, on trouve :
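- Personne (identifiant 1 dans le fichier de labels, soit l'index 0 en sortie du modèle)
- Coupe (identifiant 47 dans le fichier de labels, soit l'index 46 en sortie du modèle)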

et plein d'autres (vous pouvez trouver les index dans ce fichier : https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_label_map.pbtxt).

Ce modèle a précédemment été optimisé avec TensorRT ce qui le rend très rapide. 

### Classe de capture et de traitement du flux vidéo

La classe suivante hérite de `threading.Thread` et prend en charge :

- L'initialisation de la caméra et du moteur TensorRT
- La capture du flux vidéo et son traitement par le modèle, exécutés dans la méthode principale (`run`) du thread

In [2]:
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import threading
import ctypes
import time
import traitlets
import atexit
import cv2

class TRTInference(threading.Thread):
    """Worker thread that captures camera frames and runs a TensorRT engine on them.

    The constructor opens the camera through a GStreamer pipeline, deserializes
    the TensorRT engine, allocates the host/device buffers and loads the label
    map. Once the thread is started, ``run`` loops forever: while ``camera_on``
    is true it grabs a frame, runs the inference, draws the detections on the
    frame and pushes the JPEG-encoded result to ``widget_image``.
    """

    def __init__(self, repertoire_engine, repertoire_labels, widget_image,
                 type_camera="CSI", capture_device="0", capture_width="320", capture_height="320",
                 display_width="320", display_height="320", fps="30", flip=0):
        """Open the camera, load the engine and allocate the inference buffers.

        Args:
            repertoire_engine: Path of the serialized TensorRT engine file.
            repertoire_labels: Path of the ``.pbtxt`` label map.
            widget_image: Object whose ``value`` attribute receives each
                JPEG-encoded frame (an ipywidgets Image in the notebook).
            type_camera: The CSI pipeline is used when this contains "CSI",
                the USB (v4l2) pipeline otherwise.
            capture_device: Camera index (sensor id or /dev/video number).
            capture_width: Capture width in pixels.
            capture_height: Capture height in pixels.
            display_width: Width the frames are resized to.
            display_height: Height the frames are resized to.
            fps: Capture frame rate.
            flip: Flip method forwarded to the GStreamer flip element.

        Numeric settings may be given as ints or as numeric strings.
        """
        threading.Thread.__init__(self)
        self.widget_image = widget_image
        self.type_camera = type_camera
        # The historical defaults are strings; the pipelines format these
        # values with %d, so normalise everything to int once, here.
        self.capture_device = int(capture_device)
        self.capture_width = int(capture_width)
        self.capture_height = int(capture_height)
        self.display_width = int(display_width)
        self.display_height = int(display_height)
        self.fps = int(fps)
        self.flip = int(flip)
        self.camera_on = False

        # Camera initialisation
        self._running = False
        if "CSI" in self.type_camera:
            pipeline = self._gstreamer_pipeline_CSI()
        else:
            pipeline = self._gstreamer_pipeline_USB()
        self.cap = cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)
        if self.cap.isOpened():
            print("Caméra initialisée")
        else:
            print("Erreur d'ouverture du flux vidéo")
        # Make sure the capture device is released when the interpreter exits.
        atexit.register(self.cap.release)

        # TensorRT runtime initialisation
        self.logger = trt.Logger(trt.Logger.INFO)
        trt.init_libnvinfer_plugins(self.logger, namespace="")
        self.runtime = trt.Runtime(self.logger)

        # Engine loading
        print("Chargement du moteur...")
        with open(repertoire_engine, "rb") as f:
            self.engine = self.runtime.deserialize_cuda_engine(f.read())

        self.context = self.engine.create_execution_context()

        # CUDA context: retained once here and reused by Calcul(); destroy()
        # releases this single reference.
        cuda.init()
        self.cudactx = cuda.Device(0).retain_primary_context()
        self.cudactx.push()
        try:
            self.context.debug_sync = True

            # Input buffers (binding 0 is treated as the input)
            print("Allocation mémoire...")
            size_input = trt.volume(self.engine.get_binding_shape(0))*self.engine.max_batch_size
            self.input_host_mem = cuda.pagelocked_empty(size_input, np.float32)
            self.input_device_mem = cuda.mem_alloc(self.input_host_mem.nbytes)

            # Output buffers: one device allocation and one host array per
            # non-input binding, in binding order.
            self.output_device_mem = []
            self.output_host_mem = []
            for i in range(self.engine.num_bindings):
                if self.engine.binding_is_input(i):
                    continue
                shape = self.engine.get_binding_shape(i)
                dtype = trt.nptype(self.engine.get_binding_dtype(i))
                size_output = trt.volume(shape)*self.engine.max_batch_size
                # The byte size is computed directly instead of allocating a
                # throw-away page-locked buffer just to read its nbytes.
                nbytes = int(size_output) * np.dtype(dtype).itemsize
                self.output_device_mem.append(cuda.mem_alloc(nbytes))
                self.output_host_mem.append(np.zeros(shape, dtype))

            # Device addresses handed to the execution context: the input
            # first, then every output (however many the engine exposes).
            self.bindings = [int(self.input_device_mem)]
            self.bindings.extend(int(output_) for output_ in self.output_device_mem)

            # Placeholder frame returned by capture_image() until a real
            # frame has been read.
            self.image = np.zeros((320,320,3), dtype=trt.nptype(self.engine.get_binding_dtype(0)))
        finally:
            # Always leave the context stack balanced, even on failure.
            self.cudactx.pop()

        # Label map (id -> display name)
        self.classes = self.read_label_map(repertoire_labels)

    # Label reading
    def read_label_map(self, label_map_path):
        """Parse a ``.pbtxt`` label map into a ``{id: display_name}`` dict.

        Only the ``id`` and ``display_name`` fields are used; an entry is
        stored as soon as both have been seen for the current item. The whole
        quoted display name is kept, including names made of several words.

        Args:
            label_map_path: Path of the label map file.

        Returns:
            dict mapping each integer id to its display name.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If an ``id`` value is not an integer.
        """
        item_id = None
        item_name = None
        items = {}

        with open(label_map_path, "r") as file:
            for line in file:
                key, separator, value = line.partition(":")
                if not separator:
                    # Structural line ("item {", "}", blank). A new item
                    # must not inherit a half-read previous one.
                    if key.strip().startswith("item"):
                        item_id = None
                        item_name = None
                    continue
                # Compare the exact field name: a substring test would also
                # match values that merely contain "id".
                key = key.strip()
                value = value.strip()
                if key == "id":
                    item_id = int(value)
                elif key == "display_name":
                    item_name = value.strip("\"'").strip()
                if item_id is not None and item_name is not None:
                    items[item_id] = item_name
                    item_id = None
                    item_name = None
        return items

    # Inference
    def Calcul(self):
        """Run the engine on ``self.image`` and draw the first two detections on it.

        The frame is converted to float32, copied to the GPU and fed to the
        engine; every output is copied back into ``self.output_host_mem``.
        Output 1 is used as the boxes (ymin, xmin, ymax, xmax scaled by 320)
        and output 3 as the class indices, offset by one for the label lookup.
        """
        # Copy the frame into the input tensor
        x = self.image.astype(np.float32)
        x = np.expand_dims(x,axis=0)                    # (1,320,320,3)
        np.copyto(self.input_host_mem,x.ravel())

        # Reuse the context retained in __init__: retaining the primary
        # context again on every frame would leak one reference per call.
        self.cudactx.push()
        try:
            # Host -> device transfer of the input
            cuda.memcpy_htod(self.input_device_mem, self.input_host_mem)

            # Model execution
            self.context.execute(batch_size=1, bindings=self.bindings)

            # Device -> host transfer of the outputs
            for host_mem, device_mem in zip(self.output_host_mem, self.output_device_mem):
                cuda.memcpy_dtoh(host_mem, device_mem)
        finally:
            # Keep the context stack balanced even if the inference fails or
            # the thread is being stopped through raise_exception().
            self.cudactx.pop()

        # Draw a rectangle and a label on the first two detections
        for i in range(2):
            ymin = int(320 * self.output_host_mem[1][0,i,0])
            xmin = int(320 * self.output_host_mem[1][0,i,1])
            ymax = int(320 * self.output_host_mem[1][0,i,2])
            xmax = int(320 * self.output_host_mem[1][0,i,3])

            # Label ids are integers and start one above the model's index.
            label_id = int(self.output_host_mem[3][0,i]) + 1

            cv2.rectangle(self.image, (xmin,ymin),
                          (xmax, ymax),
                          (255, 0, 0), 1)
            cv2.putText(self.image,
                        str(self.classes.get(label_id)),
                        (xmin,ymin+20),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        (0, 255, 0),
                        1,
                        cv2.LINE_AA)

    # Frame reading
    def capture_image(self):
        """Return the next frame resized to the display size.

        When the read fails, the current ``self.image`` is returned instead.
        """
        re, image = self.cap.read()
        if re:
            image_resized = cv2.resize(image,(int(self.display_width),int(self.display_height)))
            return image_resized
        else:
            return self.image

    def run(self):
        """Thread body: capture, infer and publish frames while ``camera_on`` is set.

        The loop never returns on its own; ``raise_exception`` is the
        mechanism provided to stop the thread.
        """
        while True:
            if self.camera_on:
                self.image = self.capture_image()
                self.Calcul()
                self.widget_image.value = bgr8_to_jpeg(self.image)
                time.sleep(0.001)
            else:
                # Idle: sleep instead of spinning at full speed on the flag.
                time.sleep(0.05)

    # Pipeline definition for the CSI camera
    def _gstreamer_pipeline_CSI(self):
        """Build the GStreamer pipeline string for a CSI camera (nvarguscamerasrc)."""
        return("nvarguscamerasrc sensor-id=%d ! "
                "video/x-raw(memory:NVMM),"
                "width=(int)%d,height=(int)%d,"
                "format=(string)NV12, framerate=(fraction)%d/1 ! "
                "nvvidconv flip-method=%d ! "
                "video/x-raw,"
                "width=(int)%d,height=(int)%d,"
                "format=(string)BGRx ! videoconvert ! "
                "video/x-raw, format=(string)BGR ! "
                "appsink drop=true"
        %(self.capture_device,self.capture_width,self.capture_height,self.fps,self.flip, self.display_width,self.display_height))

    # Pipeline definition for the USB camera
    def _gstreamer_pipeline_USB(self):
        """Build the GStreamer pipeline string for a USB camera (v4l2src)."""
        return("v4l2src device=/dev/video%d ! "
               "video/x-raw, width=(int)%d, height=(int)%d, framerate=(fraction)%d/1 ! "
               "videoflip method=%d ! "
               "videoconvert ! "
               "video/x-raw, format=(string)BGR ! appsink drop=true"
        %(self.capture_device,self.capture_width,self.capture_height,self.fps,self.flip))

    # Routine used to stop the thread
    def raise_exception(self):
        """Ask the interpreter to raise ``SystemExit`` inside this thread.

        Does nothing when the thread has not been started yet.
        """
        thread_id = self.ident
        if thread_id is None:
            # Not started: there is no thread to interrupt.
            return
        # The C API takes an unsigned long id; pass it with an explicit type
        # so ctypes does not truncate it to a C int.
        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_ulong(thread_id),
                                                         ctypes.py_object(SystemExit))
        if res > 1:
            # More than one thread state was modified: cancel the request.
            ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_ulong(thread_id), None)
            print('Exception raise failure')

    def destroy(self):
        """Release the reference on the CUDA primary context taken in ``__init__``."""
        self.cudactx.detach()

### Interface de visualisation

In [3]:
import ipywidgets.widgets as widgets
import traitlets
from IPython.display import display
from jetbot import bgr8_to_jpeg

# Image widget that will show the annotated camera frames (JPEG, 320x320).
image_widget = widgets.Image(
    format='jpeg',
    width=320,
    height=320,
)

# Render the widget inside a vertical box in the notebook output.
display(widgets.VBox(children=[image_widget]))

VBox(children=(Image(value=b'', format='jpeg', height='320', width='320'),))

In [4]:
# Build the capture/inference thread: engine and label map paths, target
# widget, then the camera settings (CSI camera 0, 320x320 at 30 fps, no flip).
trt_inference_wrapper = TRTInference(
    repertoire_engine="tfmodel_ssd_mobilenet_v2_320x320_coco17_tpu-8/model.engine",
    repertoire_labels="models/research/object_detection/data/mscoco_complete_label_map.pbtxt",
    widget_image=image_widget,
    type_camera="CSI",
    capture_device=0,
    capture_width=320,
    capture_height=320,
    display_width=320,
    display_height=320,
    fps=30,
    flip=0,
)

Caméra initialisée
Chargement du moteur...
Allocation mémoire...


In [5]:
# Start the worker thread first, then raise the flag its loop polls so that
# capture and inference begin.
trt_inference_wrapper.start()
trt_inference_wrapper.camera_on = True

### Arrêt de la caméra

In [5]:
# Clear the flag polled by the worker loop: capture and inference pause, but
# the thread itself keeps running.
trt_inference_wrapper.camera_on = False