### Deep Learning for Autonomous Vehicles
# Milestone 1: Yolov5 + Mediapipe hand
Group : Aziz Belkhiria Nadia Hadjmbarek Ilyas Ben Adada Vincent Naayem

### Define the run mode: 'detection' (on the robot) or 'testing' (in the notebook)

In [115]:
# Run mode: 'detection' (on the robot) or 'testing' (in the notebook).
# In 'detection' mode the capture loop stops as soon as a region of interest
# has been cropped; in 'testing' mode it runs until 'q' is pressed.
mode = 'testing'
if mode not in ('detection', 'testing'):
    # A mistyped mode would otherwise silently behave like 'testing'.
    raise ValueError("mode must be 'detection' or 'testing', got %r" % (mode,))

# True until the capture loop has cropped its first region of interest;
# the loop sets it back to True before it exits in 'detection' mode.
first = True

In [None]:
!pip install -qr https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt 

In [99]:
import torch
import math
import cv2
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model 

In [80]:
# YOLOv5-nano detector, fetched (or read from the local cache) through torch.hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5n', pretrained=True)
# Keep only class 0 so that the detector reports humans and nothing else
model.classes = [0]

Using cache found in C:\Users\Aziz/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2022-4-27 torch 1.11.0+cpu CPU

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients
Adding AutoShape... 


In [81]:
# MediaPipe hand tracker, its drawing helper, and the Keras classifier that
# turns a list of hand landmarks into a gesture class
mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils
hands = mpHands.Hands(
    max_num_hands=4,
    min_detection_confidence=0.6,
)
model_mp = load_model('hand-gesture-recognition-code/mp_hand_gesture')

In [82]:
def find_centers(results, frame):
    """Outline every detection on ``frame`` and collect the boxes' horizontal midpoints.

    Parameters
    ----------
    results : sequence of 6-element rows
        Each row unpacks as ``(xmin, ymin, xmax, ymax, conf, class)``.
    frame : image
        Image handed to ``cv2.rectangle``; one (0, 0, 255) box of thickness 2
        is drawn per row.

    Returns
    -------
    (centers, frame)
        ``centers`` is a 1-D numpy array holding ``(xmin + xmax) / 2`` for each
        row, in row order; ``frame`` is the image returned by the last
        ``cv2.rectangle`` call (or the input itself when there are no rows).
    """
    centers = np.zeros(np.shape(results)[0])
    for row, (left, top, right, bottom, _score, _label) in enumerate(results):
        top_left = (int(left), int(top))
        bottom_right = (int(right), int(bottom))
        frame = cv2.rectangle(frame, top_left, bottom_right, (0, 0, 255), 2)
        centers[row] = (left + right) / 2
    return centers, frame

In [119]:
def closest(centers, landmark):
    """Return the index of the entry of ``centers`` nearest to ``landmark``.

    Parameters
    ----------
    centers : array-like of numbers
        Candidate positions (e.g. x coordinates of bounding-box centres).
    landmark : number
        Position to compare against.

    Returns
    -------
    int
        Index of the smallest ``abs(landmark - centers[i])``; the first such
        index on ties. Returns 0 when ``centers`` is empty, so callers must
        check for an empty array before indexing with the result.
    """
    distances = np.abs(np.atleast_1d(np.asarray(centers, dtype=float)) - landmark)
    if distances.size == 0:
        return 0
    # argmin has no distance ceiling, unlike a fixed starting minimum of 1000,
    # so the nearest centre is found even when every centre is far away.
    return int(np.argmin(distances))

In [122]:
# Capture loop: detect persons with YOLOv5, detect hands with MediaPipe, and
# highlight in green the person closest (along x) to a hand showing the
# gesture with class id 1 (peace sign).
# In 'detection' mode the first such person is cropped into ROI and the loop stops.
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged / stream ended): stop
            # instead of failing on frame.copy() with frame == None.
            break

        # Untouched BGR copy: `frame` gets boxes and landmarks drawn on it,
        # but the detectors and the ROI crop should see the raw pixels.
        raw_bgr = frame.copy()

        # Both detectors take RGB input, while OpenCV delivers BGR.
        img = cv2.cvtColor(raw_bgr, cv2.COLOR_BGR2RGB)
        y, x = img.shape[:2]

        # Find persons with YOLO
        results = model(img)
        boxes = results.xyxy[0]
        # Draw the person boxes on `frame` and get their x centres
        centers, frame = find_centers(boxes, frame)

        # Mark the array read-only while MediaPipe processes it (kept from the
        # original code; presumably a pass-by-reference optimisation).
        img.flags.writeable = False
        results_mp = hands.process(img)
        img.flags.writeable = True

        # Loop through the detected hands (multi_hand_landmarks is falsy when there are none)
        for handslms in results_mp.multi_hand_landmarks or []:
            # Landmark coordinates scaled from [0, 1] to pixels
            landmarks = [[int(lm.x * x), int(lm.y * y)] for lm in handslms.landmark]

            # Predict hand gesture
            prediction = model_mp.predict([landmarks])
            classID = np.argmax(prediction)
            if classID != 1:    # Only the peace sign is of interest
                continue

            mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)

            if np.size(centers) == 0:
                # A gesture without any detected person: nothing to associate
                # it with, and indexing `boxes` would raise IndexError.
                continue

            # Find the closest person by comparing the x position of the
            # first hand landmark with the box centres
            idx = closest(centers, landmarks[0][0])
            # Draw a bounding box around the person of interest
            xminbox, yminbox, xmaxbox, ymaxbox = (int(v) for v in boxes[idx][:4])
            frame = cv2.rectangle(frame, (xminbox, yminbox), (xmaxbox, ymaxbox), (0, 255, 0), 2)

            if mode == 'detection' and first:
                # Crop from the raw BGR frame so the ROI carries no drawn overlays
                ROI = raw_bgr[yminbox:ymaxbox, xminbox:xmaxbox, :]
                first = False

        cv2.imshow('Detection', frame)

        if mode == 'detection' and not first:
            # ROI captured: re-arm the flag for the next run and stop
            first = True
            break
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a step above raised
    cap.release()
    cv2.destroyAllWindows()

In [113]:
# Display the captured region of interest and save the figure to 'detected.png'.
# ROI only exists after the capture loop has run in 'detection' mode and a
# peace sign was recognised, so look it up defensively instead of failing
# with a NameError. ROI itself is left untouched so this cell can be re-run.
roi_bgr = globals().get('ROI')
if isinstance(roi_bgr, np.ndarray) and roi_bgr.size > 0:
    # The crop is in OpenCV's BGR order; matplotlib expects RGB
    roi_rgb = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2RGB)
    plt.imshow(roi_rgb)
    plt.savefig('detected.png')
else:
    print("No ROI captured: run the capture loop with mode = 'detection' first.")

In [114]:
# Re-open the figure file 'detected.png' from disk with PIL and display it
# via Image.show().
image = Image.open('detected.png')
image.show()