In [1]:
!pip install facenet-pytorch



In [2]:
!pip install opencv-python
!pip install mediapipe



In [3]:
import cv2
import mediapipe as mp
import numpy as np
from facenet_pytorch import MTCNN
from PIL import Image
from matplotlib import pyplot as plt
import math



In [None]:
class HandDetector():
    """Wrapper around MediaPipe Hands for detecting and drawing hands.

    The detector keeps the result of the most recent ``findHands`` call in
    ``self.results`` so that ``findPosition`` can be queried afterwards.
    """

    def __init__(self, mode=False, maxHands=20, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        """Create the underlying MediaPipe ``Hands`` solution.

        Args:
            mode: Passed to MediaPipe as ``static_image_mode``.
            maxHands: Passed to MediaPipe as ``max_num_hands``.
            modelComplexity: Passed to MediaPipe as ``model_complexity``.
            detectionCon: Passed to MediaPipe as ``min_detection_confidence``.
            trackCon: Passed to MediaPipe as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplex = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        # Keyword arguments keep each value bound to the intended option even
        # if the positional order of the MediaPipe signature differs.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplex,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Defined up front so findPosition() is safe before findHands() runs.
        self.results = None

    def findHands(self, img, draw=True, face_y_threshold=None):
        """Detect hands in a BGR frame and count how many are raised.

        A hand counts as raised when its wrist landmark (index 0), converted
        to pixels using the image height, lies above ``face_y_threshold``
        (i.e. has a smaller y). With ``face_y_threshold=None`` every detected
        hand counts as raised. Only raised hands are drawn.

        Args:
            img: BGR image; it is drawn on in place when ``draw`` is true.
            draw: Whether to draw landmarks and a bounding box on raised hands.
            face_y_threshold: Pixel y-coordinate used as the raise threshold,
                or ``None`` to disable the check.

        Returns:
            Tuple ``(img, hand_count, raised_hand_count)``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        hand_count = 0
        raised_hand_count = 0
        detected = self.results.multi_hand_landmarks
        if detected:
            hand_count = len(detected)
            img_height = img.shape[0]
            for handLms in detected:
                wrist_y = handLms.landmark[0].y * img_height
                if face_y_threshold is not None and not wrist_y < face_y_threshold:
                    continue
                raised_hand_count += 1
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
                    self.draw_bounding_box(img, handLms)
        return img, hand_count, raised_hand_count

    def findPosition(self, img, handNo=0, draw=True):
        """Return pixel positions of the landmarks of one detected hand.

        Uses the results stored by the last ``findHands`` call.

        Args:
            img: Image whose height/width scale the normalised landmarks; it
                is drawn on in place when ``draw`` is true.
            handNo: Index of the hand in the last detection result.
            draw: Whether to draw a filled circle on each landmark.

        Returns:
            List of ``[id, cx, cy]`` entries. The list is empty when no
            detection has run yet, no hands were found, or ``handNo`` does not
            refer to a detected hand.
        """
        lmlist = []
        hands = self.results.multi_hand_landmarks if self.results is not None else None
        if not hands:
            return lmlist
        # Negative indices remain valid, as with plain list indexing.
        if not -len(hands) <= handNo < len(hands):
            return lmlist

        h, w = img.shape[:2]
        for id, lm in enumerate(hands[handNo].landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmlist.append([id, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
        return lmlist

    def draw_bounding_box(self, img, hand_landmarks):
        """Draw a green rectangle enclosing all landmarks of one hand.

        Args:
            img: Image to draw on in place; its shape scales the landmarks.
            hand_landmarks: Object exposing a ``landmark`` sequence whose
                items have normalised ``x`` and ``y`` attributes.
        """
        height, width = img.shape[0], img.shape[1]
        xs = [int(lm.x * width) for lm in hand_landmarks.landmark]
        ys = [int(lm.y * height) for lm in hand_landmarks.landmark]
        if not xs:
            # Nothing to enclose; avoids passing infinities to OpenCV.
            return
        cv2.rectangle(img, (min(xs), min(ys)), (max(xs), max(ys)), (0, 255, 0), 2)

class FacePoseDetector():
    """Face detector based on facenet-pytorch's MTCNN plus an angle helper."""

    def __init__(self, device='cpu'):
        """Build the MTCNN detector.

        Args:
            device: Device passed to ``MTCNN``; defaults to ``'cpu'``.
        """
        self.mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
                           thresholds=[0.6, 0.7, 0.7], factor=0.709,
                           post_process=True, device=device)

    def detect_faces(self, frame):
        """Run MTCNN detection on a BGR frame.

        Args:
            frame: BGR image; it is converted to an RGB PIL image first.

        Returns:
            Tuple ``(bbox, landmarks)`` exactly as returned by
            ``MTCNN.detect(..., landmarks=True)``; the probabilities are
            discarded.
        """
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        bbox, _prob, landmarks = self.mtcnn.detect(image, landmarks=True)
        return bbox, landmarks

    def npAngle(self, a, b, c):
        """Return the angle at vertex ``b`` of triangle ``a-b-c`` in degrees.

        Args:
            a, b, c: Point coordinates (sequences of numbers of equal length).

        Returns:
            Angle between the vectors ``b->a`` and ``b->c`` in degrees. When
            ``a`` or ``c`` coincides with ``b`` the angle is undefined and
            ``0.0`` is returned instead of NaN.
        """
        ba = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
        bc = np.asarray(c, dtype=float) - np.asarray(b, dtype=float)
        denom = np.linalg.norm(ba) * np.linalg.norm(bc)
        if denom == 0:
            # Degenerate triangle: avoid 0/0 producing NaN.
            return 0.0
        # Rounding can push the ratio slightly outside [-1, 1], which would
        # make arccos return NaN; clamp it back into the valid domain.
        cosine_angle = np.clip(np.dot(ba, bc) / denom, -1.0, 1.0)
        return np.degrees(np.arccos(cosine_angle))

def _classify_face_orientation(angR, angL):
    """Map the two eye-vertex angles (degrees) to an orientation label."""
    if angR > 50 and angL > 50:
        return 'Frontal'
    if angR < angL:
        return 'Right Profile'
    return 'Left Profile'


def _raised_hand_threshold(landmarks):
    """Return the smallest landmark y (pixels) of the first face, or None."""
    if landmarks is None or len(landmarks) == 0 or landmarks[0] is None:
        return None
    # MTCNN landmark coordinates are already expressed in image pixels, so
    # they must not be scaled by the frame height again.
    return float(min(point[1] for point in landmarks[0]))


def _to_pixel(point):
    """Convert an (x, y) pair to a tuple of plain Python ints for OpenCV."""
    return tuple(int(value) for value in point)


def main():
    """Run the webcam demo: face orientation overlay plus raised-hand score.

    Reads frames from camera 0, annotates each detected face with the
    eye/nose triangle, its angles and an orientation label, counts hands
    whose wrist is above the topmost landmark of the first detected face,
    and shows a score of 5 points per raised hand. Press ``q`` to quit.

    Raises:
        RuntimeError: If the camera cannot be opened.
    """
    hand_detector = HandDetector(maxHands=2)
    face_pose_detector = FacePoseDetector()

    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError("Could not open the default camera (index 0).")

        while True:
            success, frame = cap.read()
            if not success:
                # Retrying without ever reaching waitKey would spin forever
                # with no way to quit, so stop instead.
                print("Failed to read a frame from the camera; stopping.")
                break

            # Face pose detection to determine threshold for raised hands
            bbox, landmarks = face_pose_detector.detect_faces(frame)
            if landmarks is not None:
                for landmark in landmarks:
                    if landmark is None:
                        continue
                    right_eye = landmark[0]
                    left_eye = landmark[1]
                    nose = landmark[2]
                    angR = face_pose_detector.npAngle(right_eye, left_eye, nose)
                    angL = face_pose_detector.npAngle(left_eye, right_eye, nose)
                    if np.isnan(angR) or np.isnan(angL):
                        # int(NaN) below would raise; skip degenerate faces.
                        continue

                    face_orientation = _classify_face_orientation(angR, angL)

                    # Draw the eye-eye-nose triangle used for the angles
                    right_eye_px = _to_pixel(right_eye)
                    left_eye_px = _to_pixel(left_eye)
                    nose_px = _to_pixel(nose)
                    cv2.line(frame, right_eye_px, left_eye_px, (0, 255, 0), 2)
                    cv2.line(frame, left_eye_px, nose_px, (0, 255, 0), 2)
                    cv2.line(frame, right_eye_px, nose_px, (0, 255, 0), 2)

                    # Display angles and orientation
                    cv2.putText(frame, f"Angles: L={int(angL)}, R={int(angR)}", (10, 130),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
                    cv2.putText(frame, f"Orientation: {face_orientation}", (10, 160),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            # Topmost (smallest y) landmark of the first face, in pixels.
            face_y_threshold = _raised_hand_threshold(landmarks)

            # Hand detection using the dynamically set face threshold
            frame, hand_count, raised_hand_count = hand_detector.findHands(
                frame, draw=True, face_y_threshold=face_y_threshold)

            if hand_count > 0:
                # Called for its drawing side effect (landmark circles on the
                # first hand); the returned list is available for gesture
                # recognition if needed later.
                hand_detector.findPosition(frame)

            total_score = raised_hand_count * 5  # Score based on raised hands count
            cv2.putText(frame, f"Total Score: {total_score}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 255), 3, cv2.LINE_AA)

            cv2.imshow("Image", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close windows, even on errors.
        cap.release()
        cv2.destroyAllWindows()

# Start the webcam demo only when this code is executed directly
# (i.e. when __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()
    



INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.
