In [1]:
import mediapipe as mp
import cv2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision


### Prepare Face Meshing

In [2]:
# One FaceMesh instance is built here and reused by get_face_landmarks for
# every frame, so the model is only initialised once per kernel session.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,               # report at most one face per frame
    refine_landmarks=False,
    static_image_mode=False,       # frames are fed as a stream, not as unrelated stills
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
)

def get_face_landmarks(image):
    """Run the shared FaceMesh on a BGR image and return the first face found.

    Args:
        image: Image in BGR channel order; it is converted to RGB before
            being handed to ``face_mesh.process``.

    Returns:
        The first entry of ``multi_face_landmarks``, or ``None`` when the
        result contains no faces.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detected_faces = face_mesh.process(rgb_frame).multi_face_landmarks
    if not detected_faces:
        return None
    return detected_faces[0]

def draw_face_landmarks(image, landmarks):
    """Mark each landmark on ``image`` with a filled green dot of radius 1.

    Args:
        image: Image to draw on; it is modified in place.
        landmarks: Object whose ``landmark`` attribute iterates over points
            with ``x`` and ``y`` attributes. These are multiplied by the
            image width and height respectively to obtain pixel positions.

    Returns:
        The same ``image`` object that was passed in.
    """
    dot_color = (0, 255, 0)
    for point in landmarks.landmark:
        center = (
            int(point.x * image.shape[1]),
            int(point.y * image.shape[0]),
        )
        cv2.circle(image, center, 1, dot_color, -1)
    return image

### Prepare Object Detector

In [3]:
# Load the TFLite model file into memory; the raw bytes are passed to
# MediaPipe through ``model_asset_buffer``.
with open("efficientdet_lite0.tflite", "rb") as model_file:
    model_data = model_file.read()

# Shared detector used by detect_objects; detections scoring below 0.5 are
# filtered out by the detector itself.
base_options = python.BaseOptions(model_asset_buffer=model_data)
options = vision.ObjectDetectorOptions(
    base_options=base_options,
    score_threshold=0.5,
)
detector = vision.ObjectDetector.create_from_options(options)

def detect_objects(image):
    """Run the shared object detector on a BGR image.

    Args:
        image: Image in BGR channel order; it is converted to RGB and wrapped
            in an ``mp.Image`` (SRGB format) before detection.

    Returns:
        Whatever ``detector.detect`` returns for the converted image.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return detector.detect(
        mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
    )

def draw_detection_result(image, detection_result, objects_to_detect=None):
    """Draw labelled red bounding boxes for detections onto a copy of ``image``.

    Args:
        image: Image to annotate. It is never drawn on directly.
        detection_result: Object with a ``detections`` sequence. Each
            detection provides ``categories`` (only the first entry is used,
            via its ``category_name`` and ``score``) and a ``bounding_box``
            with ``origin_x``, ``origin_y``, ``width`` and ``height``.
        objects_to_detect: Optional iterable of category names to keep.
            Matching is case-insensitive. When falsy, every detection is
            drawn.

    Returns:
        ``image`` itself (not a copy) when ``detection_result`` has no
        detections; otherwise an annotated copy. The copy is returned even
        if every detection ends up being filtered out.
    """
    if not detection_result.detections:
        return image

    # Normalise the filter once up front instead of rebuilding a lower-cased
    # list for every single detection.
    wanted = None
    if objects_to_detect:
        wanted = {name.lower() for name in objects_to_detect}

    annotated_image = image.copy()
    box_color = (0, 0, 255)

    for detection in detection_result.detections:
        # Nothing to label without a category; skip instead of raising
        # IndexError on categories[0].
        if not detection.categories:
            continue

        category = detection.categories[0]
        category_name = category.category_name
        if wanted is not None and category_name.lower() not in wanted:
            continue

        probability = round(category.score, 2)

        bbox = detection.bounding_box
        x1 = bbox.origin_x
        y1 = bbox.origin_y
        x2 = x1 + bbox.width
        y2 = y1 + bbox.height
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), box_color, 2)

        # putText anchors the text at its bottom-left corner, so for boxes
        # touching the top (or left) edge the original (x1, y1 - 10) origin
        # would place the label outside the frame. Clamp it back inside.
        label_origin = (max(x1, 0), max(y1 - 10, 15))
        label = f"{category_name}: {probability}"

        cv2.putText(
            annotated_image,
            label,
            label_origin,
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            box_color,
            2,
        )

    return annotated_image

In [4]:
# Category names that should be drawn; passed to draw_detection_result as
# ``objects_to_detect`` (compared case-insensitively there).
# NOTE(review): 'pen' and 'headphones' do not appear to be in the COCO label
# set that EfficientDet-Lite0 models normally ship with, so they may never
# match -- confirm against the model's label map.
COMMON_OBJECTS = [
    'cell phone',
    'laptop',
    'book',
    'dog',
    'bottle',
    'cup',
    'pen',
    'headphones',
]

In [5]:
# Live preview: grab frames from the default camera (device 0), run both
# models on each frame, and show the annotated result until 'q' is pressed
# or the camera stops delivering frames.
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run both models on the untouched frame. Detecting the face after
        # boxes and labels have been painted would feed the face mesh an
        # image with overlays drawn on top of it.
        detection_result = detect_objects(frame)
        landmarks = get_face_landmarks(frame)

        # Draw on a copy so the raw frame stays intact.
        processed_frame = draw_detection_result(
            frame.copy(), detection_result, objects_to_detect=COMMON_OBJECTS
        )
        # get_face_landmarks signals "no face" with None.
        if landmarks is not None:
            processed_frame = draw_face_landmarks(processed_frame, landmarks)

        cv2.imshow('Combined Detection', processed_frame)

        # waitKey also services the GUI event loop; the mask keeps only the
        # low byte of the key code before comparing it with 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even when the loop is
    # aborted by an exception or a kernel interrupt; otherwise the device
    # stays locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()