In [None]:
Task: 

Use any resources (images, networks, CNNs, LLMs, Copilot, ChatGPT, etc.) to create an AI system that classifies, 
in real time, what appears in the video stream from the webcam: the owner of the laptop, 
the owner's pet (if you don't have a pet, borrow one), another person, or nobody. 

Your solution should respect ETHICAL guidelines and prioritize user privacy.
It is essential to ensure that any data collected during this classification process is handled securely 
and with the explicit consent of the individuals involved. Moreover, the system should be designed to operate transparently, 
allowing users to understand how their data is used and providing options for them to opt out if they choose.

In [24]:
pip install mediapipe opencv-python

Note: you may need to restart the kernel to use updated packages.


In [37]:
import cv2
import mediapipe as mp
import numpy as np

# --- MediaPipe models -------------------------------------------------------
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Face detector: used in the main loop to decide whether any face is in view.
face_detection = mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5)
# Face mesh: supplies the landmarks that are turned into an "embedding".
# The same instance processes the reference photo below and the live frames
# in extract_face_embedding, so both go through identical settings.
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)

# --- Owner reference embedding ----------------------------------------------
# Built exactly like extract_face_embedding builds live embeddings: flattened
# (x, y) landmarks, mean-subtracted, then scaled to unit length. It stays None
# when no usable reference exists, which is_owner_face treats as "not owner".
owner_embedding = None
owner_img = cv2.imread("owner.jpg")  # imread returns None instead of raising
if owner_img is not None:
    owner_rgb = cv2.cvtColor(owner_img, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(owner_rgb)
    if results.multi_face_landmarks:
        landmarks = results.multi_face_landmarks[0].landmark
        owner_vector = np.array([[lm.x, lm.y] for lm in landmarks]).flatten()
        owner_vector -= np.mean(owner_vector)
        owner_norm = np.linalg.norm(owner_vector)
        if owner_norm > 0:
            owner_embedding = owner_vector / owner_norm
        else:
            # Dividing by a zero norm would yield a NaN vector; keep None instead,
            # mirroring the zero-norm guard in extract_face_embedding.
            print("Warning: Degenerate landmarks in 'owner.jpg'")
    else:
        print("Warning: No landmarks found in 'owner.jpg'")
else:
    print("Warning: 'owner.jpg' not found")

# --- Pet reference features -------------------------------------------------
# ORB descriptors of the reference pet photo; is_pet_present matches them
# against each frame. pet_des stays None when no usable reference exists.
orb = cv2.ORB_create()
pet_img = cv2.imread("pet.jpg", cv2.IMREAD_GRAYSCALE)
if pet_img is not None:
    pet_kp, pet_des = orb.detectAndCompute(pet_img, None)
    if pet_des is None:
        print("Warning: No ORB features found in 'pet.jpg'")
else:
    pet_kp, pet_des = None, None
    print("Warning: 'pet.jpg' not found")
# Hamming distance is the metric for ORB's binary descriptors; crossCheck keeps
# only matches that are mutual best matches.
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

def extract_face_embedding(frame):
    """Turn the first face found in a BGR frame into a unit-length landmark vector.

    The frame is converted to RGB and passed to the module-level ``face_mesh``.
    The (x, y) coordinates of the first face's landmarks are flattened into a
    1-D array, shifted by their overall mean, and scaled to unit L2 norm.

    Returns:
        The normalized 1-D NumPy array, or ``None`` when no face landmarks are
        reported or the centered vector has zero norm.
    """
    mesh_output = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    faces = mesh_output.multi_face_landmarks
    if not faces:
        return None

    points = faces[0].landmark
    vector = np.array([(pt.x, pt.y) for pt in points]).ravel()
    vector -= vector.mean()
    magnitude = np.linalg.norm(vector)
    # A zero-length vector cannot be normalized.
    return None if magnitude == 0 else vector / magnitude

def is_owner_face(current_embedding, threshold=0.95):
    """Decide whether an embedding matches the stored owner embedding.

    Args:
        current_embedding: 1-D array as produced by ``extract_face_embedding``,
            or ``None`` when no face could be embedded.
        threshold: Minimum dot-product similarity (exclusive) required to call
            the face the owner's. Defaults to 0.95.

    Returns:
        bool: ``True`` only if both embeddings exist and their dot product
        exceeds ``threshold``; ``False`` otherwise.
    """
    if owner_embedding is None or current_embedding is None:
        return False
    # Both vectors are unit length, so the dot product is their cosine similarity.
    # float() makes the result a plain Python bool rather than numpy.bool_.
    similarity = float(np.dot(owner_embedding, current_embedding))
    return similarity > threshold

def is_pet_present(frame, max_distance=50, min_matches=10):
    """Check whether the reference pet's ORB features appear in a BGR frame.

    The frame is converted to grayscale, ORB descriptors are computed with the
    module-level ``orb``, and they are matched against ``pet_des`` using ``bf``.

    Args:
        frame: BGR image to search.
        max_distance: A match counts as "good" when its distance is strictly
            below this value. Defaults to 50.
        min_matches: The pet is reported present when the number of good
            matches is strictly greater than this value. Defaults to 10.

    Returns:
        bool: ``True`` if enough good matches were found; ``False`` otherwise,
        including when there are no reference descriptors or the frame yields
        no descriptors.
    """
    if pet_des is None:
        return False
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, frame_des = orb.detectAndCompute(gray, None)
    if frame_des is None:
        return False
    matches = bf.match(pet_des, frame_des)
    good_count = sum(1 for m in matches if m.distance < max_distance)
    return good_count > min_matches

# --- Real-time classification loop ------------------------------------------
# Each frame is labelled "Owner", "Other Person", "Pet" or "Nobody" and shown
# in a window. Frames are only displayed; this loop does not write them anywhere.
# Press Esc in the window to stop.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: could not open webcam (device 0)")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break

        label = "Nobody"
        detections = face_detection.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).detections
        if detections:
            # Any detected face is a person; upgrade to "Owner" only on a match.
            label = "Other Person"
            current_embedding = extract_face_embedding(frame)
            if is_owner_face(current_embedding):
                label = "Owner"
        elif is_pet_present(frame):
            # The pet check runs only when no face was detected.
            label = "Pet"

        cv2.putText(frame, label, (30, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 0, 0), 2)
        if detections:
            for d in detections:
                mp_drawing.draw_detection(frame, d)

        cv2.imshow("Webcam Classification", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # 27 == Esc
            break
finally:
    # Always free the camera and close the window, even if a frame raised an
    # exception or the kernel was interrupted; otherwise the webcam stays locked.
    cap.release()
    cv2.destroyAllWindows()