In [22]:
import cv2
from ultralytics import YOLO

# Real-time detection demo: run the trained YOLOv8 model on every webcam frame
# and draw one labelled box per detection until 'q' is pressed.

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),   # Yellow
    'mouth': (0, 255, 0),    # Green
    'eyes': (0, 0, 255),     # Red
}

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]
            color = colors[class_name]

            # Draw the bounding box and label on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{class_name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            print(box.xyxy)  # Raw box tensor, kept for inspecting coordinates

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 (no detections), 242.0ms
Speed: 5.4ms preprocess, 242.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 179.0ms
Speed: 3.0ms preprocess, 179.0ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 800)
tensor([[501.3796, 178.9174, 855.5679, 601.1263]])

0: 480x800 1 Face, 1 Talking, 1 eyes, 181.3ms
Speed: 2.1ms preprocess, 181.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 800)
tensor([[502.6815, 183.9787, 831.8563, 584.8322]])
tensor([[576.5712, 401.1606, 744.6957, 514.9584]])
tensor([[526.2162, 261.3415, 787.5791, 366.2367]])

0: 480x800 1 Face, 1 Talking, 1 eyes, 172.5ms
Speed: 2.4ms preprocess, 172.5ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)
tensor([[518.3859, 197.8112, 841.5974, 593.9397]])
tensor([[545.0472, 271.2075, 803.3475, 375.8563]])
tensor([[583.7662, 411.6631, 751.1182, 525.9388]])

0: 480x800 1 Face, 1 Talking, 1 eyes, 167.5ms
Speed: 2.1ms preprocess, 167.5ms inference, 0

In [23]:
import cv2
from ultralytics import YOLO

# Real-time detection demo with a marker drawn at the centre of the frame.

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),   # Yellow
    'mouth': (0, 255, 0),    # Green
    'eyes': (0, 0, 255),     # Red
}

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # Retrieve frame dimensions (only meaningful once the device is open)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    center_screen = (width // 2, height // 2)  # (x, y) of the frame centre

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]
            color = colors[class_name]

            # Draw the bounding box and label on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{class_name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Mark the centre of the screen with a filled blue dot (BGR)
        cv2.circle(frame, center_screen, 5, (255, 0, 0), -1)

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 (no detections), 226.5ms
Speed: 5.9ms preprocess, 226.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 182.5ms
Speed: 3.9ms preprocess, 182.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 174.7ms
Speed: 2.0ms preprocess, 174.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 177.2ms
Speed: 2.0ms preprocess, 177.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 174.1ms
Speed: 2.3ms preprocess, 174.1ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 176.9ms
Speed: 2.4ms preprocess, 176.9ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 197.8ms
Speed: 2.9ms preprocess, 197.8ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 177.8ms
Speed: 1.9ms preprocess, 177.8ms inference, 0.6ms postprocess per

In [24]:
import cv2
from ultralytics import YOLO

# Real-time detection demo: a face whose box centre lies within a fixed radius
# of the frame centre is relabelled "Target" and drawn in a distinct colour.

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),   # Yellow
    'mouth': (0, 255, 0),    # Green
    'eyes': (0, 0, 255),     # Red
}

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # Retrieve frame dimensions (only meaningful once the device is open)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    center_screen = (width // 2, height // 2)  # (x, y) of the frame centre

    # Threshold for considering a detection as "target":
    # 10% of the smallest frame dimension, in pixels.
    target_threshold = min(width, height) * 0.1

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2  # Box centre
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]

            # Euclidean distance from the box centre to the frame centre
            dist_to_center = ((center_x - center_screen[0]) ** 2
                              + (center_y - center_screen[1]) ** 2) ** 0.5

            # Only faces can become the target; everything else keeps its class colour
            if dist_to_center < target_threshold and class_name == 'face':
                color = (255, 0, 0)  # Blue (BGR) for the target
                label_text = f'Target {conf:.2f}'
            else:
                color = colors[class_name]
                label_text = f'{class_name} {conf:.2f}'

            # Draw the bounding box and label on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Mark the centre of the screen with a filled cyan dot (BGR)
        cv2.circle(frame, center_screen, 5, (255, 255, 0), -1)

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 (no detections), 176.0ms
Speed: 2.9ms preprocess, 176.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 173.9ms
Speed: 2.3ms preprocess, 173.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 169.4ms
Speed: 1.8ms preprocess, 169.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 166.4ms
Speed: 1.8ms preprocess, 166.4ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 202.1ms
Speed: 1.8ms preprocess, 202.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 182.3ms
Speed: 1.8ms preprocess, 182.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 184.2ms
Speed: 1.8ms preprocess, 184.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 171.2ms
Speed: 1.9ms pr

In [25]:
import cv2
from ultralytics import YOLO
import numpy as np

# Real-time detection demo: every detection is drawn in its class colour and
# the face whose box centre is nearest the frame centre is re-drawn as "Target".

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),  # Yellow for regular faces
    'mouth': (0, 255, 0),   # Green
    'eyes': (0, 0, 255),    # Red
}
target_color = (255, 0, 0)  # Blue (BGR) for the target face

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # Grab one frame to learn the frame size; a failed read returns no image,
    # so check it before touching frame.shape.
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError('Could not read a frame from the webcam')
    height, width, _ = frame.shape
    center_screen = (width // 2, height // 2)  # (x, y) of the frame centre

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Best candidate for this frame only; reset on every iteration
        closest_face = None
        min_distance = float('inf')

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]

            # Draw the bounding box and label on the frame
            color = colors[class_name]
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{class_name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Only faces compete for the target slot; without this filter a
            # mouth or eyes box near the centre would be labelled "Target".
            if class_name != 'face':
                continue

            # Distance from the box centre to the frame centre
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            distance = np.sqrt((center_x - center_screen[0]) ** 2
                               + (center_y - center_screen[1]) ** 2)
            if distance < min_distance:
                min_distance = distance
                closest_face = (x1, y1, x2, y2, conf)

        # Highlight the closest face (drawn over its regular box)
        if closest_face is not None:
            x1, y1, x2, y2, conf = closest_face
            cv2.rectangle(frame, (x1, y1), (x2, y2), target_color, 2)
            cv2.putText(frame, 'Target {:.2f}'.format(conf), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, target_color, 2)

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 1 Face, 1 Talking, 231.3ms
Speed: 8.6ms preprocess, 231.3ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 179.8ms
Speed: 2.1ms preprocess, 179.8ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 178.2ms
Speed: 2.1ms preprocess, 178.2ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 191.4ms
Speed: 2.2ms preprocess, 191.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 169.1ms
Speed: 2.1ms preprocess, 169.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 169.2ms
Speed: 2.2ms preprocess, 169.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 180.6ms
Speed: 2.3ms preprocess, 180.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Fac

In [32]:
import cv2
from ultralytics import YOLO
import numpy as np

# Real-time detection demo: every detection is drawn in its class colour and
# the face whose box centre is nearest the frame centre is re-drawn as "Target".

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),  # Yellow for regular faces
    'mouth': (0, 255, 0),   # Green
    'eyes': (0, 0, 255),    # Red
}
target_color = (255, 0, 0)  # Blue (BGR) for the target face

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # Grab one frame to learn the frame size; a failed read returns no image,
    # so check it before touching frame.shape.
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError('Could not read a frame from the webcam')
    height, width, _ = frame.shape
    center_screen = (width // 2, height // 2)  # (x, y) of the frame centre

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Best candidate for this frame only; reset on every iteration
        closest_face = None
        min_distance = float('inf')

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]

            # Draw the bounding box and label on the frame
            color = colors[class_name]
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{class_name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Only faces compete for the target slot
            if class_name != 'face':
                continue

            # Distance from the box centre to the frame centre
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            distance = np.sqrt((center_x - center_screen[0]) ** 2
                               + (center_y - center_screen[1]) ** 2)
            if distance < min_distance:
                min_distance = distance
                closest_face = (x1, y1, x2, y2, conf)

        # Highlight the closest face (drawn over its regular box)
        if closest_face is not None:
            x1, y1, x2, y2, conf = closest_face
            cv2.rectangle(frame, (x1, y1), (x2, y2), target_color, 2)
            cv2.putText(frame, 'Target {:.2f}'.format(conf), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, target_color, 2)

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 1 Face, 2 Talkings, 1 eyes, 151.0ms
Speed: 1.9ms preprocess, 151.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 178.8ms
Speed: 8.8ms preprocess, 178.8ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 2 Faces, 1 Talking, 1 eyes, 148.4ms
Speed: 1.9ms preprocess, 148.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 2 Talkings, 183.0ms
Speed: 1.9ms preprocess, 183.0ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 150.9ms
Speed: 2.2ms preprocess, 150.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 2 Talkings, 1 eyes, 151.8ms
Speed: 1.8ms preprocess, 151.8ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 154.3ms
Speed: 3.0ms preprocess, 154.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Ta

In [None]:
# import cv2
# from ultralytics import YOLO
# import numpy as np

# # Load the trained YOLOv8 model
# model = YOLO('best.pt')

# # Initialize webcam
# cap = cv2.VideoCapture(0)

# # Get screen center
# ret, frame = cap.read()
# height, width, _ = frame.shape
# center_screen = (width // 2, height // 2)

# # Define class names and assign colors
# class_names = ['face', 'mouth', 'eyes']
# colors = {
#     'face': (0, 255, 255),  # Cyan for regular faces
#     'mouth': (0, 255, 0),   # Green
#     'eyes': (0, 0, 255)     # Red
# }
# target_color = (255, 0, 0)  # Red for the target face

# def calculate_score(distance, has_face, has_eyes, has_mouth):
#     score = 0
#     if has_face:
#         score += 5
#     if has_eyes:
#         score += 3
#     if has_mouth:
#         score += 2
#     # Adjust the distance penalty to scale as needed
#     distance_penalty = distance / 100
#     return score - distance_penalty

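# NOTE(review): best_target / highest_score are initialised once, outside the
# frame loop, and are never reset, so the highlighted "Target" box can be a
# stale detection from an earlier frame. Reset both at the top of each iteration.
# best_target = None
# highest_score = -np.inf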

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     # Perform detection
#     results = model.predict(source=frame, conf=0.25)

#     detections = {
#         'face': None,
#         'eyes': None,
#         'mouth': None
#     }

#     # Identify detections
#     for box in results[0].boxes:
#         x1, y1, x2, y2 = map(int, box.xyxy[0])  # Get bounding box coordinates
#         conf = box.conf[0]  # Confidence score
#         label = int(box.cls)  # Class label as integer
#         class_name = class_names[label]  # Retrieve class name using the label as index

#         # Store the best detection per category
#         if detections[class_name] is None or detections[class_name][1] < conf:
#             detections[class_name] = ((x1, y1, x2, y2), conf)

#     # Calculate the center of the face detection (if any)
#     if detections['face']:
#         x1, y1, x2, y2, conf = *detections['face'][0], detections['face'][1]
#         center_x = (x1 + x2) // 2
#         center_y = (y1 + y2) // 2
#         distance = np.sqrt((center_x - center_screen[0]) ** 2 + (center_y - center_screen[1]) ** 2)
        
#         # Calculate score
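#         # NOTE(review): the three `in detections` checks are always True because
#         # the dict is pre-filled with all three keys; test
#         # `detections[...] is not None` instead.
#         score = calculate_score(distance, 'face' in detections, 'eyes' in detections, 'mouth' in detections)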
#         if score > highest_score:
#             highest_score = score
#             best_target = (x1, y1, x2, y2, conf, class_name, score)

#     # Draw all detections
#     for class_name, detection in detections.items():
#         if detection:
#             x1, y1, x2, y2 = detection[0]
#             cv2.rectangle(frame, (x1, y1), (x2, y2), colors[class_name], 2)
#             cv2.putText(frame, f'{class_name} {detection[1]:.2f}', (x1, y1 - 10),
#                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[class_name], 2)

#     # Highlight the best target
#     if best_target:
#         x1, y1, x2, y2, conf, class_name, score = best_target
#         cv2.rectangle(frame, (x1, y1), (x2, y2), target_color, 3)
#         cv2.putText(frame, f'Target {conf:.2f} Score: {score:.1f}', (x1, y1 - 20),
#                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, target_color, 2)

#     # Display the frame with detections
#     cv2.imshow('Real-Time Detection', frame)

#     # Break on pressing 'q'
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()



0: 480x800 1 Face, 217.0ms
Speed: 5.7ms preprocess, 217.0ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 2 eyess, 182.5ms
Speed: 2.7ms preprocess, 182.5ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 2 eyess, 198.3ms
Speed: 2.3ms preprocess, 198.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 178.2ms
Speed: 2.0ms preprocess, 178.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 2 eyess, 202.4ms
Speed: 2.1ms preprocess, 202.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 194.4ms
Speed: 2.9ms preprocess, 194.4ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 189.0ms
Speed: 2.2ms preprocess, 189.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 207.4ms
Speed: 2.1ms preprocess, 207.4ms infere

KeyboardInterrupt: 

In [33]:
import cv2
from ultralytics import YOLO
import numpy as np

# Real-time detection demo: every detection is drawn in its class colour, the
# face nearest the frame centre is re-drawn as "Target", and the frame centre
# itself is marked with a dot.

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),  # Yellow for regular faces
    'mouth': (0, 255, 0),   # Green
    'eyes': (0, 0, 255),    # Red
}
target_color = (255, 0, 0)  # Blue (BGR) for the target face

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # Grab one frame to learn the frame size; a failed read returns no image,
    # so check it before touching frame.shape.
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError('Could not read a frame from the webcam')
    height, width, _ = frame.shape
    center_screen = (width // 2, height // 2)  # (x, y) of the frame centre

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Best candidate for this frame only; reset on every iteration
        closest_face = None
        min_distance = float('inf')

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]

            # Draw the bounding box and label on the frame
            color = colors[class_name]
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{class_name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Only faces compete for the target slot
            if class_name != 'face':
                continue

            # Distance from the box centre to the frame centre
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            distance = np.sqrt((center_x - center_screen[0]) ** 2
                               + (center_y - center_screen[1]) ** 2)
            if distance < min_distance:
                min_distance = distance
                closest_face = (x1, y1, x2, y2, conf)

        # Highlight the closest face (drawn over its regular box)
        if closest_face is not None:
            x1, y1, x2, y2, conf = closest_face
            cv2.rectangle(frame, (x1, y1), (x2, y2), target_color, 2)
            cv2.putText(frame, 'Target {:.2f}'.format(conf), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, target_color, 2)

        # Draw a filled green dot at the center of the screen
        cv2.circle(frame, center_screen, 5, (0, 255, 0), -1)

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 (no detections), 363.2ms
Speed: 9.0ms preprocess, 363.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 219.1ms
Speed: 2.3ms preprocess, 219.1ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 334.1ms
Speed: 2.4ms preprocess, 334.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 211.2ms
Speed: 2.5ms preprocess, 211.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 184.2ms
Speed: 2.2ms preprocess, 184.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 188.2ms
Speed: 2.1ms preprocess, 188.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 167.2ms
Speed: 2.3ms preprocess, 167.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 153.0ms
Spe

In [1]:
import cv2
from ultralytics import YOLO
import numpy as np

# Real-time detection demo: every detection is drawn in its class colour, the
# face nearest the frame centre is re-drawn as "Target", and the frame centre
# itself is marked with a dot.

# Load the trained YOLOv8 model
model = YOLO('best.pt')

# Class names are looked up by the integer class id of each detection, so the
# list order has to match the class indices the model was trained with.
# Colours are per class; OpenCV frames are BGR, so tuples are (blue, green, red).
class_names = ['face', 'mouth', 'eyes']
colors = {
    'face': (0, 255, 255),  # Yellow for regular faces
    'mouth': (0, 255, 0),   # Green
    'eyes': (0, 0, 255),    # Red
}
target_color = (255, 0, 0)  # Blue (BGR) for the target face

# Initialize webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # Grab one frame to learn the frame size; a failed read returns no image,
    # so check it before touching frame.shape.
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError('Could not read a frame from the webcam')
    height, width, _ = frame.shape
    center_screen = (width // 2, height // 2)  # (x, y) of the frame centre

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform detection
        results = model.predict(source=frame, conf=0.25)

        # Best candidate for this frame only; reset on every iteration
        closest_face = None
        min_distance = float('inf')

        # Draw detections on the frame
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners in pixels
            conf = float(box.conf[0])                # Confidence score as a plain float
            label = int(box.cls[0])                  # Class id as a plain int
            class_name = class_names[label]

            # Draw the bounding box and label on the frame
            color = colors[class_name]
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{class_name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Only faces compete for the target slot
            if class_name != 'face':
                continue

            # Distance from the box centre to the frame centre
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            distance = np.sqrt((center_x - center_screen[0]) ** 2
                               + (center_y - center_screen[1]) ** 2)
            if distance < min_distance:
                min_distance = distance
                closest_face = (x1, y1, x2, y2, conf)

        # Highlight the closest face (drawn over its regular box)
        if closest_face is not None:
            x1, y1, x2, y2, conf = closest_face
            cv2.rectangle(frame, (x1, y1), (x2, y2), target_color, 2)
            cv2.putText(frame, 'Target {:.2f}'.format(conf), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, target_color, 2)

        # Draw a filled green dot at the center of the screen
        cv2.circle(frame, center_screen, 5, (0, 255, 0), -1)

        # Display the frame with detections
        cv2.imshow('Real-Time Detection', frame)

        # Break on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always runs (normal exit, exception, kernel interrupt) so the camera
    # and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()



0: 480x800 1 Face, 1 Talking, 1 eyes, 350.4ms
Speed: 8.3ms preprocess, 350.4ms inference, 15.1ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 211.7ms
Speed: 2.2ms preprocess, 211.7ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 288.8ms
Speed: 8.0ms preprocess, 288.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 282.4ms
Speed: 4.4ms preprocess, 282.4ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 238.3ms
Speed: 3.8ms preprocess, 238.3ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 181.8ms
Speed: 2.3ms preprocess, 181.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 173.3ms
Speed: 2.5ms preprocess, 173.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talkin

2024-12-14 09:59:30.285 python[43234:3007539] +[IMKClient subclass]: chose IMKClient_Modern
2024-12-14 09:59:30.286 python[43234:3007539] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 480x800 1 Face, 1 eyes, 181.3ms
Speed: 2.4ms preprocess, 181.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 183.6ms
Speed: 2.6ms preprocess, 183.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 164.5ms
Speed: 2.2ms preprocess, 164.5ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 eyes, 171.7ms
Speed: 2.2ms preprocess, 171.7ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 170.9ms
Speed: 2.3ms preprocess, 170.9ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 1 eyes, 238.1ms
Speed: 2.4ms preprocess, 238.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 256.0ms
Speed: 5.5ms preprocess, 256.0ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 Face, 1 Talking, 289.2ms