In [20]:
# Import necessary libraries
import cv2
import time
import math
import numpy as np
from ultralytics import YOLO

In [21]:
# Constants and configurations for the game
HAND_LABELS = {0: 'paper', 1: 'rock', 2: 'scissors'}
MOTION_THRESHOLD = 50
COUNTDOWN_TIME = 3
WAIT_TIME = 2
MESSAGE_TIME = 4
END_ROUND_TIME = 1
BAR_HEIGHT = 45
WIN_SCORE = 1
FONT = cv2.FONT_HERSHEY_SIMPLEX
WINDOW_NAME = 'Rock Paper Scissors'

In [22]:
# Initialize global variables
message = " "                                           # Message to display
start_time = None                                       # Start time for countdown
message_start_time = None                               # Start time for displaying messages
game_started = False                                    # Flag to check if the game has started
countdown_active = False                                # Flag to check if countdown is active
detect_both_hands = False                               # Flag to check if both hands are detected
winner_determined = False                               # Flag to check if the winner is determined
end_round_start_time = time.time()                      # Start time for ending the round
scores = {"player1": 0, "player2": 0}                   # Scores for both players
prev_moves = {"player1": None, "player2": None}         # Previous moves of both players
prev_positions = {"player1": None, "player2": None}     # Previous positions of both players
motion_check = {"player1": False, "player2": False}     # Motion check for both players
player_masks = {"player1": False, "player2": False}     # Masks status for both players
player_crowns = {"player1": False, "player2": False}    # Crowns status for both players

In [23]:
# Load the YOLO models for hand and face detection
hand_model = YOLO('best.pt', task='detect')
face_model = YOLO('yolov11n-face.pt', task='detect')

# Load the mask and crown images with alpha channel
mask_img = cv2.imread('mask.png', cv2.IMREAD_UNCHANGED)
crown_img = cv2.imread('crown.png', cv2.IMREAD_UNCHANGED)

# Check if the crown image has an alpha channel
if crown_img.shape[2] != 4:
    # Add an alpha channel to the crown image if it doesn't have one
    b, g, r = cv2.split(crown_img)
    alpha = np.ones(b.shape, dtype=b.dtype) * 255  # Create a dummy alpha channel
    crown_img = cv2.merge((b, g, r, alpha))

In [24]:
def mirror_frame(frame):
    """
    Mirrors the given frame horizontally.

    Parameters:
    frame (numpy.ndarray): The input frame to be mirrored.

    Returns:
    numpy.ndarray: The mirrored frame.
    """
    return cv2.flip(frame, 1)

In [25]:
def annotate_frame(frame, hand_model, face_model):
    """
    Annotates a video frame with hand and face predictions using YOLO models.

    Args:
        frame (numpy.ndarray): The input video frame to be annotated.
        hand_model (YOLO): The YOLO model used for hand detection.
        face_model (YOLO): The YOLO model used for face detection.

    Returns:
        tuple: A tuple containing:
            - combined_frame (numpy.ndarray): The frame annotated with both hand and face predictions.
            - hand_result (YOLOResult): The result of the hand detection.
            - face_result (YOLOResult): The result of the face detection.
    """
    # Predict hands using YOLO model
    hand_results = hand_model.predict(source=frame, device='0')
    hand_result = hand_results[0]
    hand_annotated_frame = hand_result.plot()

    # Predict faces using YOLO model
    face_results = face_model.predict(source=frame, device='0')
    face_result = face_results[0]
    face_annotated_frame = face_result.plot()

    # Combine the annotations from both models
    combined_frame = cv2.addWeighted(
        hand_annotated_frame, 0.5, face_annotated_frame, 0.5, 0)

    return combined_frame, hand_result, face_result

In [26]:
def display_status(frame, scores, remaining_time, message, winner):
    """
    Display the game status on the given frame.

    Parameters:
    frame (numpy.ndarray): The image frame where the status will be displayed.
    scores (dict): A dictionary containing the scores of the players with keys "player1" and "player2".
    remaining_time (int): The remaining time for the countdown or wait period.
    message (str): A message to be displayed on the frame.
    winner (str): The name of the winning player.

    Returns:
    None
    """
    cv2.rectangle(
        frame, (0, 0), (frame.shape[1], BAR_HEIGHT), (225, 225, 0), -1)
    cv2.putText(frame, f'P1: {scores["player1"]}/{WIN_SCORE}  P2: {scores["player2"]}/{WIN_SCORE}  {"Countdown" if remaining_time >= 0 else "Wait"}: {abs(remaining_time) if countdown_active else "-"}',
                (20, 35), FONT, 1, (255, 255, 255), 2)

    if scores["player1"] == WIN_SCORE or scores["player2"] == WIN_SCORE:
        cv2.putText(frame, f'{winner} Wins the Game!',
                    (110, BAR_HEIGHT + 40), FONT, 1, (0, 255, 0), 2)
    else:
        cv2.putText(frame, message, (0, BAR_HEIGHT + 30),
                    FONT, 0.7, (0, 0, 255), 2)

In [27]:
def detect_players_hands(hand_result, frame):
    """
    Detects the hands of two players in a given frame.

    Args:
        hand_result: An object containing the results of hand detection, including bounding boxes and class IDs.
        frame: The current frame of the video or image in which hands are being detected.

    Returns:
        tuple: A tuple containing:
            - player_hands (dict): A dictionary with keys "player1" and "player2", mapping to the detected hand labels for each player.
            - player_positions (dict): A dictionary with keys "player1" and "player2", mapping to the positions (x, y) of the detected hands for each player.
            - detect_both_hands (bool): A boolean indicating whether both players' hands were detected in the frame.
    """
    player_hands = {"player1": None, "player2": None}
    player_positions = {"player1": None, "player2": None}
    detect_both_hands = False

    for box in hand_result.boxes:
        if len(hand_result.boxes) == 2:
            detect_both_hands = True
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        class_id = int(box.cls[0])
        label = HAND_LABELS[class_id]

        if x1 < frame.shape[1] // 2:
            player_hands["player1"] = label
            player_positions["player1"] = (x1, y1)
        else:
            player_hands["player2"] = label
            player_positions["player2"] = (x1, y1)

    return player_hands, player_positions, detect_both_hands

In [28]:
def detect_players_faces(face_result, frame):
    """
    Detects and assigns face positions to player1 and player2 based on their location in the frame.

    Args:
        face_result: An object containing the results of face detection, including bounding boxes.
        frame: The current video frame as a numpy array.

    Returns:
        A dictionary with keys "player1" and "player2", each containing a tuple of coordinates (x1, y1, x2, y2)
        representing the bounding box of the detected face. If no face is detected for a player, the value is None.

    Example:
        face_positions = detect_players_faces(face_result, frame)
        # face_positions = {"player1": (x1, y1, x2, y2), "player2": (x1, y1, x2, y2)}
    """
    face_positions = {"player1": None, "player2": None}
    for box in face_result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        if x1 < frame.shape[1] // 2:
            face_positions["player1"] = (x1, y1, x2, y2)
        else:
            face_positions["player2"] = (x1, y1, x2, y2)
    return face_positions

In [29]:
def is_time_passed(start_time, threshold):
    """
    Check if a certain amount of time has passed since the start time.

    Args:
        start_time (float): The start time in seconds since the epoch. Can be None.
        threshold (int): The time threshold in seconds.

    Returns:
        bool: True if the time difference between the current time and the start time
              is greater than the threshold, False otherwise.
    """
    return start_time is not None and math.floor(time.time() - start_time) > threshold

In [30]:
def reset_game_state():
    """
    Resets the game state to its initial values.

    This function sets the following global variables to their initial states:
    - winner_determined: Boolean flag indicating if a winner has been determined (set to False).
    - message_start_time: Timestamp for when a message was started (set to None).
    - message: The message to be displayed (set to a single space).
    - scores: Dictionary holding the scores for player1 and player2 (both set to 0).
    - prev_moves: Dictionary holding the previous moves for player1 and player2 (both set to None).
    - player_masks: Dictionary indicating if player1 and player2 are wearing masks (both set to False).
    - player_crowns: Dictionary indicating if player1 and player2 have crowns (both set to False).
    """
    # Function implementation here
    global winner_determined, message_start_time, message, scores, prev_moves, player_masks, player_crowns
    winner_determined = False
    message_start_time = None
    message = " "
    scores = {"player1": 0, "player2": 0}
    prev_moves = {"player1": None, "player2": None}
    player_masks = {"player1": False, "player2": False}
    player_crowns = {"player1": False, "player2": False}

In [31]:
def process_game_start(player_hands):
    """
    Process the start of the game by evaluating the players' hands.

    Args:
        player_hands (dict): A dictionary containing the hands of the players.
                             Example: {"player1": "rock", "player2": "rock"}

    Returns:
        tuple: A tuple containing:
            - bool: True if both players have chosen "rock", otherwise False.
            - float: The current time if both players have chosen "rock", otherwise None.
            - None: Always returns None as the third element of the tuple.
    """
    if player_hands["player1"] == "rock" and player_hands["player2"] == "rock":
        return True, time.time()
    return False, None, None

In [32]:
def check_motion(prev_positions, player_positions):
    """
    Check if there is significant motion for each player based on their previous and current positions.

    Args:
        prev_positions (dict): A dictionary containing the previous positions of the players.
                               Example: {"player1": (x1, y1), "player2": (x2, y2)}
        player_positions (dict): A dictionary containing the current positions of the players.
                                 Example: {"player1": (x1, y1), "player2": (x2, y2)}

    Returns:
        dict: A dictionary indicating whether motion was detected for each player.
              Example: {"player1": True, "player2": False}

    Notes:
        - The function assumes that the positions are tuples containing (x, y) coordinates.
        - The constant MOTION_THRESHOLD should be defined elsewhere in the code.
        - Motion is detected if the absolute difference in the y-coordinates exceeds MOTION_THRESHOLD.
    """
    motion_detected = {"player1": False, "player2": False}
    for player in ["player1", "player2"]:
        if prev_positions[player] and player_positions[player]:
            if abs(prev_positions[player][1] - player_positions[player][1]) > MOTION_THRESHOLD:
                motion_detected[player] = True
    return motion_detected

In [33]:
def cheat_detection(prev_moves, player_hands, hold_positions, player_positions):
    """
    Detects cheating in a game based on previous moves, current player hands, hold positions, and player positions.
    Args:
        prev_moves (dict): A dictionary containing the previous moves of the players.
                           Example: {"player1": move1, "player2": move2}
        player_hands (dict): A dictionary containing the current hands of the players.
                             Example: {"player1": hand1, "player2": hand2}
        hold_positions (dict): A dictionary containing the hold positions of the players.
                               Example: {"player1": position1, "player2": position2}
        player_positions (dict): A dictionary containing the current positions of the players.
                                 Example: {"player1": position1, "player2": position2}
    Returns:
        dict: A dictionary indicating whether each player is cheating.
              Example: {"player1": True, "player2": True}
    """
    cheat_motion = check_motion(hold_positions, player_positions)
    if cheat_motion["player1"] and cheat_motion["player2"]:
        return {"player1": True, "player2": True}
    if prev_moves["player1"] != player_hands["player1"] and prev_moves["player2"] != player_hands["player2"]:
        return {"player1": True, "player2": True}

    if cheat_motion["player1"] or prev_moves["player1"] != player_hands["player1"]:
        return {"player1": True, "player2": False}
    if cheat_motion["player2"] or prev_moves["player2"] != player_hands["player2"]:
        return {"player1": False, "player2": True}
    return {"player1": False, "player2": False}

In [34]:
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask):
    """
    Overlay an image with an alpha mask onto another image at a specified position.

    Parameters:
    img (numpy.ndarray): The background image onto which the overlay will be applied.
    img_overlay (numpy.ndarray): The image to overlay on the background image.
    x (int): The x-coordinate of the top-left corner where the overlay image will be placed.
    y (int): The y-coordinate of the top-left corner where the overlay image will be placed.
    alpha_mask (numpy.ndarray): The alpha mask that determines the transparency of the overlay image.

    Returns:
    None: The function modifies the input background image in place.
    """
    y1, y2 = max(0, y), min(img.shape[0], y + img_overlay.shape[0])
    x1, x2 = max(0, x), min(img.shape[1], x + img_overlay.shape[1])

    y1o, y2o = max(0, -y), min(img_overlay.shape[0], img.shape[0] - y)
    x1o, x2o = max(0, -x), min(img_overlay.shape[1], img.shape[1] - x)

    if y1 >= y2 or x1 >= x2 or y1o >= y2o or x1o >= x2o:
        return

    img_crop = img[y1:y2, x1:x2]
    img_overlay_crop = img_overlay[y1o:y2o, x1o:x2o]
    alpha = alpha_mask[y1o:y2o, x1o:x2o, np.newaxis] / 255.0

    img_crop[:] = alpha * img_overlay_crop[:, :, :3] + (1 - alpha) * img_crop

In [35]:
def apply_filter(frame, face_positions, is_mask):
    """
    Apply a filter (mask or crown) to the detected face positions in the given frame.

    Args:
        frame (numpy.ndarray): The image frame where the filter will be applied.
        face_positions (dict): A dictionary containing face positions with keys as identifiers and values as tuples (x1, y1, x2, y2).
        is_mask (bool): A flag indicating whether to apply a mask (True) or a crown (False).

    Returns:
        None: The function modifies the input frame in place by overlaying the filter on detected faces.
    """
    if face_positions is not None:
        for _, pos in face_positions.items():
            if pos is not None:
                print("pos", pos)
                x1, y1, x2, y2 = pos
                face_width = x2 - x1
                if is_mask:
                    face_height = ((y2 - y1) * 3) // 4
                    resized_mask = cv2.resize(mask_img, (face_width, face_height), interpolation=cv2.INTER_AREA)
                else:
                    face_height = (y2 - y1) // 2
                    resized_mask = cv2.resize(crown_img, (face_width, face_height), interpolation=cv2.INTER_AREA)
                    y1 = y1 - face_height

                overlay_image_alpha(frame, resized_mask[:, :, :3], x1, y1, resized_mask[:, :, 3])

In [36]:
def apply_filters(combined_frame, face_positions, player_masks, player_crowns):
    """
    Apply filters to the combined frame based on player masks and crowns.

    Parameters:
    combined_frame (ndarray): The frame to which filters will be applied.
    face_positions (dict): A dictionary containing the positions of players' faces.
                           Example: {"player1": (x1, y1), "player2": (x2, y2)}
    player_masks (dict): A dictionary indicating whether each player is wearing a mask.
                         Example: {"player1": True, "player2": False}
    player_crowns (dict): A dictionary indicating whether each player is wearing a crown.
                          Example: {"player1": True, "player2": False}

    Returns:
    None
    """
    if player_masks["player1"] and player_masks["player2"]:
        apply_filter(combined_frame, face_positions, True)
    elif player_masks["player1"]:
        apply_filter(combined_frame, {"player1": face_positions["player1"]}, True)
    elif player_masks["player2"]:
        apply_filter(combined_frame, {"player2": face_positions["player2"]}, True)

    if player_crowns["player1"]:
        apply_filter(combined_frame, {"player1": face_positions["player1"]}, False)
    elif player_crowns["player2"]:
        apply_filter(combined_frame, {"player2": face_positions["player2"]}, False)

In [37]:
def determine_round_result(motion_check, cheat_check, moves, scores, frame, face_positions):
    """
    Determine the result of the current round in the game.

    Parameters:
    motion_check (dict): Dictionary indicating if each player has made a valid motion.
                         Example: {"player1": True, "player2": False}
    cheat_check (dict): Dictionary indicating if each player has cheated.
                        Example: {"player1": False, "player2": True}
    moves (dict): Dictionary containing the moves made by each player.
                  Example: {"player1": "rock", "player2": "scissors"}
    scores (dict): Dictionary containing the current scores of each player.
                   Example: {"player1": 0, "player2": 1}
    frame (object): The current frame of the game (e.g., an image or video frame).
    face_positions (dict): Dictionary containing the positions of each player's face in the frame.
                           Example: {"player1": (x1, y1, w1, h1), "player2": (x2, y2, w2, h2)}

    Returns:
    tuple: A tuple containing a string message indicating the result of the round and the updated scores.
           Example: ("Player 1 Wins!", {"player1": 1, "player2": 0})
    """
    if not moves["player1"] or not moves["player2"]:
        return "Not all players have made a move yet", scores
    elif (not motion_check["player1"] and not motion_check["player2"]) or (cheat_check["player1"] and cheat_check["player2"]):
        scores["player1"] -= 1
        scores["player2"] -= 1
        apply_filter(frame, face_positions, True)
        player_masks["player1"] = True
        player_masks["player2"] = True
        return "Player 1 & 2 cheated!", scores
    elif not motion_check["player1"] or cheat_check["player1"]:
        scores["player1"] -= 1
        apply_filter(frame, {"player1": face_positions["player1"]}, True)
        player_masks["player1"] = True
        return "Player 1 cheated!", scores
    elif not motion_check["player2"] or cheat_check["player2"]:
        scores["player2"] -= 1
        apply_filter(frame, {"player2": face_positions["player2"]}, True)
        player_masks["player2"] = True
        return "Player 2 cheated!", scores
    elif moves["player1"] == moves["player2"]:
        return "Draw!", scores
    elif (moves["player1"], moves["player2"]) in [('rock', 'scissors'), ('scissors', 'paper'), ('paper', 'rock')]:
        scores["player1"] += 1
        return "Player 1 Wins!", scores
    elif (moves["player2"], moves["player1"]) in [('rock', 'scissors'), ('scissors', 'paper'), ('paper', 'rock')]:
        scores["player2"] += 1
        return "Player 2 Wins!", scores
    else:
        return " ", scores

In [None]:
# Initialize video capture
cap = cv2.VideoCapture(0)
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.resizeWindow(WINDOW_NAME, 800, 600)  # Set the window to a larger size

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame.")
        break

    # Mirror the frame
    frame = mirror_frame(frame)

    # Annotate the frame with hand and face detection results
    combined_frame, hand_result, face_result = annotate_frame(frame, hand_model, face_model)

    # Display status bar with remaining time, scores, and message
    remaining_time = COUNTDOWN_TIME - math.floor(time.time() - start_time) if countdown_active and start_time else COUNTDOWN_TIME
    display_status(combined_frame, scores, remaining_time, message, winner="Player 1" if scores["player1"] >= WIN_SCORE else "Player 2")

    # Detect players' hands and faces
    player_hands, player_positions, detect_both_hands = detect_players_hands(hand_result, frame)
    face_positions = detect_players_faces(face_result, frame)

    # Apply filters (masks or crowns) to the detected faces
    apply_filters(combined_frame, face_positions, player_masks, player_crowns)

    if not game_started:
        # Reset game state if winner is determined and message time has passed
        if winner_determined and is_time_passed(message_start_time, MESSAGE_TIME):
            reset_game_state()

        # Start the game if both players show "rock" hand gesture
        elif not winner_determined and detect_both_hands and player_hands["player1"] == "rock" and player_hands["player2"] == "rock":
            if is_time_passed(end_round_start_time, END_ROUND_TIME):
                end_round_start_time = None
                game_started, start_time = process_game_start(player_hands)
                start_positions = player_positions.copy()
                motion_check = {"player1": False, "player2": False}
                countdown_active = True

        else:
            # Display instruction to start the game
            cv2.putText(combined_frame, "To start the game, make a fist with your hands", (40, BAR_HEIGHT + 400), FONT, 0.7, (255, 255, 255), 1)
    else:
        if countdown_active:
            if remaining_time > -WAIT_TIME:
                message = " "
                # Check for valid motion for each player
                if not motion_check["player1"]:
                    motion_check["player1"] = check_motion(start_positions, player_positions)["player1"]
                if not motion_check["player2"]:
                    motion_check["player2"] = check_motion(start_positions, player_positions)["player2"]
                print("Valid Action:", motion_check)

                if remaining_time >= 0:
                    prev_positions = player_positions.copy()
                    prev_moves = player_hands.copy()
                else:
                    # Detect cheating based on previous moves and current positions
                    cheat_check = cheat_detection(prev_moves, player_hands, prev_positions, player_positions)
            else:
                countdown_active = False

        else:
            # Determine the result of the round
            message, scores = determine_round_result(motion_check, cheat_check, prev_moves, scores, combined_frame, face_positions)
            end_round_start_time = time.time()
            game_started = False

            # Check if a player has won the game
            if not winner_determined and (scores["player1"] == WIN_SCORE or scores["player2"] == WIN_SCORE):
                message_start_time = time.time()
                winner_determined = True
                player_crowns["player1"] = True if scores["player1"] >= WIN_SCORE else False
                player_crowns["player2"] = True if scores["player2"] >= WIN_SCORE else False

            # Reset game state if winner is determined and message time has passed
            reset_game_state() if winner_determined and is_time_passed(message_start_time, MESSAGE_TIME) else None

        detect_both_hands = False

    # Display the resulting frame
    cv2.imshow(WINDOW_NAME, combined_frame)

    # Exit the loop when 'Esc' key is pressed
    key = cv2.waitKey(1)
    if key == 27:
        break

# Release the capture and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


0: 480x640 (no detections), 18.2ms
Speed: 2.0ms preprocess, 18.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 19.5ms
Speed: 2.0ms preprocess, 19.5ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 23.4ms
Speed: 2.0ms preprocess, 23.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 25.5ms
Speed: 2.0ms preprocess, 25.5ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 12.2ms
Speed: 4.4ms preprocess, 12.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 16.9ms
Speed: 2.3ms preprocess, 16.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 15.5ms
Speed: 5.0ms preprocess, 15.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 14.0ms
Speed: 1.0ms preprocess, 14.0ms inference, 2.0ms postprocess per imag