In [7]:
import cv2
import numpy as np
import time
from ultralytics import YOLO

# Build the YOLO detector from the "best.pt" weights file
model = YOLO("best.pt")

# Open video capture on device index 0 (usually the default webcam)
cap = cv2.VideoCapture(0)

# Gesture names; the main loop indexes this list with the detected class id
All_Classes = [
    'Paper',
    'Rock',
    'Scissors',
]

# Both players start at zero points
Player1_Score = Player2_Score = 0

# Score throttling: a new point is only awarded once `update_interval`
# seconds have elapsed since `Last_update_time`
update_interval = 5
Last_update_time = time.time()

# Decide the outcome of one Rock-Paper-Scissors round
def determine_winner(gesture1, gesture2):
    """Return the result of a round between two gestures.

    Args:
        gesture1: Gesture shown by player 1 ('Rock', 'Paper' or 'Scissors').
        gesture2: Gesture shown by player 2 ('Rock', 'Paper' or 'Scissors').

    Returns:
        0 if both gestures are equal (tie), 1 if player 1's gesture beats
        player 2's, and 2 in every other case.
    """
    if gesture1 == gesture2:
        return 0

    # Each pair is (winning gesture, losing gesture) from player 1's view
    player1_wins = (
        ('Rock', 'Scissors'),
        ('Scissors', 'Paper'),
        ('Paper', 'Rock'),
    )
    return 1 if (gesture1, gesture2) in player1_wins else 2

# Main loop: grab a frame, detect gestures, score the round, draw the overlay.
# The try/finally guarantees that the camera and the OpenCV windows are
# released even when the loop is interrupted (e.g. a kernel interrupt) or an
# exception is raised while processing a frame.
try:
    while True:
        # Capture a frame from the webcam
        ret, frame = cap.read()
        if not ret:
            break  # Break the loop if frame capture fails

        # Resize the frame for processing
        frame = cv2.resize(frame, (1020, 500))
        frame_mid_x = frame.shape[1] // 2  # Vertical line separating the two players

        # persist=True tells the tracker this frame continues the previous
        # one, so tracker state is kept instead of being rebuilt on every call
        results = model.track(frame, persist=True)

        # `boxes` may be missing or None; treat both as "no detections"
        detections = getattr(results[0], 'boxes', None) if len(results) > 0 else None
        if detections is not None:
            # Extract bounding boxes, confidences, and class labels
            boxes = detections.xyxy.cpu().numpy()
            confs = detections.conf.cpu().numpy()
            classes = detections.cls.cpu().numpy().astype(int)

            Player1_gesture = None
            Player2_gesture = None
            # Best confidence seen so far on each side, so the most confident
            # detection decides a player's gesture rather than whichever
            # detection happens to come last in the results
            Player1_conf = 0.0
            Player2_conf = 0.0

            # Iterate over detected objects
            for box, conf, cls in zip(boxes, confs, classes):
                if conf <= 0.25:
                    continue  # Ignore low-confidence detections
                if not 0 <= cls < len(All_Classes):
                    continue  # Class id has no entry in All_Classes

                x1, y1, x2, y2 = map(int, box)  # Convert box coordinates to integers
                gesture = All_Classes[cls]  # Get gesture name from class index
                label = f"{gesture}: {conf:.2f}"  # Label text with gesture and confidence

                # Left half of the frame belongs to player 1, right half to player 2
                cx = (x1 + x2) // 2  # Center x-coordinate of the bounding box
                if cx < frame_mid_x:
                    if conf > Player1_conf:
                        Player1_gesture, Player1_conf = gesture, conf
                elif conf > Player2_conf:
                    Player2_gesture, Player2_conf = gesture, conf

                # Draw bounding box and label on the frame; clamp the label's
                # baseline so it stays visible when the box touches the top edge
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(frame, label, (x1, max(y1 - 10, 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Score only when both players show a gesture and the minimum
            # interval since the previous score update has elapsed
            current_time = time.time()
            if (Player1_gesture is not None and Player2_gesture is not None
                    and current_time - Last_update_time > update_interval):
                winner = determine_winner(Player1_gesture, Player2_gesture)
                if winner == 1:
                    Player1_Score += 1
                elif winner == 2:
                    Player2_Score += 1
                Last_update_time = current_time  # Restart the interval, ties included

        # Display the scores on the frame
        score_text = f"Player one: {Player1_Score}  Player two: {Player2_Score}"
        cv2.putText(frame, score_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Show the processed frame in a window
        cv2.imshow("Rock, Paper, Scissors Game", frame)

        # Exit the loop if the 'Esc' key is pressed
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC key
            break
finally:
    # Release the video capture object and close all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()



0: 320x640 (no detections), 28.7ms
Speed: 3.8ms preprocess, 28.7ms inference, 0.2ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 25.7ms
Speed: 11.4ms preprocess, 25.7ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 28.9ms
Speed: 0.0ms preprocess, 28.9ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 26.3ms
Speed: 6.6ms preprocess, 26.3ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 27.3ms
Speed: 6.8ms preprocess, 27.3ms inference, 1.2ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 20.5ms
Speed: 0.5ms preprocess, 20.5ms inference, 7.7ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 21.3ms
Speed: 0.0ms preprocess, 21.3ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 21.0ms
Speed: 0.0ms preprocess, 21.0ms 