<h1><center><font style="color:rgb(100,109,254)">Playing Games using Pose Detection</font></center></h1>


In [1]:
# Import the OpenCV library, a computer vision library that provides a lot of functionalities for image and video processing.
import cv2

# Import the pyautogui library, a cross-platform GUI automation Python module used to programmatically control the mouse and keyboard.
import pyautogui

# Import the time module, which provides various time-related functions.
from time import time

# Import the math module, which provides access to mathematical functions, and specifically the hypot function, which calculates the Euclidean norm (hypotenuse of a right-angled triangle).
from math import hypot

# Import the MediaPipe library, a framework for building multimodal applied machine learning pipelines.
import mediapipe as mp

# Import the matplotlib.pyplot module, a plotting library for creating static, animated, and interactive visualizations in Python.
import matplotlib.pyplot as plt

In [2]:
# Import the necessary modules from mediapipe
import mediapipe as mp

# Shorthand handles for the MediaPipe pose solution and its drawing helpers.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Pose estimator intended for still images.
#   static_image_mode=True       -> every input is treated as an unrelated image.
#   model_complexity=1           -> MediaPipe accepts 0, 1 or 2 here; 1 is the middle
#                                   setting (0 is the lightest model, 2 the heaviest).
#   min_detection_confidence=0.5 -> minimum confidence for a detection to be accepted.
pose_image = mp_pose.Pose(
    static_image_mode=True,
    model_complexity=1,
    min_detection_confidence=0.5,
)

# Pose estimator intended for video streams.
#   static_image_mode=False      -> input frames are treated as a continuous stream.
#   model_complexity=1           -> same middle-sized model as above.
#   min_detection_confidence=0.7 -> minimum confidence for a detection to be accepted.
#   min_tracking_confidence=0.7  -> minimum confidence for the landmarks to keep being tracked.
pose_video = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

In [3]:
def detectPose(image, pose, draw=False, display=False):
    '''
    Run pose landmark detection on an image and optionally annotate and/or show the result.

    Args:
        image:   The input image (BGR channel order) containing the person to analyse.
        pose:    The MediaPipe pose object whose process() method performs the detection.
        draw:    When true, the detected landmarks and their connections are drawn on the output image.
        display: When true, the original and the output image are shown side by side with matplotlib
                 and the function returns nothing.

    Returns:
        output_image: A copy of the input image, annotated with the landmarks if draw was requested.
        results:      The object returned by pose.process() for the input image.
        (Both are returned only when display is false.)
    '''

    # Annotate a copy so that the caller's image is never modified.
    output_image = image.copy()

    # The detector is fed an RGB image, whereas the input is in BGR order.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(rgb_image)

    # Draw the skeleton only when it was requested and something was actually detected.
    if draw and results.pose_landmarks:
        landmark_style = mp_drawing.DrawingSpec(color=(255,255,255), thickness=3, circle_radius=3)
        connection_style = mp_drawing.DrawingSpec(color=(49,125,237), thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(image=output_image,
                                  landmark_list=results.pose_landmarks,
                                  connections=mp_pose.POSE_CONNECTIONS,
                                  landmark_drawing_spec=landmark_style,
                                  connection_drawing_spec=connection_style)

    # Without display the caller gets the annotated image and the raw detection output.
    if not display:
        return output_image, results

    # Otherwise show the input and the output next to each other (converted from BGR to RGB).
    plt.figure(figsize=[22,22])
    panels = ((121, image, "Original Image"), (122, output_image, "Output Image"))
    for position, picture, caption in panels:
        plt.subplot(position)
        plt.imshow(picture[:,:,::-1])
        plt.title(caption)
        plt.axis('off')

In [10]:
def checkHandsJoined(image, results, draw=False, display=False):
    '''
    Classify whether the person's hands are joined, based on the pixel distance between the wrists.

    Args:
        image:   The input image containing the person whose hands are to be classified.
        results: The output of the pose landmarks detection on the input image.
        draw:    When true, the hands status and the wrist distance are written on the output image.
        display: When true, the output image is shown with matplotlib and the function returns nothing.

    Returns:
        output_image: A copy of the input image, with the status and distance written if draw was requested.
        hand_status:  'Hands Joined' when the wrists are less than 120 pixels apart, else 'Hands Not Joined'.
        (Both are returned only when display is false.)
    '''

    # Image size is needed to turn the normalized landmark coordinates into pixels.
    height, width, _ = image.shape

    # Work on a copy so the caller's image stays untouched.
    output_image = image.copy()

    # Look up both wrist landmarks once.
    landmarks = results.pose_landmarks.landmark
    left_wrist = landmarks[mp_pose.PoseLandmark.LEFT_WRIST]
    right_wrist = landmarks[mp_pose.PoseLandmark.RIGHT_WRIST]

    # Pixel offsets between the two wrists along each axis.
    delta_x = left_wrist.x * width - right_wrist.x * width
    delta_y = left_wrist.y * height - right_wrist.y * height

    # Straight-line wrist distance, truncated to whole pixels.
    euclidean_distance = int(hypot(delta_x, delta_y))

    # Below the 120 pixel threshold the hands count as joined (green label), otherwise not (red label).
    joined = euclidean_distance < 120
    hand_status = 'Hands Joined' if joined else 'Hands Not Joined'
    color = (0, 255, 0) if joined else (0, 0, 255)

    # Write the status and the measured distance in the top-left corner when requested.
    if draw:
        labels = ((hand_status, (10, 30)), (f'Distance: {euclidean_distance}', (10, 70)))
        for text, origin in labels:
            cv2.putText(output_image, text, origin, cv2.FONT_HERSHEY_PLAIN, 2, color, 3)

    # Without display, hand the results back to the caller.
    if not display:
        return output_image, hand_status

    # Otherwise show the output image (converted from BGR to RGB).
    plt.figure(figsize=[10,10])
    plt.imshow(output_image[:,:,::-1])
    plt.title("Output Image")
    plt.axis('off')

# SAMPLE TEST 1

In [5]:
# Open the default webcam and request a 1280x960 capture size.
camera_video = cv2.VideoCapture(0)
camera_video.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera_video.set(cv2.CAP_PROP_FRAME_HEIGHT, 960)

# Create a named window that the user can resize.
cv2.namedWindow('Hands Joined?', cv2.WINDOW_NORMAL)

# The try/finally guarantees the camera and the window are released even if a frame fails to process.
try:
    # Keep going for as long as the capture device stays open.
    while camera_video.isOpened():

        # Read a frame.
        ok, frame = camera_video.read()

        # A failed read is retried, but the keyboard is still polled so ESC can
        # always end the loop instead of spinning forever on a dead camera.
        if not ok:
            if cv2.waitKey(1) & 0xFF == 27:
                break
            continue

        # Flip the frame horizontally for natural (selfie-view) visualization.
        frame = cv2.flip(frame, 1)

        # Perform the pose detection on the frame and draw the landmarks.
        frame, results = detectPose(frame, pose_video, draw=True)

        # Classify the hands only when a pose was actually detected.
        if results.pose_landmarks:
            frame, _ = checkHandsJoined(frame, results, draw=True)

        # Display the frame.
        cv2.imshow('Hands Joined?', frame)

        # Wait 1 ms for a key press; keep only the low byte of the key code and stop on ESC (27).
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release the VideoCapture object and close the windows.
    camera_video.release()
    cv2.destroyAllWindows()

In [8]:
def check_left_right(image, results, draw=False, display=False):
    '''
    This function finds the horizontal position (left, center, right) of the person in an image.

    The position is 'Left' when both shoulders are on or left of the vertical center line,
    'Right' when both are on or right of it, and 'Center' when the shoulders straddle it.
    The classification does not depend on which of the two shoulder landmarks has the smaller
    x-coordinate, so one of the three labels is always produced.

    Args:
        image:   The input image with a prominent person whose horizontal position needs to be found.
        results: The output of the pose landmarks detection on the input image.
        draw:    A boolean value that is if set to true the function writes the horizontal position
                 and draws the center line on the output image.
        display: A boolean value that is if set to true the function displays the resultant image and returns nothing.

    Returns:
        output_image:         A copy of the input image with the horizontal position written, if it was specified.
        horizontal_position:  The horizontal position ('Left', 'Center' or 'Right') of the person in the input image.
        (Both are returned only when display is false.)
    '''

    # Get the height and width of the image.
    height, width, _ = image.shape

    # Create a copy of the input image to write the horizontal position on.
    output_image = image.copy()

    # Convert the normalized x-coordinates of both shoulder landmarks to pixels.
    landmarks = results.pose_landmarks.landmark
    left_x = int(landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x * width)
    right_x = int(landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].x * width)

    # Order the two shoulders by where they appear in the image rather than by landmark
    # name, so the straddling case is recognised regardless of which one is further left.
    leftmost_x = min(left_x, right_x)
    rightmost_x = max(left_x, right_x)
    center_x = width // 2

    # Both shoulders on or left of the center line.
    if rightmost_x <= center_x:
        horizontal_position = 'Left'

    # Both shoulders on or right of the center line.
    elif leftmost_x >= center_x:
        horizontal_position = 'Right'

    # One shoulder on each side of the center line.
    else:
        horizontal_position = 'Center'

    # Check if the person's horizontal position and a line at the center of the image is specified to be drawn.
    if draw:
        # Write the horizontal position of the person on the image.
        cv2.putText(output_image, horizontal_position, (5, height - 10), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 3)

        # Draw a line at the center of the image.
        cv2.line(output_image, (center_x, 0), (center_x, height), (255, 255, 255), 2)

    # Check if the output image is specified to be displayed.
    if display:
        # Display the output image (converted from BGR to RGB).
        plt.figure(figsize=[10, 10])
        plt.imshow(output_image[:, :, ::-1])
        plt.title("Output Image")
        plt.axis('off')

    # Otherwise
    else:
        # Return the output image and the person's horizontal position.
        return output_image, horizontal_position


# Camel-case alias: other cells in this file call the function by this name.
checkLeftRight = check_left_right

# SAMPLE TEST 2

In [9]:
# Open the default webcam and request a 1280x960 capture size.
camera_video = cv2.VideoCapture(0)
camera_video.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera_video.set(cv2.CAP_PROP_FRAME_HEIGHT, 960)

# Create a named window that the user can resize.
cv2.namedWindow('Horizontal Movements', cv2.WINDOW_NORMAL)

# The try/finally guarantees the camera and the window are released even if a frame fails to process.
try:
    # Keep going for as long as the capture device stays open.
    while camera_video.isOpened():

        # Read a frame.
        ok, frame = camera_video.read()

        # A failed read is retried, but the keyboard is still polled so ESC can
        # always end the loop instead of spinning forever on a dead camera.
        if not ok:
            if cv2.waitKey(1) & 0xFF == 27:
                break
            continue

        # Flip the frame horizontally for natural (selfie-view) visualization.
        frame = cv2.flip(frame, 1)

        # Perform the pose detection on the frame and draw the landmarks.
        frame, results = detectPose(frame, pose_video, draw=True)

        # Classify the horizontal position only when a pose was actually detected.
        # The function is defined in this file under the name check_left_right.
        if results.pose_landmarks:
            frame, _ = check_left_right(frame, results, draw=True)

        # Display the frame.
        cv2.imshow('Horizontal Movements', frame)

        # Wait 1 ms for a key press; keep only the low byte of the key code and stop on ESC (27).
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release the VideoCapture object and close the windows.
    camera_video.release()
    cv2.destroyAllWindows()

In [6]:
def checkJumpCrouch(image, results, MID_y=250, draw=False, display=False):
    '''
    Classify the person's posture as 'Jumping', 'Crouching' or 'Standing' from the shoulder height.

    The y-coordinate of the mid-point between both shoulders is compared against the reference
    MID_y: more than 15 pixels above it counts as jumping, more than 100 pixels below it counts
    as crouching, anything in between counts as standing.

    Args:
        image:   The input image with a prominent person whose posture needs to be checked.
        results: The output of the pose landmarks detection on the input image.
        MID_y:   The reference y-coordinate (in pixels) of the shoulders' mid-point, i.e. the value
                 recorded while the person was standing straight.
        draw:    When true, the posture and a horizontal line at MID_y are drawn on the output image.
        display: When true, the output image is shown with matplotlib and the function returns nothing.

    Returns:
        output_image: A copy of the input image with the posture written, if draw was requested.
        posture:      The posture ('Jumping', 'Crouching' or 'Standing') of the person in the image.
        (Both are returned only when display is false.)
    '''

    # Image size is needed to turn the normalized landmark coordinates into pixels.
    height, width, _ = image.shape

    # Work on a copy so the caller's image stays untouched.
    output_image = image.copy()

    # Pixel y-coordinates of both shoulders; only their mean is used below.
    landmarks = results.pose_landmarks.landmark
    shoulder_ids = (mp_pose.PoseLandmark.RIGHT_SHOULDER, mp_pose.PoseLandmark.LEFT_SHOULDER)
    shoulder_ys = [int(landmarks[shoulder_id].y * height) for shoulder_id in shoulder_ids]

    # Integer mid-point between the two shoulders.
    actual_mid_y = sum(shoulder_ys) // 2

    # Image y grows downwards: a smaller value means the shoulders moved up.
    jump_limit = MID_y - 15
    crouch_limit = MID_y + 100

    if actual_mid_y < jump_limit:
        posture = 'Jumping'
    elif actual_mid_y > crouch_limit:
        posture = 'Crouching'
    else:
        posture = 'Standing'

    # Write the posture and draw the reference line when requested.
    if draw:
        cv2.putText(output_image, posture, (5, height - 50), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 3)
        cv2.line(output_image, (0, MID_y), (width, MID_y), (255, 255, 255), 2)

    # Without display, hand the results back to the caller.
    if not display:
        return output_image, posture

    # Otherwise show the output image (converted from BGR to RGB).
    plt.figure(figsize=[10, 10])
    plt.imshow(output_image[:, :, ::-1])
    plt.title("Output Image")
    plt.axis('off')

# SAMPLE TEST 3

In [7]:
# Open the default camera (index 0) and stop right away when it is unavailable.
# SystemExit is raised directly instead of calling the interactive helper exit(),
# which is only provided by the site module and is not meant for use in programs.
camera_video = cv2.VideoCapture(0)
if not camera_video.isOpened():
    raise SystemExit("Cannot open camera")

# Request a 1280x960 capture size.
camera_video.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera_video.set(cv2.CAP_PROP_FRAME_HEIGHT, 960)

# Create a named window that the user can resize (WINDOW_NORMAL).
cv2.namedWindow('Verticial Movements', cv2.WINDOW_NORMAL)

# The try/finally guarantees the camera and the window are released even if a frame fails to process.
try:
    # Keep going for as long as the capture device stays open.
    while camera_video.isOpened():
        # Read a frame from the camera.
        ok, frame = camera_video.read()

        # A failed read is retried, but the keyboard is still polled so ESC can
        # always end the loop instead of spinning forever on a dead camera.
        if not ok:
            if cv2.waitKey(1) & 0xFF == 27:
                break
            continue

        # Flip the frame horizontally for natural (selfie-view) visualization.
        frame = cv2.flip(frame, 1)

        # Perform the pose detection on the frame and draw the landmarks.
        frame, results = detectPose(frame, pose_video, draw=True)

        # Classify the posture only when a pose was actually detected.
        # No reference height is passed, so checkJumpCrouch falls back to its default MID_y.
        if results.pose_landmarks:
            frame, _ = checkJumpCrouch(frame, results, draw=True)

        # Display the frame.
        cv2.imshow('Verticial Movements', frame)

        # Wait 1 ms for a key press; keep only the low byte of the key code and stop on ESC (27).
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release the VideoCapture object and close the windows.
    camera_video.release()
    cv2.destroyAllWindows()

In [36]:
# Query the current mouse cursor position and print its x and y screen coordinates.
cursor = pyautogui.position()
x, y = cursor
print(f'{x} {y}')

735 542


In [54]:
# Requested capture size and the text style shared by the on-screen messages.
FRAME_WIDTH = 1280
FRAME_HEIGHT = 960
FPS_FONT_SIZE = 2
FPS_FONT_COLOR = (0, 255, 0)
FPS_FONT_THICKNESS = 3

# Open the default webcam and request the capture size defined above.
camera_video = cv2.VideoCapture(0)
camera_video.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
camera_video.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)

# Create a named window that the user can resize.
cv2.namedWindow('Playing Games with Pose Detection', cv2.WINDOW_NORMAL)

# Game state.
time1 = 0  # timestamp of the previous frame, used for the FPS read-out
game_started = False  # whether the game has been started by joining the hands
x_pos_index = 1  # current lane of the character: 0 = left, 1 = center, 2 = right
y_pos_index = 1  # current vertical state: 0 = crouching, 1 = standing, 2 = jumping
MID_Y = None  # shoulder mid-point height recorded at game start; None until then
counter = 0  # number of consecutive frames in which the hands were joined
num_of_frames = 10  # consecutive joined-hands frames required to trigger start/resume

# The try/finally guarantees the camera and the window are released even if a frame fails to process.
try:
    # Keep going for as long as the capture device stays open.
    while camera_video.isOpened():
        # Read a frame.
        ret, frame = camera_video.read()

        # A failed read is retried, but the keyboard is still polled so ESC can
        # always end the loop instead of spinning forever on a dead camera.
        if not ret:
            if cv2.waitKey(1) & 0xFF == 27:
                break
            continue

        # Flip the frame horizontally for natural (selfie-view) visualization.
        frame = cv2.flip(frame, 1)

        # Use the height of the frame that was actually delivered: the capture size set
        # above is only a request, and checkJumpCrouch also scales by the real image height.
        frame_height = frame.shape[0]

        # Perform the pose detection; the skeleton is drawn only once the game is running.
        frame, results = detectPose(frame, pose_video, draw=game_started)

        # Everything below needs a detected pose.
        if results.pose_landmarks:
            if game_started:
                # Commands to control the horizontal movements of the character.
                # The function is defined in this file under the name check_left_right.
                frame, horizontal_position = check_left_right(frame, results, draw=True)

                # Move one lane to the left: either the person stepped left, or stepped back to
                # the center while the character is still in the right lane.
                if (horizontal_position == 'Left' and x_pos_index != 0) or (horizontal_position == 'Center' and x_pos_index == 2):
                    pyautogui.press('left')
                    x_pos_index -= 1

                # Move one lane to the right: the mirror image of the case above.
                elif (horizontal_position == 'Right' and x_pos_index != 2) or (horizontal_position == 'Center' and x_pos_index == 0):
                    pyautogui.press('right')
                    x_pos_index += 1
            else:
                # Tell the player how to start the game.
                cv2.putText(frame, 'JOIN BOTH HANDS TO START THE GAME.', (5, frame_height - 10), cv2.FONT_HERSHEY_PLAIN,
                            FPS_FONT_SIZE, FPS_FONT_COLOR, FPS_FONT_THICKNESS)

            # Command to start or resume the game: hands joined for num_of_frames consecutive frames.
            if checkHandsJoined(frame, results)[1] == 'Hands Joined':
                counter += 1
                if counter == num_of_frames:
                    if not game_started:
                        game_started = True

                        # Record the standing shoulder height as the reference for jump/crouch
                        # detection. Only the mean of the two values is used, so which shoulder
                        # is assigned to which name does not matter.
                        left_y = int(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER].y * frame_height)
                        right_y = int(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER].y * frame_height)
                        MID_Y = abs(right_y + left_y) // 2

                        # NOTE(review): fixed screen coordinates, presumably the game's start
                        # controls on the author's display - adjust for your own screen.
                        pyautogui.click(x=810, y=491, button='left')
                        pyautogui.doubleClick(x=743, y=540, button='left')
                    else:
                        # Game already running: the gesture sends the space key instead.
                        pyautogui.press('space')
                    counter = 0
            else:
                # The streak is broken as soon as one frame has the hands apart.
                counter = 0

            # Commands to control the vertical movements of the character.
            # Compare against None explicitly so that a recorded reference of 0 still counts.
            if MID_Y is not None:
                frame, posture = checkJumpCrouch(frame, results, MID_Y, draw=True)

                # A key is sent only on the transition out of the standing state, and the
                # state is reset once the person is standing again.
                if posture == 'Jumping' and y_pos_index == 1:
                    pyautogui.press('up')
                    y_pos_index += 1
                elif posture == 'Crouching' and y_pos_index == 1:
                    pyautogui.press('down')
                    y_pos_index -= 1
                elif posture == 'Standing' and y_pos_index != 1:
                    y_pos_index = 1

        # Frames per second, derived from the time elapsed since the previous frame.
        time2 = time()
        if (time2 - time1) > 0:
            frames_per_second = 1.0 / (time2 - time1)
            cv2.putText(frame, 'FPS: {}'.format(int(frames_per_second)), (10, 30), cv2.FONT_HERSHEY_PLAIN,
                        FPS_FONT_SIZE, FPS_FONT_COLOR, FPS_FONT_THICKNESS)
        time1 = time2

        # Display the frame.
        cv2.imshow('Playing Games with Pose Detection', frame)

        # Wait 1 ms for a key press; keep only the low byte of the key code.
        k = cv2.waitKey(1) & 0xFF

        # Stop when ESC (27) is pressed.
        if k == 27:
            break
finally:
    # Release the VideoCapture object and close the windows.
    camera_video.release()
    cv2.destroyAllWindows()