In [None]:
#!pip install tensorflow tensorflow-hub opencv-python matplotlib

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Description: Imports the necessary libraries into the Python environment.
# - tf, hub: TensorFlow and TensorFlow Hub for model loading and execution.
# - cv2: OpenCV for image processing tasks.
# - plt: Matplotlib's pyplot for plotting and image display.
# - np: NumPy for numerical operations, particularly array handling.




In [3]:
# TensorFlow Hub handle of the model used throughout this notebook
# (MoveNet, single-pose, "lightning" variant, version 4 according to the URL).
model_url = "https://tfhub.dev/google/movenet/singlepose/lightning/4"  
# Load the module identified by `model_url` through TensorFlow Hub.
module = hub.load(model_url)
# Keep a handle on the module's 'serving_default' signature; `estimate_pose`
# calls this object directly to run inference.
pose_estimator = module.signatures['serving_default']

# Description: Loads the pose estimation model and exposes its inference entry point.
# - model_url: The TensorFlow Hub URL of the model to load.
# - module = hub.load(model_url): Loads the model referenced by `model_url`.
# - pose_estimator = module.signatures['serving_default']: Selects the 'serving_default' signature of the loaded module. The later cells call `pose_estimator(...)` to run the model.










In [4]:
def preprocess_image(image_path, input_size):
    """Load an image from disk and prepare it for MoveNet inference.

    Args:
        image_path: Path of the image file to read.
        input_size: Target size handed to `cv2.resize`, i.e. (width, height).

    Returns:
        A tuple `(img_input, img_resized)`:
        - img_input: int32 array with a leading batch dimension of 1, RGB
          channel order, raw pixel values in [0, 255].
        - img_resized: the resized RGB image without the batch dimension,
          intended for visualization.

    Raises:
        ValueError: If OpenCV cannot read or decode the file at `image_path`.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising, which
        # would otherwise surface as an opaque OpenCV error in cvtColor below.
        raise ValueError(
            f"Could not read an image from {image_path!r}: "
            "the file is missing or is not a decodable image."
        )
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert BGR to RGB (Matplotlib and TF Hub models often expect RGB)
    img_resized = cv2.resize(img, input_size)
    # MoveNet takes raw int32 pixel values in [0, 255]. Do NOT rescale to [0, 1]:
    # the model's signature expects integers, and scaled values would collapse to 0.
    img_input = img_resized[np.newaxis, ...].astype(np.int32) # Add batch dimension
    return img_input, img_resized # Return both processed input and resized image for visualization later


input_size = (192, 192) # Input size for MoveNet Lightning - Check TF Hub documentation for the model you choose

# Description: Defines a function to preprocess images before feeding them to the pose estimation model.
# - preprocess_image(image_path, input_size): Takes the image path and expected input size as arguments.
# - cv2.imread(image_path): Loads the image from the specified path using OpenCV.
# - if img is None: cv2.imread returns None when the file cannot be read or decoded; in that case a ValueError naming the path is raised.
# - cv2.cvtColor(img, cv2.COLOR_BGR2RGB): Converts the image from OpenCV's default BGR color format to RGB, as many models (and Matplotlib) expect RGB.
# - cv2.resize(img, input_size): Resizes the image to the model's expected input dimensions.
# - img_resized[np.newaxis, ...].astype(np.int32): Adds a batch dimension and converts the pixels to int32. Deep learning models typically expect input in batches, even for single image inference. The pixel values are deliberately left in the range [0, 255] because MoveNet expects raw integer pixel values, not values normalized to [0, 1].
# - input_size = (192, 192): Sets the expected input size for the MoveNet Lightning model. This size is crucial for the model to work correctly. You should usually find this information in the model's documentation on TensorFlow Hub or elsewhere.
# - Returns: The function returns two values:
#   - img_input: The preprocessed image ready to be fed into the model.
#   - img_resized: The resized image (without batch dimension) which is useful for visualization as it matches the keypoint coordinates output size.

In [5]:
def estimate_pose(image_input):
    """Run the loaded pose model on one preprocessed image.

    Args:
        image_input: Batched image as produced by `preprocess_image`.

    Returns:
        The model's 'output_0' entry converted to a NumPy array.
    """
    outputs = pose_estimator(image_input)
    # 'output_0' is the key under which this model returns its keypoints; other
    # models may use a different name, so check `outputs.keys()` when swapping models.
    return outputs['output_0'].numpy()

# Description: Defines a function to perform pose estimation using the loaded model.
# - estimate_pose(image_input): Takes the preprocessed image (output of `preprocess_image`) as input.
# - pose_estimator(image_input): Calls the model's 'serving_default' signature (bound to `pose_estimator` when the model was loaded) on the input image. This performs the actual inference.
# - outputs = ...:  Captures the output of the model. The structure of 'outputs' is model-specific.
# - outputs['output_0'].numpy(): Extracts the keypoint data from the model's output. The name 'output_0' is specific to this MoveNet model.  You'll need to examine the `outputs.keys()` or model documentation to find the correct tensor name that holds the keypoints for other models. `.numpy()` converts the TensorFlow tensor to a NumPy array for easier manipulation.
# - Returns: The function returns the `keypoints` NumPy array. This array contains the predicted keypoint coordinates and confidence scores.

In [6]:
def draw_keypoints(image, keypoints, confidence_threshold=0.5):
    """Overlay confident keypoints and skeleton segments on `image`.

    The drawing happens in place: the array passed in is modified and the same
    object is returned, so pass a copy if the untouched image is still needed.

    Args:
        image: Image array with shape (height, width, channels).
        keypoints: Model output whose rows are (y, x, score); y and x are
            multiplied by the image height and width to obtain pixel positions.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value. Defaults to 0.5.

    Returns:
        The same `image` object, now carrying the drawn markers and segments.
    """
    # Pairs of keypoint indices joined by a skeleton segment (MoveNet ordering).
    # The letter stored with each pair is a color code that the drawing code
    # below does not use: every segment is drawn in white.
    EDGES = {
        (0, 1): 'm',
        (0, 2): 'c',
        (1, 3): 'm',
        (2, 4): 'c',
        (0, 5): 'm',
        (0, 6): 'c',
        (5, 7): 'm',
        (6, 8): 'c',
        (7, 9): 'm',
        (8, 10): 'c',
        (5, 6): 'y',
        (5, 11): 'm',
        (6, 12): 'c',
        (11, 12): 'y',
        (11, 13): 'm',
        (12, 14): 'c',
        (13, 15): 'm',
        (14, 16): 'c'
    }

    height, width, _channels = image.shape
    # Scale every (y, x, score) row by (height, width, 1) and drop size-one axes.
    points = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    # First pass: one filled green dot per confident keypoint.
    for point in points:
        row, col, score = point
        if score > confidence_threshold:
            cv2.circle(image, (int(col), int(row)), 4, (0, 255, 0), -1)

    # Second pass: a white segment wherever both endpoints are confident.
    for start_idx, end_idx in EDGES:
        row_a, col_a, score_a = points[start_idx]
        row_b, col_b, score_b = points[end_idx]
        if score_a > confidence_threshold and score_b > confidence_threshold:
            cv2.line(image, (int(col_a), int(row_a)), (int(col_b), int(row_b)), (255, 255, 255), 2)

    return image

# Description: Defines a function to visualize the predicted pose keypoints on an image.
# - draw_keypoints(image, keypoints, confidence_threshold=0.5):
#   - image: The original resized image (output of `preprocess_image` - `img_resized`). It is drawn on in place.
#   - keypoints: The predicted keypoints (output of `estimate_pose`).
#   - confidence_threshold: A threshold to filter out keypoints with low confidence scores. Only keypoints with a confidence score strictly above this threshold are drawn. Default is 0.5.
# - EDGES = {...}: Defines connections between keypoints to draw a skeleton. The keys are pairs of keypoint indices, and the values are color codes (the color is not used when drawing the lines; it is often used to tell body parts apart). These edges are specific to the MoveNet model's keypoint ordering.
# - height, width, _channels = image.shape: Gets the height, width, and channels of the input image.
# - points = np.squeeze(np.multiply(keypoints, [height, width, 1])): Scales and reshapes the keypoints.
#   - `keypoints` from MoveNet are typically normalized to the range [0, 1]. Multiplying by [height, width, 1] scales them back to the image's pixel coordinates and leaves the score unchanged.
#   - `np.squeeze()` removes dimensions of size one from the array.
# - for point in points: Iterates through each predicted keypoint and unpacks it into its y-coordinate (row), x-coordinate (col), and confidence score.
# - cv2.circle(...): Draws a filled green circle at the keypoint location (col, row) if its score is above the threshold.
# - for start_idx, end_idx in EDGES: Iterates through the skeleton edges and looks up the coordinates and scores of both endpoints.
# - cv2.line(...): Draws a white line between the two endpoint locations if both scores are above the threshold.
# - Returns: The function returns the input `image` with keypoints and skeleton drawn on it.

In [10]:
import os
import shutil
import urllib.request


def download_image(url, destination, timeout=30):
    """Download `url` and write the response body to `destination`.

    Args:
        url: Direct URL of the image file.
        destination: Local file path to write; its parent folder is created if missing.
        timeout: Seconds to wait on the network before giving up.

    Raises:
        urllib.error.URLError: If the server cannot be reached or rejects the request.
    """
    parent_dir = os.path.dirname(destination)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Send an explicit User-Agent: Wikimedia asks clients to identify themselves,
    # and the default urllib agent string may be rejected.
    request = urllib.request.Request(
        url, headers={"User-Agent": "IntelliTrain-PoseEstimation-Notebook/1.0"}
    )
    # urlopen runs first, so no file is created when the request itself fails.
    with urllib.request.urlopen(request, timeout=timeout) as response, open(destination, "wb") as image_file:
        shutil.copyfileobj(response, image_file)


# Special:FilePath redirects to the raw image file. The earlier URL pointed at a
# Commons category page: the "#/media/File:..." part is a browser-side fragment that
# is never sent to the server, so the download saved HTML instead of a JPEG.
# NOTE(review): file name copied from the earlier URL - confirm it still resolves.
image_url = "https://commons.wikimedia.org/wiki/Special:FilePath/Lion_waiting_in_Nambia_adapt%C3%A9.jpg"
image_path = "/FYP_IntelliTrain/PoseEstimation/t2.jpg"

download_image(image_url, image_path) # Download image


image_input, display_image = preprocess_image(image_path, input_size) # Preprocess the image
keypoints = estimate_pose(image_input) # Perform pose estimation
display_image_with_keypoints = display_image.copy() # Create a copy to avoid modifying original resized image
output_image = draw_keypoints(display_image_with_keypoints, keypoints, confidence_threshold=0.3) # Draw keypoints with a confidence threshold

plt.figure(figsize=(10,10))
plt.imshow(output_image)
plt.axis('off') # Turn off axis labels
plt.title("Pose Estimation Result")
plt.show()

# Description: This cell demonstrates the complete pose estimation pipeline on a sample image.
# - import os, shutil, urllib.request: Standard-library modules used to create the target folder, copy the download to disk, and perform the HTTP request.
# - download_image(url, destination, timeout=30): Helper that creates the destination folder if needed, requests `url` with an explicit User-Agent header, and streams the response into `destination`.
# - image_url = ...: Defines the direct-file URL of a sample image on Wikimedia Commons (via Special:FilePath, which redirects to the image file itself rather than to an HTML page).
# - image_path = "/FYP_IntelliTrain/PoseEstimation/t2.jpg": Sets the local file path where the downloaded image will be saved. This is an absolute path; adjust it to your own project folder.
# - download_image(image_url, image_path): Downloads the image from `image_url` and saves it to `image_path`.
# - image_input, display_image = preprocess_image(image_path, input_size): Preprocesses the downloaded image using the `preprocess_image` function.
# - keypoints = estimate_pose(image_input): Performs pose estimation on the preprocessed image using the `estimate_pose` function.
# - display_image_with_keypoints = display_image.copy(): Creates a copy of the resized image (`display_image`). This is important so that the `draw_keypoints` function modifies a copy and not the original `display_image`, which might be needed later.
# - output_image = draw_keypoints(...): Visualizes the predicted keypoints on the copied image using the `draw_keypoints` function, with a confidence threshold of 0.3.
# - plt.figure(figsize=(10,10)), plt.imshow(output_image), plt.axis('off'), plt.title("Pose Estimation Result"), plt.show(): Uses Matplotlib to display the image with drawn keypoints.
#   - `plt.figure(figsize=(10,10))`: Creates a figure with a specific size for display.
#   - `plt.imshow(output_image)`: Displays the image.
#   - `plt.axis('off')`: Turns off the axis ticks and labels for a cleaner image display.
#   - `plt.title("Pose Estimation Result")`: Sets the title of the plot.
#   - `plt.show()`: Shows the plot.

URLError: <urlopen error [WinError 10054] An existing connection was forcibly closed by the remote host>