In [2]:
import numpy as np # linear algebra
import pandas as pd
import xml.etree.ElementTree as ET
from pose_estimation.keypoints import Keypoints
from pose_estimation.keypoint_statistics import KeypointStatistics
from pose_estimation.scoring.angle_score import AngleScore
from mediapipe.tasks.python.components.containers.landmark import Landmark
import mediapipe as mp

In [90]:
# Read XML File
xml_file = 'dataset/annotations.xml'
tree = ET.parse(xml_file)
root = tree.getroot()

dataset = {}

# Keypoint mapping from dataset numbers in annotations.xml to body part names used in Keypoints
datasetKeypoints = {
            5 : "left_shoulder",
            2 : "right_shoulder",
            6 : "left_elbow",
            3 : "right_elbow",
            7 : "left_wrist",
            4 : "right_wrist",
            11 : "left_hip",
            8 : "right_hip",
            12 : "left_knee",
            9 : "right_knee",
            13 : "left_ankle",
            10 : "right_ankle"
        }

imageDimensions = {}

images = root.findall("image")

for image in images:
    points = image.findall("points")

    keypoints = Keypoints(0, 0, 0, 0, 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0)
    for point in points:
        point_id = int(point.attrib["label"])
        x, y = point.attrib["points"].split(",")
        if point_id in datasetKeypoints:
            setattr(keypoints, datasetKeypoints[point_id], Landmark(x=float(x), y=float(y)))
    dataset.update({image.attrib["name"]: keypoints})

    # Store image dimensions
    imageDimensions.update({image.attrib["name"]: (int(image.attrib["width"]), int(image.attrib["height"]) )})

'''
Created a dictionary to store keypoints.
Format: {image_name: Keypoints}
Keypoints is our own Keypoints object.
'''

'\nCreated a dictionary to store keypoints.\nFormat: {image_name: Keypoints}\nKeypoints is our own Keypoints object.\n'

In [4]:
'''
Setup Media Pipe
'''
import mediapipe as mp
from pose_estimation.mediapipe import MediaPipe
from mediapipe.tasks.python import vision

media_pipe = MediaPipe()
media_pipe.initialize(mode=vision.RunningMode.IMAGE, parameter_path="../pose_landmarker_full.task")

I0000 00:00:1711385739.014489       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 83.1), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Functions to test images

In [86]:
from timeit import timeit
import cv2

def imageInfrence(image):
    """
    process image using mediapipe
    return tine taken and the keypoints
    """
    start = timeit()
    keypoints =  media_pipe.process_image(image)
    end = timeit()

    time = end - start
    return keypoints, time

def compareKeypoints(correctKeypoints: Keypoints, estimatedKeypoints: Keypoints, dimensions: tuple[int,int]):
    """
    compare keypoints and return the mean squared error
    """
    correctKeypointsArray = correctKeypoints.to_numpy_positions()
    estimatedKeypointsArray = estimatedKeypoints.to_numpy_positions()

    #normalise correct keypoints by image dimensions
    correctKeypointsArray[:,0] = correctKeypointsArray[:,0] / dimensions[0]
    correctKeypointsArray[:,1] = correctKeypointsArray[:,1] / dimensions[1]

    keypointDiffs = correctKeypointsArray - estimatedKeypointsArray
    keypointDiffs = np.linalg.norm(keypointDiffs, axis=1)

    mse = np.square(keypointDiffs).mean()
    return mse

def testImage(image_filename, image_keypoints, resolution_scale=1.0):
    """
    test and image based on resolution
    return mean squared error and time taken to process image
    
    :param image_filename: name of the image file
    :param image_keypoints: correct keypoints
    :param resolution_scale: scale to resize image
    """
    
    # use cv2 to get image
    image = cv2.imread("dataset/" + image_filename)

    # resize image by resolution scale
    # TODO: currently resizes by scale
    #       can change to resize by width or height
    image = cv2.resize(image, (0,0), fx=resolution_scale, fy=resolution_scale)

    # convert image to mediapipe image
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)

    correctKeypoints = image_keypoints
    estimatedKeypoints, time = imageInfrence(image)

    mse = compareKeypoints(correctKeypoints, estimatedKeypoints, imageDimensions[image_filename])
    return mse, time




Construct pd dataframe

In [101]:
resolutions = [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]
images_to_skip = ["PE/4.jpg", "PE/8.jpg", "PE/18.jpg"]

data = {}
data_with_times = {"resolution" : resolutions}


# Test all images
for res in resolutions:
    mses = []
    times = []
    for image_name, image_keypoints in dataset.items():
        # skip these images
        if image_name in images_to_skip:
            continue
        mse, time = testImage(image_name, image_keypoints, 1)
        mses.append(mse)
        times.append(time)

    data[res] = mses

    data_with_times["mean time"] = np.mean(times)
    data_with_times["mean mse"] = np.mean(mses)

df = pd.DataFrame(data, index = [image_name for image_name in dataset.keys() if image_name not in images_to_skip])
df_with_times = pd.DataFrame(data_with_times)

print(df_with_times)


   mean time  mean mse  resolution
0   0.000025  0.014067        0.25
1   0.000025  0.014067        0.50
2   0.000025  0.014067        0.75
3   0.000025  0.014067        1.00
4   0.000025  0.014067        1.25
5   0.000025  0.014067        1.50
6   0.000025  0.014067        1.75
7   0.000025  0.014067        2.00
