In [1]:
import numpy as np

In [2]:
def normalize_keypoints(keypoints, anchor_idx1, anchor_idx2):
    """Translate and scale 2-D keypoints relative to two anchor keypoints.

    The midpoint of the two anchors becomes the origin and the distance
    between them becomes the unit length, so the result does not depend on
    where the pose sits in the image or how large it appears.

    Args:
        keypoints: Array-like of (x, y) pairs, one row per keypoint.
        anchor_idx1: Row index of the first anchor keypoint.
        anchor_idx2: Row index of the second anchor keypoint.

    Returns:
        np.ndarray with one normalized (x, y) row per input keypoint.

    Raises:
        ValueError: If the two anchors coincide, because the scale would be
            zero and every coordinate would become inf/NaN.

    Note:
        Every row is transformed the same way. Placeholder rows (for example
        an all-zero row standing in for a keypoint that was not detected) are
        not filtered out here and end up far from the real points.
    """
    points = np.asarray(keypoints)
    if not np.issubdtype(points.dtype, np.floating):
        # Integer input must be promoted before subtracting, otherwise
        # unsigned dtypes would wrap around.
        points = points.astype(float)

    # Extract reference keypoints
    x1, y1 = points[anchor_idx1]
    x2, y2 = points[anchor_idx2]

    # Scaling factor: distance between the two anchor keypoints
    scale = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
    if scale == 0:
        raise ValueError(
            "Anchor keypoints coincide; cannot normalize with a zero scale"
        )

    # Anchor point: midpoint of the two anchors
    anchor = np.array([(x1 + x2) / 2, (y1 + y2) / 2])

    # Broadcasting shifts and scales every keypoint in one step
    return (points - anchor) / scale

In [3]:
def compute_pose_similarity(pose1, pose2):
    """Return the mean Euclidean distance between corresponding keypoints.

    Despite the name, the result is a distance: 0 means the poses are
    identical and larger values mean they differ more.

    Args:
        pose1: Array-like of (x, y) rows, one per keypoint.
        pose2: Array-like with the same shape as ``pose1``.

    Returns:
        The average of the per-keypoint Euclidean distances.

    Raises:
        AssertionError: If the poses have a different number of keypoints.
        ValueError: If the poses have the same number of keypoints but
            different per-keypoint dimensions.
    """
    first = np.asarray(pose1)
    second = np.asarray(pose2)

    # Ensure the two poses have the same number of keypoints
    assert len(first) == len(second), "Poses must have the same number of keypoints"

    # A length check alone lets shapes such as (N, 2) vs (N, 1) through, and
    # the subtraction below would then broadcast into a meaningless result.
    if first.shape != second.shape:
        raise ValueError(
            f"Poses must have the same shape, got {first.shape} and {second.shape}"
        )

    # Compute average Euclidean distance
    distances = np.linalg.norm(first - second, axis=1)
    return np.mean(distances)

In [4]:
from ultralytics import YOLO

# Load the pretrained YOLO11m-pose model (weights file "yolo11m-pose.pt")
model = YOLO("yolo11m-pose.pt")

In [12]:
# Run inference on the calibration image
results = model("tframe1.png", conf=0.3, imgsz=160, max_det=1)  # results list
for r in results:
    # Move the tensor to host memory first: calling .numpy() directly on a
    # CUDA tensor raises, so .cpu() keeps this cell working on GPU runs too
    # (it is a no-op when inference already ran on the CPU).
    pose1 = r.keypoints.xy.cpu().numpy()
    print("Calib image")
    print(pose1)  # pixel (x, y) keypoint coordinates, one block per detection


image 1/1 c:\Users\uprao\OneDrive\Desktop\MotionMatch\model\tframe1.png: 96x160 1 person, 123.3ms
Speed: 1.0ms preprocess, 123.3ms inference, 1.0ms postprocess per image at shape (1, 3, 96, 160)
Calib image
[[[     950.82      368.16]
  [     962.82      354.31]
  [     936.17      353.59]
  [     974.55      365.96]
  [     910.03      362.95]
  [     997.05      459.97]
  [     866.19      455.27]
  [     1017.2      570.62]
  [     836.81      563.96]
  [     1022.8      675.67]
  [     836.25      663.35]
  [     967.71      676.95]
  [     881.57      675.57]
  [     957.33      851.62]
  [     880.85       853.3]
  [     945.87      1002.2]
  [     882.83      1005.7]]]


In [19]:
# Run inference on the test image with the same settings as the calibration image
results2 = model("pushup.png", conf=0.3, imgsz=160, max_det=1)  # results list
for r in results2:
    # Move the tensor to host memory first: calling .numpy() directly on a
    # CUDA tensor raises, so .cpu() keeps this cell working on GPU runs too
    # (it is a no-op when inference already ran on the CPU).
    pose2 = r.keypoints.xy.cpu().numpy()
    print("Test image")
    print(pose2)  # pixel (x, y) keypoint coordinates, one block per detection


image 1/1 c:\Users\uprao\OneDrive\Desktop\MotionMatch\model\pushup.png: 96x160 1 person, 116.0ms
Speed: 1.2ms preprocess, 116.0ms inference, 2.0ms postprocess per image at shape (1, 3, 96, 160)
Test image
[[[     294.63      530.11]
  [        292      494.98]
  [          0           0]
  [      340.1      449.56]
  [          0           0]
  [     443.89      506.41]
  [     475.78      506.65]
  [     607.23      724.73]
  [     628.55      672.43]
  [     620.05      939.11]
  [     612.77      917.53]
  [     922.53      593.46]
  [     940.98      624.44]
  [     1160.4      768.36]
  [     1184.6      778.77]
  [     1537.4      740.64]
  [     1514.1      716.59]]]


In [None]:
# Example usage
# pose1 = np.array([[100, 200], [120, 220], [110, 250]])  # Keypoints for video 1
# pose2 = np.array([[200, 400], [240, 440], [220, 500]])  # Keypoints for video 2

In [20]:
# Normalize poses
# pose1[0] selects the keypoints of the first detection (inference was run
# with max_det=1, so there is at most one).
# Anchors 5 and 6 are presumably the two shoulders in COCO-17 keypoint
# order — TODO confirm against the model's keypoint layout.
pose1_normalized = normalize_keypoints(pose1[0], anchor_idx1=5, anchor_idx2=6)
print(pose1_normalized)

[[    0.14663    -0.68319]
 [    0.23829    -0.78902]
 [    0.03475     -0.7945]
 [    0.32791    -0.69998]
 [   -0.16488    -0.72303]
 [    0.49968    0.017956]
 [   -0.49968   -0.017956]
 [    0.65369     0.86295]
 [   -0.72406      0.8121]
 [    0.69632      1.6652]
 [   -0.72832      1.5712]
 [    0.27567       1.675]
 [   -0.38217      1.6644]
 [    0.19633      3.0089]
 [   -0.38772      3.0218]
 [    0.10883      4.1587]
 [   -0.37261      4.1853]]


In [21]:
# Normalize the test pose with the same anchor indices (5, 6) used for pose1,
# so both poses are expressed in the same anchor-relative coordinate frame.
# NOTE(review): pose2 contains (0, 0) rows, presumably keypoints the model did
# not detect; they are normalized like real points and land far from the rest
# of the pose, which inflates any distance computed afterwards — confirm and
# consider masking them.
pose2_normalized = normalize_keypoints(pose2[0], anchor_idx1=5, anchor_idx2=6)
print(pose2_normalized)

[[    -5.1798     0.73927]
 [    -5.2623    -0.36226]
 [    -14.417     -15.882]
 [     -3.754     -1.7861]
 [    -14.417     -15.882]
 [   -0.49999  -0.0037403]
 [    0.49999   0.0037403]
 [     4.6215      6.8415]
 [     5.2899      5.2017]
 [     5.0233      13.563]
 [     4.7952      12.886]
 [     14.507      2.7256]
 [     15.086       3.697]
 [     21.964      8.2093]
 [     22.725      8.5356]
 [     33.787      7.3401]
 [     33.056      6.5861]]


In [22]:
# Compute similarity
# compute_pose_similarity returns the mean per-keypoint Euclidean distance
# between the two normalized poses, so lower values mean the poses are more
# alike (hence the "Pose Difference" label below).
similarity = compute_pose_similarity(pose1_normalized, pose2_normalized)
print("Pose Difference:", similarity)


Pose Difference: 14.245955688134938


In [17]:
def compute_cosine_similarity(pose1, pose2):
    """Cosine of the angle between two poses viewed as flat vectors.

    Each pose is flattened into a single 1-D vector; the result is the dot
    product of the two vectors divided by the product of their Euclidean
    norms. If either vector has zero norm the function returns 0 instead of
    dividing by zero.
    """
    vec_a = pose1.ravel()
    vec_b = pose2.ravel()

    # Numerator first, so mismatched lengths fail here before the zero guard.
    numerator = np.dot(vec_a, vec_b)
    norm_a, norm_b = np.linalg.norm(vec_a), np.linalg.norm(vec_b)

    # A zero-length vector has no direction; report it as orthogonal.
    if 0 in (norm_a, norm_b):
        return 0

    denominator = norm_a * norm_b
    return numerator / denominator

In [23]:
# Cosine similarity of the two normalized poses, each flattened to one vector
# inside compute_cosine_similarity; unlike the pose difference above, higher
# values mean the poses point in a more similar direction.
cos_similarity = compute_cosine_similarity(pose1_normalized, pose2_normalized)
print("Cosine Similarity:", cos_similarity)

Cosine Similarity: 0.28882775953737383
