In [1]:
import numpy as np

In [2]:
def normalize_keypoints(keypoints, anchor_idx1, anchor_idx2):
    """Translate and scale 2-D keypoints relative to two anchor keypoints.

    The midpoint of the two anchors becomes the origin and the distance
    between them becomes the unit length, which makes poses comparable
    regardless of where the subject is in the frame or how large it appears.

    Args:
        keypoints: Array-like of shape (N, 2) holding (x, y) pairs.
        anchor_idx1: Index of the first anchor keypoint.
        anchor_idx2: Index of the second anchor keypoint.

    Returns:
        np.ndarray of shape (N, 2) with the normalized coordinates. A
        floating-point input dtype is preserved; other dtypes are promoted
        to float.

    Raises:
        ValueError: If ``keypoints`` is not of shape (N, 2), or if the two
            anchors coincide (the scale would be zero and every coordinate
            would become inf/NaN).
        IndexError: If an anchor index is out of range.
    """
    points = np.asarray(keypoints)
    if points.ndim != 2 or points.shape[1] != 2:
        raise ValueError(
            f"keypoints must have shape (N, 2), got {points.shape}"
        )
    if not np.issubdtype(points.dtype, np.floating):
        # Integer input would otherwise be subject to integer arithmetic quirks.
        points = points.astype(float)

    first_anchor = points[anchor_idx1]
    second_anchor = points[anchor_idx2]

    # Scaling factor: Euclidean distance between the anchor keypoints.
    scale = np.linalg.norm(second_anchor - first_anchor)
    if scale == 0:
        raise ValueError(
            "Anchor keypoints coincide; cannot normalize with a zero scale"
        )

    # Origin of the normalized frame: midpoint between the anchors.
    midpoint = (first_anchor + second_anchor) / 2

    # Broadcasting applies the shift and scale to every keypoint at once.
    return (points - midpoint) / scale

In [3]:
def compute_pose_similarity(pose1, pose2):
    """Return the mean per-keypoint Euclidean distance between two poses.

    Despite the name, the result is a *distance*: 0 means the poses are
    identical and larger values mean they differ more.

    Args:
        pose1: Array-like of shape (N, D) with one row per keypoint.
        pose2: Array-like with the same shape as ``pose1``.

    Returns:
        The average of the N row-wise Euclidean distances.

    Raises:
        AssertionError: If the poses have a different number of keypoints.
        ValueError: If the poses have the same number of keypoints but
            different shapes (which would otherwise broadcast silently).
    """
    # Kept as an assert so callers relying on AssertionError are unaffected.
    assert len(pose1) == len(pose2), "Poses must have the same number of keypoints"

    first = np.asarray(pose1)
    second = np.asarray(pose2)
    if first.shape != second.shape:
        # e.g. (N, 2) vs (N, 1) would broadcast and yield a meaningless number.
        raise ValueError(
            f"Poses must have the same shape, got {first.shape} and {second.shape}"
        )

    # Row-wise Euclidean distance, then the average over all keypoints.
    distances = np.linalg.norm(first - second, axis=1)
    return np.mean(distances)

In [4]:
from ultralytics import YOLO

# Load a pretrained YOLO11m-pose model (weights file "yolo11m-pose.pt")
model = YOLO("yolo11m-pose.pt")

In [5]:
# Run pose inference on the calibration image, keeping at most one detection
results = model("tframe1.png", conf=0.3, imgsz=160, max_det=1)  # list with one result per input image
for r in results:
    # .cpu() is a no-op on CPU but is required before .numpy() when
    # inference ran on a GPU, where calling .numpy() directly raises.
    pose1 = r.keypoints.xy.cpu().numpy()
    print("Calib image")
    print(pose1)  # pixel (x, y) coordinates of the detected keypoints


image 1/1 c:\Users\uprao\OneDrive\Desktop\MotionMatch\model\tframe1.png: 96x160 1 person, 289.3ms
Speed: 8.9ms preprocess, 289.3ms inference, 22.3ms postprocess per image at shape (1, 3, 96, 160)
Calib image
[[[     950.82      368.16]
  [     962.82      354.31]
  [     936.17      353.59]
  [     974.55      365.96]
  [     910.03      362.95]
  [     997.05      459.97]
  [     866.19      455.27]
  [     1017.2      570.62]
  [     836.81      563.96]
  [     1022.8      675.67]
  [     836.25      663.35]
  [     967.71      676.95]
  [     881.57      675.57]
  [     957.33      851.62]
  [     880.85       853.3]
  [     945.87      1002.2]
  [     882.83      1005.7]]]


In [6]:
# Run pose inference on the test image with the same settings as the calibration image
results2 = model("uframe1.png", conf=0.3, imgsz=160, max_det=1)  # list with one result per input image
for r in results2:
    # .cpu() is a no-op on CPU but is required before .numpy() when
    # inference ran on a GPU, where calling .numpy() directly raises.
    pose2 = r.keypoints.xy.cpu().numpy()
    print("Test image")
    print(pose2)  # pixel (x, y) coordinates of the detected keypoints


image 1/1 c:\Users\uprao\OneDrive\Desktop\MotionMatch\model\uframe1.png: 96x160 1 person, 121.0ms
Speed: 4.4ms preprocess, 121.0ms inference, 1.1ms postprocess per image at shape (1, 3, 96, 160)
Test image
[[[     997.15      360.81]
  [     1011.8      346.19]
  [     981.86      345.78]
  [     1026.8      358.12]
  [     958.14      357.21]
  [     1052.9      451.29]
  [     923.83      456.15]
  [     1077.1      562.16]
  [     894.67      576.18]
  [     1090.9      661.85]
  [     872.84       684.1]
  [       1028      664.26]
  [      944.3      666.72]
  [     1032.6       815.5]
  [     957.08      819.03]
  [     1022.5      948.06]
  [      963.1      953.92]]]


In [None]:
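# Example usage with hand-made keypoints (kept for reference only; uncommenting would overwrite the detected pose1/pose2 above)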
# pose1 = np.array([[100, 200], [120, 220], [110, 250]])  # Keypoints for video 1
# pose2 = np.array([[200, 400], [240, 440], [220, 500]])  # Keypoints for video 2

In [6]:
# Normalize the calibration pose. pose1[0] selects the keypoints of the first
# detection; keypoints 5 and 6 are the anchors that define origin and scale
# (presumably the left/right shoulders in COCO keypoint order — TODO confirm).
pose1_normalized = normalize_keypoints(pose1[0], anchor_idx1=5, anchor_idx2=6)
print(pose1_normalized)

[[    0.14663    -0.68319]
 [    0.23829    -0.78902]
 [    0.03475     -0.7945]
 [    0.32791    -0.69998]
 [   -0.16488    -0.72303]
 [    0.49968    0.017956]
 [   -0.49968   -0.017956]
 [    0.65369     0.86295]
 [   -0.72406      0.8121]
 [    0.69632      1.6652]
 [   -0.72832      1.5712]
 [    0.27567       1.675]
 [   -0.38217      1.6644]
 [    0.19633      3.0089]
 [   -0.38772      3.0218]
 [    0.10883      4.1587]
 [   -0.37261      4.1853]]


In [19]:
# Normalize the test pose with the same anchors (keypoints 5 and 6) as the
# calibration pose so that both poses share one coordinate frame.
pose2_normalized = normalize_keypoints(pose2[0], anchor_idx1=5, anchor_idx2=6)
print(pose2_normalized)

[[   0.068233    -0.71957]
 [    0.18147    -0.83275]
 [  -0.050169    -0.83597]
 [    0.29803    -0.74037]
 [    -0.2339    -0.74743]
 [    0.49965   -0.018805]
 [   -0.49965    0.018805]
 [    0.68753     0.83979]
 [   -0.72548     0.94834]
 [    0.79389      1.6118]
 [   -0.89452      1.7842]
 [    0.30746      1.6305]
 [   -0.34111      1.6496]
 [      0.343      2.8017]
 [   -0.24214      2.8291]
 [    0.26457      3.8284]
 [   -0.19546      3.8738]]


In [22]:
# Compute the mean per-keypoint Euclidean distance between the normalized poses.
# Despite the variable name this is a distance: 0 means identical poses and
# larger values mean the poses differ more.
# NOTE(review): execution counts in this notebook are out of order — re-run
# with Restart & Run All before trusting the printed value.
similarity = compute_pose_similarity(pose1_normalized, pose2_normalized)
print("Pose Difference:", similarity)


Pose Difference: 14.245955688134938


In [16]:
def compute_cosine_similarity(pose1, pose2):
    """Return the cosine of the angle between two poses viewed as flat vectors.

    Both inputs are flattened with ``.flatten()``, so they must be NumPy
    arrays holding the same total number of elements.

    Returns:
        0 if either flattened vector has zero norm (such vectors are treated
        as orthogonal); otherwise ``dot(a, b) / (|a| * |b|)``.
    """
    flat_a, flat_b = pose1.flatten(), pose2.flatten()

    # The dot product is evaluated first so that a length mismatch fails
    # before the zero-norm early return can hide it.
    numerator = np.dot(flat_a, flat_b)
    norm_a, norm_b = np.linalg.norm(flat_a), np.linalg.norm(flat_b)

    # A zero-length vector has no direction; report it as orthogonal
    # instead of dividing by zero.
    if norm_a == 0 or norm_b == 0:
        return 0

    return numerator / (norm_a * norm_b)

In [17]:
# Cosine similarity of the flattened, normalized poses (1.0 means the two
# flattened vectors point in exactly the same direction).
cos_similarity = compute_cosine_similarity(pose1_normalized, pose2_normalized)
print("Cosine Similarity:", cos_similarity)

Cosine Similarity: 1.0


In [8]:
# Inspect normalized keypoints 9 and 10 of the calibration pose
# (presumably the wrists in COCO keypoint order — TODO confirm).
print(pose1_normalized[9:11])

[[    0.69632      1.6652]
 [   -0.72832      1.5712]]
