In [None]:
# Change the working directory two levels up (presumably to the project root, so that the
# relative "data/..." path and the `emv` imports used in later cells resolve -- confirm).
# NOTE: not idempotent -- re-running this cell moves up two more levels.
%cd ../..
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: reload all modules before executing each cell.
%autoreload 2

In [None]:
import os
import cv2
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors 
from sklearn.cluster import DBSCAN, KMeans
from umap import UMAP

from emv.features.pose import load_poses, sample_from_sports
from emv.features.pose_utils import draw_pose, KEYPOINTS_NAMES, CONNECTIONS, ANGLES_ASSOCIATIONS

# Get all poses in the DB

In [None]:
# Load the pose table through the project helper `load_poses`, pointing it at a local CSV.
# `max_poses=None` is passed as the cap (presumably "no limit" -- confirm in emv.features.pose).
local_poses_path = "data/pose_df.csv"
pose_df = load_poses(local_fp = local_poses_path, max_poses = None)

In [None]:
# Preview the first rows of the pose table.
pose_df.head()

In [None]:
# Number of poses per sport label.
pose_df.sport.value_counts()

In [None]:
# Write the table to CSV without the index. This is the same path string that is assigned
# to `local_poses_path` in the loading cell.
pose_df.to_csv("data/pose_df.csv", index=False)

In [None]:
# Subsample through the project helper with n_per_sports=1000 (presumably a per-sport cap -- confirm).
# NOTE: overwrites `pose_df`, so re-running this cell samples from the already-sampled table.
pose_df = sample_from_sports(pose_df, n_per_sports=1000)

# Add directional features

In [None]:
def get_directions(keypoints):
    """Return one 2D displacement vector per entry of ``CONNECTIONS``.

    Args:
        keypoints: Sequence indexed like ``KEYPOINTS_NAMES``; every entry must unpack
            into exactly three values, of which the first two are used as x and y.

    Returns:
        ``np.ndarray`` with one ``(dx, dy)`` row per connection, where the vector points
        from the first keypoint of the connection to the second.
    """
    def _xy(name):
        # The third component of a keypoint is not needed for directions.
        x, y, _ = keypoints[KEYPOINTS_NAMES.index(name)]
        return x, y

    vectors = []
    for start_name, end_name in CONNECTIONS:
        start_x, start_y = _xy(start_name)
        end_x, end_y = _xy(end_name)
        vectors.append((end_x - start_x, end_y - start_y))

    return np.array(vectors)

In [None]:
def draw_directions(pose, ax):
    """Draw a pose and overlay one red arrow per skeleton connection.

    Args:
        pose: Object exposing ``keypoints`` (passed to ``draw_pose`` and ``get_directions``).
        ax: Matplotlib axes to draw on.

    Returns:
        The same ``ax``, for chaining.
    """
    draw_pose(pose, ax = ax, show_frame = False, cut = True)
    vectors = get_directions(pose.keypoints)
    # Each arrow starts at the first keypoint of the connection and spans its direction vector.
    for (start_name, _end_name), (dx, dy) in zip(CONNECTIONS, vectors):
        start_x, start_y, _ = pose.keypoints[KEYPOINTS_NAMES.index(start_name)]
        ax.arrow(start_x, start_y, dx, dy, color = "r", width = 0.2)
    return ax

In [None]:
# Raw per-connection direction vectors for every pose.
pose_df["directions"] = pose_df["keypoints"].apply(get_directions)
# Flatten the direction vectors and scale the flattened vector to unit L2 norm.
pose_df["norm_directions"] = pose_df["directions"].apply(
    lambda vectors: vectors.flatten() / np.linalg.norm(vectors.flatten())
)
# Combined feature: normalised directions followed by the existing angle vector.
pose_df["feature_angle_dir"] = pose_df.apply(
    lambda row: np.concatenate([row.norm_directions, row.angle_vec]),
    axis = 1,
)

# Comparison of distance metrics between poses

In [None]:
def get_nearest_neighbors(input_pose, feature, n_neighbors=5, metric = "cosine", dist_threshold = 0.05):
    """Find the poses in the global ``pose_df`` closest to ``input_pose``.

    Poses sharing the query's ``media_id`` are excluded from the search.

    Args:
        input_pose: Row-like object with ``media_id`` and the ``feature`` entry.
        feature: Name of the ``pose_df`` column holding the feature vectors.
        n_neighbors: Number of neighbours requested from the index.
        metric: Metric forwarded to ``sklearn.neighbors.NearestNeighbors``.
        dist_threshold: Only neighbours with a distance strictly below this are kept.

    Returns:
        DataFrame of the retained neighbours (possibly fewer than ``n_neighbors``,
        possibly empty) with an added ``distance`` column and a fresh 0..n-1 index.
    """
    from_other_media = pose_df["media_id"] != input_pose["media_id"]
    candidates = pose_df[from_other_media].reset_index(drop=True)

    index = NearestNeighbors(n_neighbors=n_neighbors, metric=metric)
    index.fit(candidates[feature].tolist())
    neighbor_dists, neighbor_ids = index.kneighbors([input_pose[feature]])

    matches = candidates.iloc[neighbor_ids[0]].reset_index(drop=True)
    matches["distance"] = neighbor_dists[0]

    close_enough = matches["distance"] < dist_threshold
    return matches[close_enough].reset_index(drop=True)

In [None]:
# Visual comparison of three retrieval settings for a few random query poses.
# Each figure has one row per setting: the query pose (red) followed by its neighbours.
n_neighbors = 5
nrows = 3
for _ in range(5):
    input_pose = pose_df.sample(1).iloc[0]
    results_cosine = get_nearest_neighbors(input_pose, feature="angle_vec", n_neighbors=n_neighbors, metric="cosine", dist_threshold=1)
    results_euclidean = get_nearest_neighbors(input_pose, feature="angle_vec", n_neighbors=n_neighbors, metric="euclidean", dist_threshold=1)
    results_dirs = get_nearest_neighbors(input_pose, feature="feature_angle_dir", n_neighbors=n_neighbors, metric="cosine", dist_threshold=1)

    # One (title, neighbours) pair per subplot row; must contain `nrows` entries.
    rows = [
        ("Cosine metric", results_cosine),
        ("Euclidean metric", results_euclidean),
        ("Directions", results_dirs),
    ]

    fig, axs = plt.subplots(nrows=nrows, ncols=n_neighbors + 1, figsize=(n_neighbors * 2, nrows * 2))

    for row, (title, neighbors) in enumerate(rows):
        draw_pose(input_pose, ax = axs[row, 0], show_frame = False, cut = True, color = "red")

        for col in range(n_neighbors):
            ax = axs[row, col + 1]
            # The distance filter may return fewer than `n_neighbors` rows; indexing past
            # the end would raise IndexError, so blank the unused panels instead.
            if col < len(neighbors):
                draw_pose(neighbors.iloc[col], ax = ax, show_frame = False, cut = True)
            else:
                ax.axis("off")

        axs[row, 1].set_title(title)
    plt.show()

# Matching Analytics

In [None]:
def angles_similarity(angles1, angles2):
    """Mean absolute element-wise difference between two angle vectors.

    Despite the name this behaves like a distance: 0 means identical vectors and
    larger values mean less similar poses.

    Args:
        angles1: Array-like of angles.
        angles2: Array-like of angles, broadcastable against ``angles1``.

    Returns:
        The mean of ``|angles1 - angles2|``.
    """
    first = np.array(angles1)
    second = np.array(angles2)
    return np.mean(np.abs(first - second))

In [None]:
def keypoints_pairwise(k1, k2, threshold = 0.6):
    """Mean Euclidean distance between corresponding confident keypoints of two poses.

    A keypoint index contributes only when its third component (used here as a
    confidence score) is strictly above ``threshold`` in both poses.

    Args:
        k1: Sequence of ``(x, y, score)``-like keypoints of the first pose.
        k2: Keypoints of the second pose, indexed like ``k1``.
        threshold: Minimum score a keypoint needs in both poses to be compared.

    Returns:
        The mean distance over the compared keypoints, or ``np.nan`` when no keypoint
        qualifies (returned explicitly so that no "mean of empty slice" RuntimeWarning
        is emitted).
    """
    diffs = []
    for i in range(len(k1)):
        if k1[i][2] > threshold and k2[i][2] > threshold:
            first_xy = np.array([k1[i][0], k1[i][1]])
            second_xy = np.array([k2[i][0], k2[i][1]])
            diffs.append(np.linalg.norm(first_xy - second_xy))

    if not diffs:
        return np.nan
    return np.mean(diffs)

In [None]:
# For every (feature, metric) pair: find each pose's 100 nearest neighbours inside a random
# sample and measure how far apart the raw keypoints of those neighbours are.
# Cap the sample size so the cell also runs on tables with fewer than 1000 poses.
sample = pose_df.sample(min(1000, len(pose_df))).reset_index(drop=True)

features = ["angle_vec", "feature_angle_dir"]
metrics = ["cosine", "euclidean"]

# Plain list for positional lookups; avoids a `sample.iloc[i]` row construction per neighbour.
sample_keypoints = sample.keypoints.tolist()


def _finite_mean(values):
    """Mean of the finite entries of ``values``; NaN when there are none."""
    values = np.asarray(values, dtype=float)
    finite = values[np.isfinite(values)]
    return finite.mean() if finite.size else np.nan


results = []
for feature in features:
    for metric in metrics:
        nbrs = NearestNeighbors(n_neighbors=100, metric=metric).fit(sample[feature].tolist())
        # Called without a query: each sample point's neighbours, excluding the point itself.
        distances, indices = nbrs.kneighbors()

        sample["nbrs"] = indices.tolist()
        sample["pairwise_d"] = sample.apply(
            lambda row: [keypoints_pairwise(row.keypoints, sample_keypoints[i]) for i in row.nbrs],
            axis = 1,
        )
        # `keypoints_pairwise` yields NaN when two poses share no confident keypoint. A plain
        # mean would turn the whole pose into NaN and silently drop it from the aggregate.
        sample["mean_pairwise_d"] = sample.pairwise_d.apply(_finite_mean)

        results.append({
            "feature": feature,
            "metric": metric,
            "mean_pairwise_d": sample.mean_pairwise_d.mean(),
            "std_pairwise_d": sample.mean_pairwise_d.std()
        })

In [None]:
# Mean keypoint distance (with its standard deviation as error bar) per feature/metric pair.
combo_names = []
combo_means = []
combo_stds = []
for entry in results:
    combo_names.append(f"{entry['feature']}_{entry['metric']}")
    combo_means.append(entry["mean_pairwise_d"])
    combo_stds.append(entry["std_pairwise_d"])

plt.figure(figsize=(10, 5))
plt.errorbar(x = combo_names, y = combo_means, yerr = combo_stds, fmt = "+")
plt.show()

In [None]:
def matching_analytics(input_pose, metric = "cosine", n_neighbors=100, dist_threshold=0.05, show_top_n=5, feature="angle_vec"):
    """Retrieve the nearest neighbours of a pose and plot diagnostics about them.

    Two figures are shown: the query next to its closest matches, and a 3x2 grid with
    the distance distribution, the sports of the matches, and keypoint / angle scores
    of the query compared with those of the matches.

    Args:
        input_pose: Row-like pose with ``keypoints``, ``angle_score``, ``video_name``,
            ``sport`` and the entries needed by ``get_nearest_neighbors``.
        metric: Metric name or callable forwarded to ``get_nearest_neighbors``.
        n_neighbors: Number of neighbours to request.
        dist_threshold: Neighbours at or above this distance are discarded.
        show_top_n: Number of closest matches drawn next to the query.
        feature: ``pose_df`` column searched on. ``get_nearest_neighbors`` requires it;
            the previous code omitted it, which raised ``TypeError`` on every call.

    Returns:
        DataFrame of retained neighbours (empty when nothing is below ``dist_threshold``,
        in which case no figure is drawn).
    """
    results = get_nearest_neighbors(input_pose, feature=feature, n_neighbors=n_neighbors, metric=metric, dist_threshold=dist_threshold)
    if results.empty:
        # Nothing to plot; the box plots below need at least one neighbour.
        print(f"No neighbors found below dist_threshold={dist_threshold}")
        return results

    # Top matches. squeeze=False keeps `axs` an array even when show_top_n == 0.
    fig, axs = plt.subplots(nrows=1, ncols=show_top_n + 1, figsize=((show_top_n + 1) * 3, 3), squeeze=False)
    axs = axs.flatten()
    draw_pose(input_pose, ax = axs[0], cut = True)
    axs[0].set_title("Input pose")
    top_matches = results.head(show_top_n)
    for col, (_, pose) in enumerate(top_matches.iterrows(), start=1):
        draw_pose(pose, ax = axs[col], cut = True)
    # Blank the panels left over when fewer than `show_top_n` matches exist.
    for ax in axs[len(top_matches) + 1:]:
        ax.axis("off")
    plt.suptitle(f"Top {show_top_n} nearest neighbors", fontsize = 20, y=1.03)
    plt.show()

    # Analytics
    fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(16, 16), gridspec_kw={'width_ratios': [1, 3], 'height_ratios': [1, 2, 2]})
    axs = axs.flatten()

    axs[0].boxplot(results["distance"])
    axs[0].set_title("Distances")

    sports_counts = results["sport"].value_counts()
    axs[1].bar(sports_counts.index, sports_counts.values)
    axs[1].set_title(f"Sports (n={len(results)})")
    # Rotate the existing tick labels; set_xticklabels without fixed ticks triggers a
    # matplotlib warning.
    axs[1].tick_params(axis="x", labelrotation=45)

    # Third component of every keypoint, for the query and for each neighbour.
    input_keypoints_scores = [k[2] for k in input_pose["keypoints"]]
    keypoints_df_scores = pd.DataFrame(results["keypoints"].apply(lambda x: [k[2] for k in x]).tolist(), columns=KEYPOINTS_NAMES)

    axs[2].barh(KEYPOINTS_NAMES, input_keypoints_scores)
    axs[2].set_title("Input keypoints scores")
    keypoints_df_scores.boxplot(column=KEYPOINTS_NAMES, vert=False, ax=axs[3], grid=False)
    axs[3].set_title("Mean keypoints scores of nearest neighbors")

    angle_names = list(ANGLES_ASSOCIATIONS.keys())
    input_angle_scores = input_pose["angle_score"]
    angle_df_scores = pd.DataFrame(results["angle_score"].tolist(), columns=angle_names)

    axs[4].barh(angle_names, input_angle_scores)
    axs[4].set_title("Input angles scores")
    angle_df_scores.boxplot(column=angle_names, vert=False, ax=axs[5], grid=False)
    axs[5].set_title("Mean angles scores of nearest neighbors")

    plt.suptitle(f"Nearest neighbors analytics for {input_pose['video_name']} ({input_pose['sport']})", fontsize = 20, y=1.03)
    plt.tight_layout()
    plt.show()

    return results

In [None]:
# Pick one random pose and run the neighbour analytics, using the callable
# `angles_similarity` as the distance metric.
# NOTE: rebinds `results`, which the feature/metric comparison cells use as a list of dicts.
input_pose = pose_df.sample(1).iloc[0]

results = matching_analytics(input_pose, metric = angles_similarity)

# Analytics by sports

In [None]:
# Drop the "Non-Sport" poses. reset_index returns a fresh frame, so the column assignments
# below do not act on a slice (no SettingWithCopyWarning), and index labels match the
# positional indices returned by kneighbors and used with `.iloc` elsewhere.
pose_df = pose_df[pose_df["sport"] != "Non-Sport"].reset_index(drop=True)

n_neighbors = 100
nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(pose_df["feature_angle_dir"].tolist())
# Called without a query: neighbours of every fitted pose, excluding the pose itself.
distances, indices = nbrs.kneighbors()

pose_df["distance"] = distances.tolist()
pose_df["nbrs_indices"] = indices.tolist()
# For every pose: {sport: number of its neighbours labelled with that sport}.
pose_df["nbrs_sports"] = pose_df["nbrs_indices"].map(lambda x: pose_df.iloc[x]["sport"].value_counts().to_dict())

In [None]:
sports = pose_df["sport"].unique()

# Give every pose a Counter with one entry per sport (0 when no neighbour has that sport).
pose_df["nbrs_sports"] = pose_df["nbrs_sports"].map(
    lambda counts: Counter({name: counts.get(name, 0) for name in sports})
)
# Same Counters, with each count divided by the pose's total neighbour count.
pose_df["nbrs_props"] = pose_df["nbrs_sports"].map(
    lambda counts: Counter({name: count / sum(counts.values()) for name, count in counts.items()})
)

In [None]:
# Sum the per-pose neighbour Counters within each sport ...
nbrs_sports_counts = pose_df.groupby("sport")["nbrs_sports"].sum()
# ... and turn each summed Counter into shares of its own total, rows ordered by `sports`.
nbrs_sports_props = nbrs_sports_counts.apply(
    lambda totals: Counter({
        name: totals.get(name, 0) / np.sum(list(totals.values()))
        for name in sports
    })
)[sports]

In [None]:
from collections import defaultdict

def merge_and_mean_dicts(dicts):
    """Average several mappings key by key.

    Args:
        dicts: Iterable of mappings with numeric values.

    Returns:
        A plain dict mapping every key seen to the mean of its values. A key missing
        from some inputs is averaged only over the inputs that contain it. Keys keep
        the order in which they were first encountered.
    """
    collected = {}
    for mapping in dicts:
        for key in mapping:
            collected.setdefault(key, []).append(mapping[key])

    averages = {}
    for key, values in collected.items():
        averages[key] = sum(values) / len(values)
    return averages

# Per-sport mean of the per-pose neighbour proportions, rows ordered by `sports`.
# NOTE: rebinds `nbrs_sports_props`, replacing the count-based version from the previous cell.
nbrs_sports_props = pose_df.groupby("sport").nbrs_props.agg(merge_and_mean_dicts)[sports]

In [None]:
# One row per sport; the columns follow the key order of each per-sport dict.
proportion_rows = [list(props.values()) for props in nbrs_sports_props]
proportions = np.array(proportion_rows)
labels = nbrs_sports_props.index

plt.figure(figsize=(14, 10))
sns.heatmap(
    proportions,
    annot=False,
    xticklabels=labels,
    yticklabels=labels,
    cmap="Blues",
)
plt.title("Proportion of sports nearest neighbors (top 100)")
plt.show()

# Finding poses specific to each sport

In [None]:
# Share of each pose's nearest neighbours that carry the pose's own sport label.
pose_df["props_same_sport"] = pose_df.apply(
    lambda row: row["nbrs_props"][row["sport"]],
    axis=1,
)

In [None]:
# Average same-sport neighbour share per sport, shown as a horizontal bar chart.
props_per_sport = pose_df.groupby("sport")["props_same_sport"].mean()

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(props_per_sport.index, props_per_sport.values)
ax.set_title("Proportion of same sport nearest neighbors (top 100)")
plt.show()

In [None]:
# For each threshold t, plot the share of poses whose same-sport neighbour proportion
# exceeds t: one curve per sport plus one dashed curve over all poses.
thresholds = np.arange(0, 1.1, 0.1)


def _share_above(poses, cutoffs):
    """Fraction of rows in ``poses`` with ``props_same_sport`` strictly above each cutoff."""
    return [poses[poses.props_same_sport > t].shape[0] / poses.shape[0] for t in cutoffs]


plt.figure(figsize=(12, 10))
for sport in pose_df.sport.unique():
    # Filter once per sport rather than once per threshold.
    sport_df = pose_df[pose_df.sport == sport]
    plt.plot(thresholds, _share_above(sport_df, thresholds), label = sport)

props_poses_unique = _share_above(pose_df, thresholds)
plt.plot(thresholds, props_poses_unique, label = "All Sports", ls = "--", color = "black")

plt.title("Proportion of unique poses (top 100)")
# x is the cut-off on the same-sport neighbour share; y is the share of poses above it.
# The previous y label described the x quantity.
plt.xlabel("Threshold on proportion of nearest poses in the same sport")
plt.ylabel("Proportion of poses above threshold")
plt.legend(loc = "upper right")
plt.xlim(0,1.2)
plt.show()

### Check unique poses

In [None]:
# Show up to `show_n` random "unique" poses per sport, i.e. poses for which more than
# `threshold` of the nearest neighbours belong to the same sport.
threshold = 0.5
show_n = 5
unique_poses = pose_df[pose_df.props_same_sport > threshold].reset_index(drop=True)

# NOTE: rebinds `sports` to the sports that still have at least one unique pose.
sports = unique_poses.sport.unique()
# squeeze=False keeps `axs` two-dimensional even with a single sport or show_n == 1.
fig, axs = plt.subplots(nrows=len(sports), ncols=show_n, figsize=(3 * show_n, 3 * len(sports)), squeeze=False)
for i, sport in enumerate(sports):
    sport_poses = unique_poses[unique_poses.sport == sport]
    n_sample = min(sport_poses.shape[0], show_n)
    sample_poses = sport_poses.sample(n_sample).reset_index(drop = True)

    for j, pose in sample_poses.iterrows():
        draw_pose(pose, ax = axs[i, j], cut = True, show_frame=False)
    # Title the first panel of every row. The previous `if j == 0` ran after the inner loop,
    # so it only fired for sports with exactly one sampled pose.
    axs[i, 0].set_title(sport)
    # Blank the panels of sports with fewer than `show_n` unique poses.
    for j in range(n_sample, show_n):
        axs[i, j].axis("off")

plt.tight_layout()
plt.show()

## Find representatives of unique poses

In [None]:
# Poses for which more than half of the nearest neighbours share their sport, counted per sport.
# NOTE(review): 0.5 is hard-coded here instead of using `threshold` -- confirm they should match.
unique_poses = pose_df[pose_df.props_same_sport > 0.5].reset_index(drop=True)
unique_poses.sport.value_counts()

In [None]:
# Keep only the sports with more than 100 unique poses and display their names.
unique_poses_counts = unique_poses.sport.value_counts()
unique_poses_counts = unique_poses_counts[unique_poses_counts > 100]
unique_poses_counts.index

In [None]:
# 2D UMAP embedding of the unique poses' angle vectors, one panel per sport that has more
# than `min_n_poses` unique poses.
threshold = 0.5
min_n_poses = 100
unique_poses = pose_df[pose_df.props_same_sport > threshold].reset_index(drop=True)

unique_poses_counts = unique_poses.sport.value_counts()
unique_poses_counts = unique_poses_counts[unique_poses_counts > min_n_poses]
sports = unique_poses_counts.index

ncols = 4
nrows = -(-len(sports) // ncols)  # ceiling division
# squeeze=False guarantees a 2D array to flatten, whatever the grid shape.
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(4 * ncols, 4 * nrows), squeeze=False)
axs = axs.flatten()

for i, sport in enumerate(sports):
    # Not named `umap`: that would rebind the global name that `import umap` binds to the module.
    sport_reducer = UMAP(n_components=2)
    sport_poses = unique_poses[unique_poses.sport == sport]
    embeddings = sport_reducer.fit_transform(sport_poses["angle_vec"].tolist())

    axs[i].scatter(embeddings[:,0], embeddings[:,1], s=2)
    axs[i].set_title(sport)

# Blank the trailing panels when the number of sports is not a multiple of `ncols`.
for ax in axs[len(sports):]:
    ax.axis("off")
plt.show()

In [None]:
def find_representative_poses(sport, n_poses_per_cluster = 3, k = None):
    """Cluster the unique poses of one sport and show sample poses per cluster.

    The unique poses (rows of the global ``pose_df`` whose ``props_same_sport`` exceeds
    the global ``threshold``) of ``sport`` are embedded in 2D with UMAP on ``angle_vec``,
    clustered with KMeans on that embedding, and a few random members of every cluster
    are drawn.

    Args:
        sport: Sport label to analyse.
        n_poses_per_cluster: Maximum number of poses drawn per cluster.
        k: Number of clusters. When ``None`` (default) the embedding is shown first and
            the value is requested interactively via ``input``; pass an integer to run
            non-interactively.

    Returns:
        None. Figures are displayed as a side effect.
    """
    unique_poses = pose_df[pose_df.props_same_sport > threshold].reset_index(drop=True)
    # Copy so that adding the "labels" column below does not write into a slice.
    sport_poses = unique_poses[unique_poses.sport == sport].copy()

    reducer = UMAP(n_components=2, n_neighbors=10, min_dist=0.1)
    embeddings = reducer.fit_transform(sport_poses["angle_vec"].tolist())

    plt.figure(figsize=(6, 6))
    plt.scatter(embeddings[:,0], embeddings[:,1], s=2)
    plt.title(f"{sport} poses")
    plt.show()

    if k is None:
        k = int(input("How many clusters do you want to find? "))
    kmeans = KMeans(n_clusters=k, random_state=0).fit(embeddings)
    sport_poses["labels"] = kmeans.labels_

    # Plot embedding colored by clusters
    plt.figure(figsize=(6, 6))
    plt.scatter(embeddings[:,0], embeddings[:,1], c=kmeans.labels_, s=2)
    plt.show()

    # Plot representative poses (up to `n_poses_per_cluster` for each cluster).
    # squeeze=False keeps `axs` two-dimensional when k == 1 or n_poses_per_cluster == 1.
    fig, axs = plt.subplots(nrows=k, ncols=n_poses_per_cluster, figsize=(n_poses_per_cluster * 3, k * 3), squeeze=False)

    for i in range(k):
        members = sport_poses[sport_poses.labels == i]
        # A cluster can hold fewer poses than requested; sampling more would raise.
        n_shown = min(n_poses_per_cluster, len(members))
        cluster_poses = members.sample(n_shown).reset_index(drop=True)
        for j, pose in cluster_poses.iterrows():
            draw_pose(pose, ax = axs[i,j], cut = True)
            axs[i,j].set_title(f"Cluster {i}")
        for j in range(n_shown, n_poses_per_cluster):
            axs[i,j].axis("off")
    plt.show()

In [None]:
# Interactive: the function prompts for the number of clusters via input().
find_representative_poses("Weightlifting")

In [None]:
# Interactive: the function prompts for the number of clusters via input().
find_representative_poses("Cycling")

In [None]:
# Interactive: the function prompts for the number of clusters via input().
find_representative_poses("Badminton")

In [None]:
# Interactive: the function prompts for the number of clusters via input().
find_representative_poses("Golf")

In [None]:
# Interactive: the function prompts for the number of clusters via input().
find_representative_poses("Sport Climbing")

# Walkthrough

In [None]:
def walkthrough(start_pose_id, end_pose_id, n_steps, n_neighbors=1, metric = "cosine", unique = True):
    """Build a path of poses that moves gradually from one pose to another.

    The ``angle_vec`` features of the start and end pose are linearly interpolated in
    ``n_steps`` intermediate points; for each point one pose is drawn at random among
    its ``n_neighbors`` nearest poses in the global ``pose_df``.

    Args:
        start_pose_id: Row position (as used by ``pose_df.iloc``) of the first pose.
        end_pose_id: Row position (as used by ``pose_df.iloc``) of the last pose.
        n_steps: Number of intermediate poses.
        n_neighbors: Number of nearest poses considered at each step.
        metric: Metric forwarded to ``sklearn.neighbors.NearestNeighbors``.
        unique: When true, poses already on the path are not picked again.

    Returns:
        DataFrame with the ``n_steps + 2`` poses of the path in order; the original
        index is kept as an ``index`` column.

    Raises:
        ValueError: If ``unique`` is set and no unvisited pose is left.
    """
    start_feature = np.array(pose_df.iloc[start_pose_id]["angle_vec"])
    end_feature = np.array(pose_df.iloc[end_pose_id]["angle_vec"])

    # The neighbour index does not depend on the step, so fit it once rather than per step.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric=metric).fit(pose_df["angle_vec"].tolist())
    n_total = len(pose_df)

    path_poses_ids = [start_pose_id]

    for i in range(1,n_steps+1):
        # Interpolation weights; the exact start (w2=0) and end (w2=1) points are excluded.
        w2 = i/(n_steps+1)
        w1 =  1 -  w2
        query = (start_feature * w1 + end_feature * w2).reshape(1, -1)
        _, candidates = nbrs.kneighbors(query)
        candidates = list(candidates[0])
        if unique:
            visited = set(path_poses_ids)
            unseen = [c for c in candidates if c not in visited]
            if not unseen:
                # Every close pose is already on the path (common with small n_neighbors).
                # Widen the search just enough to reach poses not visited yet, instead of
                # failing in np.random.choice on an empty list.
                k_wide = min(len(visited) + n_neighbors, n_total)
                _, wide = nbrs.kneighbors(query, n_neighbors=k_wide)
                unseen = [c for c in wide[0] if c not in visited][:n_neighbors]
            if not unseen:
                raise ValueError("No unvisited pose left to continue the walkthrough")
            candidates = unseen

        next_pose_id = np.random.choice(candidates)
        path_poses_ids.append(next_pose_id)

    path_poses_ids.append(end_pose_id)

    return pose_df.iloc[path_poses_ids].reset_index(drop=False)

In [None]:
# Draw five random walkthroughs: one figure per path, one panel per pose along the path.
for _ in range(5):
    # `walkthrough` indexes `pose_df` with `.iloc`, so pick two distinct row positions.
    # `pose_df.sample(2).index` returns index labels, which stop matching positions once
    # rows have been filtered out.
    sample = np.random.choice(len(pose_df), size=2, replace=False)

    test = walkthrough(sample[0], sample[1], n_steps = 10, n_neighbors = 5, unique = True)

    # squeeze=False always yields an array, so no special case for a single panel is needed.
    fig, axs = plt.subplots(nrows=1, ncols=test.shape[0], figsize=(3 * test.shape[0], 3), squeeze=False)
    axs = axs.flatten()

    for i, pose in test.iterrows():
        draw_pose(pose, ax = axs[i], cut = True)
    plt.show()

### 2D visualization

In [None]:
import umap

In [None]:
# Embed one long walkthrough in 2D with UMAP and highlight its start and end poses.
# `walkthrough` indexes `pose_df` with `.iloc`, so pick two distinct row positions rather
# than index labels.
sample = np.random.choice(len(pose_df), size=2, replace=False)
path = walkthrough(sample[0], sample[1], n_steps = 100, n_neighbors = 100, unique = True)

# Use the `UMAP` class imported at the top of the notebook; the name `umap` may have been
# rebound to an estimator instance by an earlier cell.
reducer = UMAP(n_neighbors=5, min_dist=0.3, metric='cosine')
embedding = reducer.fit_transform(path["angle_vec"].tolist())

plt.figure(figsize=(10, 10))
plt.scatter(embedding[:, 0], embedding[:, 1], c="grey", alpha=0.5)
# The first and last rows of `path` are the start and end poses.
plt.scatter(embedding[0, 0], embedding[0, 1], c="red", alpha=1, label = "Start")
plt.scatter(embedding[-1, 0], embedding[-1, 1], c="green", alpha=1, label = "End")
plt.legend()
plt.title("UMAP embedding of poses")
plt.show()