In [None]:
# Change the working directory two levels up (presumably to the repository
# root so that the `rts` package can be imported — TODO confirm).
# NOTE(review): this is not idempotent — re-running the cell climbs two more
# directory levels each time, so run it only once per kernel session.
%cd ../..
# Enable the autoreload extension.
%load_ext autoreload

# Mode 2: reload all imported modules before executing each cell.
%autoreload 2

In [None]:
import os
import cv2
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors

from rts.client.get_content import get_features
from rts.features.pose import draw_pose, process_all_poses
from rts.features.pose import KEYPOINTS_NAMES, ANGLES_ASSOCIATIONS

# Get all poses in the DB

In [None]:
# Fetch the pose features through the project client and convert them into a
# DataFrame with the project helper.
# NOTE(review): max_features=100 looks like a leftover debugging cap — the
# heading announces "all poses" and the sampling cell only keeps sports with
# more than 0.8 * n_sample (= 8000) rows; confirm the intended limit.
poses = get_features(feature_type="pose", page_size=100, max_features=100)
pose_df = process_all_poses(poses)

In [None]:
# Number of poses available for each sport.
pose_df["sport"].value_counts()

## Define sample

In [None]:
n_sample = 10000  # maximum number of poses kept per sport

# Keep only the sports that have enough poses (more than 80% of n_sample) and
# cap each of them at n_sample rows.
sampled_frames = []
for sport in pose_df.sport.unique():
    pose_sport = pose_df[pose_df.sport == sport]
    if len(pose_sport) > 0.8 * n_sample:
        sampled_frames.append(pose_sport.sample(min(len(pose_sport), n_sample)))

# Concatenate the per-sport samples directly instead of converting every row to
# a dict and back. When no sport is large enough, fall back to an empty frame
# that still carries the pose columns so that `sample.sport` keeps working.
sample = (
    pd.concat(sampled_frames, ignore_index=True)
    if sampled_frames
    else pose_df.iloc[0:0].copy()
)

In [None]:
# Number of sampled poses for each retained sport.
sample["sport"].value_counts()

# Similar poses

In [None]:
def get_nearest_neighbors(input_pose, n_neighbors=5, metric = "cosine", dist_threshold = 0.05):
    """Return the poses from other media that are closest to ``input_pose``.

    Candidates are the rows of the global ``pose_df`` whose ``media_id`` differs
    from the one of ``input_pose``; poses are compared on their ``angle_vec``.

    Args:
        input_pose: Mapping or Series providing at least ``media_id`` and
            ``angle_vec``.
        n_neighbors: Maximum number of neighbours to retrieve. It is capped at
            the number of candidate poses.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        dist_threshold: Only neighbours whose distance is strictly below this
            value are returned.

    Returns:
        DataFrame with the retained neighbours and an extra ``distance``
        column. It is empty when there is no candidate pose or when no
        neighbour is close enough.
    """
    other_poses = pose_df[pose_df["media_id"] != input_pose["media_id"]].reset_index(drop=True)
    if other_poses.empty:
        # Nothing to fit on: return an empty result with the expected columns.
        return other_poses.assign(distance=np.array([], dtype=float))

    # scikit-learn raises when more neighbours are requested than fitted samples.
    n_neighbors = min(n_neighbors, len(other_poses))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric=metric).fit(other_poses["angle_vec"].tolist())
    distances, indices = nbrs.kneighbors([input_pose["angle_vec"]])

    # `indices` are positions in the fitted data, i.e. in `other_poses`.
    results = other_poses.iloc[indices[0]].reset_index(drop=True)
    results["distance"] = distances[0]
    return results[results["distance"] < dist_threshold]

In [None]:
# Draw five random query poses, each one next to its closest matches.
n_neighbors = 5
for _ in range(5):
    input_pose = pose_df.sample(1).iloc[0]
    results = get_nearest_neighbors(input_pose, n_neighbors=n_neighbors)

    n_panels = n_neighbors + 1
    fig, axs = plt.subplots(nrows=1, ncols=n_panels, figsize=(n_panels * 3, 3))
    axs = axs.flatten()

    # First panel: the query pose; following panels: its neighbours in order.
    query_ax, neighbor_axs = axs[0], axs[1:]
    draw_pose(input_pose, ax=query_ax, cut=True)
    query_ax.set_title("Input pose")
    for rank, pose in results.iterrows():
        draw_pose(pose, ax=neighbor_axs[rank], cut=True)
    plt.show()

# Matching Analytics

In [None]:
def matching_analytics(input_pose, n_neighbors=100, dist_threshold=0.05, show_top_n=5):
    """Plot the nearest neighbours of a pose together with summary statistics.

    Two figures are shown: the input pose next to its ``show_top_n`` closest
    matches, then a grid with the neighbour distances, the sports of the
    neighbours, and the keypoint / angle scores of the input pose next to the
    distribution of the same scores among its neighbours.

    Args:
        input_pose: Pose row providing ``keypoints``, ``angle_score``,
            ``video_name`` and ``sport`` in addition to what
            ``get_nearest_neighbors`` needs.
        n_neighbors: Number of neighbours requested from ``get_nearest_neighbors``.
        dist_threshold: Maximum distance forwarded to ``get_nearest_neighbors``.
        show_top_n: Number of closest neighbours drawn in the first figure.

    Returns:
        The neighbours DataFrame returned by ``get_nearest_neighbors``.
    """
    results = get_nearest_neighbors(input_pose, n_neighbors=n_neighbors, dist_threshold=dist_threshold)

    # Closest matches
    fig, axs = plt.subplots(nrows=1, ncols=show_top_n + 1, figsize=((show_top_n + 1) * 3, 3))
    axs = axs.flatten()
    draw_pose(input_pose, ax=axs[0], cut=True)
    axs[0].set_title("Input pose")
    # Enumerate instead of relying on the DataFrame index to pick the axis.
    for rank, (_, pose) in enumerate(results[:show_top_n].iterrows(), start=1):
        draw_pose(pose, ax=axs[rank], cut=True)
    plt.suptitle(f"Top {show_top_n} nearest neighbors", fontsize=20, y=1.03)
    plt.show()

    if results.empty:
        # There is nothing to summarise without neighbours.
        print(f"No neighbor within distance {dist_threshold}; skipping analytics.")
        return results

    # Analytics
    fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(16, 16), gridspec_kw={'width_ratios': [1, 3], 'height_ratios': [1, 2, 2]})
    axs = axs.flatten()

    axs[0].boxplot(results["distance"])
    axs[0].set_title("Distances")

    sports_counts = results["sport"].value_counts()
    axs[1].bar(sports_counts.index, sports_counts.values)
    axs[1].set_title(f"Sports (n={len(results)})")
    # Rotate the existing tick labels; set_xticklabels() without fixed tick
    # positions makes matplotlib emit a warning.
    axs[1].tick_params(axis="x", labelrotation=45)

    # The third component of every keypoint is plotted as its score.
    input_keypoints_scores = [k[2] for k in input_pose["keypoints"]]
    keypoints_df_scores = pd.DataFrame(results["keypoints"].apply(lambda x: [k[2] for k in x]).tolist(), columns=KEYPOINTS_NAMES)

    axs[2].barh(KEYPOINTS_NAMES, input_keypoints_scores)
    axs[2].set_title("Input keypoints scores")
    keypoints_df_scores.boxplot(column=KEYPOINTS_NAMES, vert=False, ax=axs[3], grid=False)
    axs[3].set_title("Mean keypoints scores of nearest neighbors")

    angle_names = list(ANGLES_ASSOCIATIONS.keys())
    input_angle_scores = input_pose["angle_score"]
    angle_df_scores = pd.DataFrame(results["angle_score"].tolist(), columns=angle_names)

    axs[4].barh(angle_names, input_angle_scores)
    axs[4].set_title("Input angles scores")
    angle_df_scores.boxplot(column=angle_names, vert=False, ax=axs[5], grid=False)
    axs[5].set_title("Mean angles scores of nearest neighbors")

    plt.suptitle(f"Nearest neighbors analytics for {input_pose['video_name']} ({input_pose['sport']})", fontsize=20, y=1.03)
    plt.tight_layout()
    plt.show()

    return results

In [None]:
# Run the analytics on one randomly drawn pose.
input_pose = pose_df.sample(n=1).iloc[0]
results = matching_analytics(input_pose=input_pose)

# Analytics by sports

In [None]:
# Work on an explicit copy: adding columns to a boolean-mask selection raises
# pandas' SettingWithCopyWarning.
sample = sample[sample["sport"] != "Non-Sport"].copy()

n_neighbors = 100
nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(sample["angle_vec"].tolist())
# Without a query, kneighbors() returns the neighbours of every fitted pose,
# the pose itself being excluded from its own neighbours.
distances, indices = nbrs.kneighbors()

sample["distance"] = distances.tolist()
sample["nbrs_indices"] = indices.tolist()
# The neighbour indices are positions in the fitted data, hence .iloc. Only the
# "sport" column is looked up instead of materialising full rows per pose.
sample["nbrs_sports"] = sample["nbrs_indices"].map(lambda x: sample["sport"].iloc[x].value_counts().to_dict())

In [None]:
# Give every pose a count for each sport (0 when a sport is absent from its
# neighbours), then turn the counts into proportions of its neighbourhood.
sports = sample["sport"].unique()
sample["nbrs_sports"] = sample["nbrs_sports"].map(
    lambda counts: Counter({name: counts.get(name, 0) for name in sports})
)
sample["nbrs_props"] = sample["nbrs_sports"].map(
    lambda counts: Counter({name: count / sum(counts.values()) for name, count in counts.items()})
)

In [None]:
# Pool the neighbour counts of all poses of a sport, then normalise the pooled
# counts of each sport so that they sum to one.
# NOTE: `nbrs_sports_props` is reassigned further down with the per-pose average.
nbrs_sports_counts = sample.groupby("sport")["nbrs_sports"].sum()
nbrs_sports_props = nbrs_sports_counts.apply(
    lambda pooled: Counter(
        {name: pooled.get(name, 0) / np.sum(list(pooled.values())) for name in sports}
    )
)[sports]

In [None]:
from collections import defaultdict

def merge_and_mean_dicts(dicts):
    """Average the values of several dicts key by key.

    Each key is averaged over the dicts in which it appears; a dict that lacks
    a key does not contribute to that key's mean.

    Args:
        dicts: Iterable of mappings with numeric values.

    Returns:
        A dict mapping every key seen to the mean of its values, in order of
        first appearance.
    """
    values_by_key = {}
    for mapping in dicts:
        for name, number in mapping.items():
            values_by_key.setdefault(name, []).append(number)

    return {name: sum(numbers) / len(numbers) for name, numbers in values_by_key.items()}

# Average the per-pose neighbour proportions within each sport and order the
# result like `sports`.
nbrs_sports_props = (
    sample.groupby("sport")["nbrs_props"]
    .agg(merge_and_mean_dicts)
)[sports]

In [None]:
labels = nbrs_sports_props.index
# Look every cell up by sport name so that the matrix columns are guaranteed to
# follow the order of the tick labels, whatever the key order of the per-sport
# mappings is.
proportions = np.array(
    [[props.get(sport, 0) for sport in labels] for props in nbrs_sports_props]
)

fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(proportions, annot=False, xticklabels=labels, yticklabels=labels, cmap="Blues", ax=ax)
ax.set_title("Proportion of sports nearest neighbors (top 100)")
ax.set_xlabel("Sport of the nearest neighbors")
ax.set_ylabel("Sport of the query pose")
plt.show()

# Finding poses specific to each sport

In [None]:
# For every pose, the share of its nearest neighbours that belong to the same
# sport as the pose itself.
sample["props_same_sport"] = sample.apply(
    lambda row: row["nbrs_props"][row["sport"]],
    axis=1,
)

In [None]:
# Mean share of same-sport neighbours for each sport.
props_per_sport = sample.groupby("sport")["props_same_sport"].mean()

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(props_per_sport.index, props_per_sport.values)
ax.set_title("Proportion of same sport nearest neighbors (top 100)")
plt.show()

In [None]:
# Thresholds applied to the share of same-sport neighbours of a pose.
thresholds = np.arange(0, 1.0, 0.1)

fig, ax = plt.subplots(figsize=(10, 8))
for sport in sample.sport.unique():
    # Select the poses of the sport once instead of once per threshold.
    sport_props = sample.loc[sample.sport == sport, "props_same_sport"]
    # The mean of a boolean mask is the fraction of poses above the threshold.
    ax.plot(thresholds, [(sport_props > t).mean() for t in thresholds], label=sport)

all_props = sample["props_same_sport"]
ax.plot(
    thresholds,
    [(all_props > t).mean() for t in thresholds],
    label="All Sports",
    ls="--",
    color="black",
)

ax.set_title("Proportion of unique poses (top 100)")
# x is the threshold on the same-sport share, y the fraction of poses above it
# (the previous y label described the x quantity).
ax.set_xlabel("Threshold on the proportion of same-sport nearest neighbors")
ax.set_ylabel("Proportion of poses above the threshold")
ax.legend()
ax.set_xlim(0, 1)
plt.show()

### Check unique poses

In [None]:
# `props_same_sport` is computed on `sample` (not on `pose_df`), so the
# selection has to be made on `sample`.
sample[sample.props_same_sport > 0.6].sport.value_counts()

In [None]:
threshold = 0.4
# `props_same_sport` only exists on `sample`, so sport-specific poses are
# selected from it rather than from `pose_df`.
unique_poses = sample[sample.props_same_sport > threshold].reset_index(drop=True)

for sport in unique_poses.sport.unique():
    sport_poses = unique_poses[unique_poses.sport == sport]
    # Dedicated name: reusing `n_sample` would overwrite the per-sport sample
    # size defined at the top of the notebook.
    n_shown = min(len(sport_poses), 5)
    sample_poses = sport_poses.sample(n_shown).reset_index(drop=True)

    # squeeze=False always returns a 2-D array of axes, even for one panel.
    fig, axs = plt.subplots(nrows=1, ncols=n_shown, figsize=(3 * n_shown, 3), squeeze=False)
    for ax, (_, pose) in zip(axs[0], sample_poses.iterrows()):
        draw_pose(pose, ax=ax, cut=True)
    plt.suptitle(f"Unique poses for {sport}", fontsize=20, y=1.05)
    plt.show()

# Walkthrough

In [None]:
def walkthrough(start_pose_id, end_pose_id, n_steps, n_neighbors=1, metric = "cosine", unique = True):
    """Build a sequence of poses that gradually moves from one pose to another.

    The ``angle_vec`` of the start and end poses are linearly interpolated in
    ``n_steps`` intermediate points. For each point, one pose is drawn at random
    among its ``n_neighbors`` nearest poses of the global ``pose_df``.

    Args:
        start_pose_id: Position (as used by ``pose_df.iloc``) of the first pose.
        end_pose_id: Position (as used by ``pose_df.iloc``) of the last pose.
        n_steps: Number of interpolation points between the two poses.
        n_neighbors: Number of nearest poses considered at each point. Capped
            at the number of poses in ``pose_df``.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        unique: When true, a pose already on the path (or the end pose) is
            never picked again. A step for which every candidate is already
            used is skipped, so the path can hold fewer than ``n_steps``
            intermediate poses.

    Returns:
        DataFrame with the poses of the path in order (start pose first, end
        pose last); the original position of each pose is kept in the
        ``index`` column created by ``reset_index``.
    """
    start_feature = np.array(pose_df.iloc[start_pose_id]["angle_vec"])
    end_feature = np.array(pose_df.iloc[end_pose_id]["angle_vec"])

    # The fitted model does not depend on the step: build it once. scikit-learn
    # raises when more neighbours are requested than fitted samples.
    n_neighbors = min(n_neighbors, len(pose_df))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric=metric).fit(pose_df["angle_vec"].tolist())

    path_poses_ids = [start_pose_id]

    for i in range(1, n_steps + 1):
        # Interpolation weights: w2 grows from 1/(n_steps+1) to n_steps/(n_steps+1).
        w2 = i / (n_steps + 1)
        w1 = 1 - w2
        feature = start_feature * w1 + end_feature * w2

        _, candidates = nbrs.kneighbors(feature.reshape(1, -1))
        candidates = candidates[0]
        if unique:
            used = set(path_poses_ids) | {end_pose_id}
            candidates = [c for c in candidates if c not in used]
            if not candidates:
                # np.random.choice raises on an empty list: skip this step.
                continue

        path_poses_ids.append(np.random.choice(candidates))

    path_poses_ids.append(end_pose_id)

    return pose_df.iloc[path_poses_ids].reset_index(drop=False)

In [None]:
for _ in range(10):
    # Draw two distinct positions: `walkthrough` looks poses up with .iloc, and
    # a dedicated name keeps the `sample` DataFrame from being overwritten.
    start_id, end_id = np.random.choice(len(pose_df), size=2, replace=False)

    walk = walkthrough(start_id, end_id, n_steps=10, n_neighbors=5, unique=True)

    # squeeze=False always returns a 2-D array of axes.
    fig, axs = plt.subplots(nrows=1, ncols=len(walk), figsize=(3 * len(walk), 3), squeeze=False)
    for ax, (_, pose) in zip(axs[0], walk.iterrows()):
        draw_pose(pose, ax=ax, cut=True)
    plt.show()

## 2D visualization

In [None]:
import umap

In [None]:
# Draw two distinct positions: `walkthrough` looks poses up with .iloc, and a
# dedicated name keeps the `sample` DataFrame from being overwritten.
start_id, end_id = np.random.choice(len(pose_df), size=2, replace=False)
path = walkthrough(start_id, end_id, n_steps=100, n_neighbors=100, unique=True)

# Project the angle vectors of the path to two dimensions.
reducer = umap.UMAP(n_neighbors=5, min_dist=0.3, metric='cosine')
embedding = reducer.fit_transform(path["angle_vec"].tolist())

fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(embedding[:, 0], embedding[:, 1], c="grey", alpha=0.5)
# The path is ordered, so the first and last rows are the two endpoints.
ax.scatter(embedding[0, 0], embedding[0, 1], c="red", alpha=1, label="Start")
ax.scatter(embedding[-1, 0], embedding[-1, 1], c="green", alpha=1, label="End")
ax.legend()
ax.set_title("UMAP embedding of poses")
plt.show()