In [None]:
# Move the working directory two levels up from the notebook's location;
# relative paths used later in the notebook are resolved from there.
%cd ../..
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: reload all imported modules before each cell execution, so edits to
# local packages are picked up without restarting the kernel.
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import umap
import cv2

from emv.features.pose import draw_pose, process_all_poses
from emv.client.get_content import get_features

# Load poses

In [None]:
# Fetch the pose features through the client (per the keyword names: at most
# 10,000 features, 100 per page) and turn them into the DataFrame used below.
pose_query = {"feature_type": "pose", "page_size": 100, "max_features": 10000}
poses = get_features(**pose_query)
pose_df = process_all_poses(poses)

In [None]:
n_sample = 10000  # maximum number of poses kept per sport
min_fraction = 0.8  # a sport is kept only if it has more than min_fraction * n_sample poses
sample_seed = 42  # fixed seed so the sampled subset is identical on every run
# NOTE(review): with these values a sport needs more than 8000 poses to be
# kept - confirm this matches the amount of data loaded above.

# Down-sample every sufficiently represented sport to at most n_sample poses.
# The per-sport frames are concatenated directly (instead of round-tripping
# each row through a dict) so column dtypes are preserved and no per-row
# Python objects are built. sort=False keeps sports in first-appearance order.
sampled_frames = []
for sport, pose_sport in pose_df.groupby("sport", sort=False):
    if len(pose_sport) > min_fraction * n_sample:
        sampled_frames.append(
            pose_sport.sample(n=min(len(pose_sport), n_sample), random_state=sample_seed)
        )

if sampled_frames:
    pose_df = pd.concat(sampled_frames, ignore_index=True)
else:
    # No sport passed the threshold: keep an empty frame that still carries
    # the original columns, so later cells fail on "no rows" rather than on a
    # missing column.
    pose_df = pose_df.iloc[0:0].copy()

# Visualize poses

## Plot a sample of poses

In [None]:
# Grid layout for the preview figure: 3 rows x 6 columns, 3 inches per panel
nrows, ncols = 3, 6
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(3 * ncols, 3 * nrows))
axs = axs.flatten()

# Pick one random pose per panel and draw it on the matching axes
sample_poses = pose_df.sample(nrows * ncols).reset_index(drop=True)
for ax, (_, pose) in zip(axs, sample_poses.iterrows()):
    draw_pose(pose, ax=ax, cut=True)

plt.show()


# Dimensionality Reduction

In [None]:
# Display (rows, columns) of the pose table before fitting UMAP on it.
pose_df.shape

In [None]:
# Project the pose angle vectors with UMAP using cosine distance.
# random_state pins the stochastic optimisation so that re-running the
# notebook reproduces the same layout (UMAP runs single-threaded when a seed
# is set, so this fit is slower than the unseeded one).
angle_vectors = pose_df["angle_vec"].tolist()
reducer = umap.UMAP(n_neighbors=500, min_dist=0.3, metric='cosine', random_state=42)
embedding = reducer.fit_transform(angle_vectors)

In [None]:
# Store the two embedding coordinates next to each pose
pose_df["x"], pose_df["y"] = embedding[:, 0], embedding[:, 1]

# One scatter call per sport so that each sport gets its own legend entry
plt.figure(figsize=(10, 10))
for sport_name in pose_df["sport"].unique():
    in_sport = pose_df["sport"] == sport_name
    plt.scatter(pose_df.loc[in_sport, "x"], pose_df.loc[in_sport, "y"], s=0.01, label=sport_name)
# Points are tiny (s=0.01); enlarge the legend markers so they stay readable
plt.legend(markerscale=30)
plt.show()

In [None]:
# Hyper-parameter sweep: one UMAP embedding per (n_neighbors, min_dist) pair,
# drawn in a grid with n_neighbors varying by row and min_dist by column.
n_neighbors = [50, 100, 250, 500, 1000]
min_dists = [0.1, 0.3, 0.5, 0.7, 0.9]

# The input vectors are the same for every fit, so build the list only once.
angle_vectors = pose_df["angle_vec"].tolist()

# squeeze=False keeps axs two-dimensional even when one of the lists holds a
# single value, so axs[i, j] indexing always works.
fig, axs = plt.subplots(
    nrows=len(n_neighbors), ncols=len(min_dists), figsize=(20, 20), squeeze=False
)

for i, n in enumerate(n_neighbors):
    for j, d in enumerate(min_dists):
        # Dedicated names: the sweep must not overwrite the notebook-level
        # `reducer` / `embedding`, otherwise re-running another cell would
        # silently pick up the last grid fit.
        grid_reducer = umap.UMAP(n_neighbors=n, min_dist=d, metric='cosine')
        grid_embedding = grid_reducer.fit_transform(angle_vectors)

        ax = axs[i, j]
        ax.scatter(grid_embedding[:, 0], grid_embedding[:, 1], s=1)
        ax.set_xticks([])
        ax.set_yticks([])
        # Label only the outer edge of the grid: columns on top, rows on the left.
        if i == 0:
            ax.set_title(f"min_dist = {d}")
        if j == 0:
            ax.set_ylabel(f"n_neighbors = {n}")
plt.show()

## Top Sports

In [None]:
# Ten sports with the most poses, ranked by their number of non-null
# `video_name` entries. Only that column is counted instead of every column.
top_sports = (
    pose_df.groupby("sport")["video_name"]
    .count()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
# .copy() makes top_sports_df an independent frame, so adding columns to it
# later does not trigger SettingWithCopyWarning or touch pose_df.
top_sports_df = pose_df[pose_df["sport"].isin(top_sports)].copy()
top_sports_df.shape

In [None]:
# Fit a separate UMAP projection on the top-sports subset only.
# random_state fixes the layout across re-runs (UMAP runs single-threaded
# when a seed is set).
reducer = umap.UMAP(n_neighbors=1000, min_dist=0.3, metric='cosine', random_state=42)
embedding = reducer.fit_transform(top_sports_df["angle_vec"].tolist())
# assign() returns a new frame, so the coordinates are never written into a
# filtered slice of pose_df (which raises SettingWithCopyWarning) and
# re-running this cell simply replaces the two columns.
top_sports_df = top_sports_df.assign(umap_x=embedding[:, 0], umap_y=embedding[:, 1])

In [None]:
# Scatter the top-sports embedding with one legend entry per sport
plt.figure(figsize=(10, 10))
for sport_name in top_sports_df["sport"].unique():
    # Select this sport's rows once and reuse them for both coordinates
    sport_points = top_sports_df[top_sports_df["sport"] == sport_name]
    plt.scatter(sport_points["umap_x"], sport_points["umap_y"], s=1, label=sport_name)
# Equal scaling on both axes so distances in the projection are not distorted
plt.gca().set_aspect('equal', 'datalim')
plt.legend(markerscale=5)
plt.title('UMAP projection of the pose angles', fontsize=24)
plt.show()

## Bokeh plot

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, ImageURL
from bokeh.io import push_notebook
import base64
from io import BytesIO

# Configure Bokeh to render its plots inline in the notebook output cells.
output_notebook()

In [None]:
def image_to_base64(image_path):
    """Return the contents of a file as a base64-encoded PNG data URI.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        str: ``"data:image/png;base64,"`` followed by the base64 text of the
        file's bytes. The PNG media type is always used; the file content is
        not inspected.
    """
    uri_prefix = "data:image/png;base64,"
    with open(image_path, "rb") as image_file:
        payload = base64.b64encode(image_file.read())
    return uri_prefix + payload.decode()

def create_annotation_image(pose):
    """Render one pose with ``draw_pose`` and return it as a PNG data URI.

    The figure is written to an in-memory buffer rather than a shared
    temporary file, so the function does not need a writable
    ``data/test_images`` directory and leaves nothing behind on disk.

    Args:
        pose: A single pose record, forwarded unchanged to ``draw_pose``.

    Returns:
        str: ``"data:image/png;base64,<payload>"`` holding the rendered PNG.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        draw_pose(pose, ax=ax, cut=True)
        buffer = BytesIO()
        # Save through the figure object (not pyplot's "current figure") so
        # the right canvas is written regardless of what draw_pose does.
        fig.savefig(buffer, format="png", dpi=100, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving fails, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()


In [None]:
N_POSES = 1000  # upper bound on the number of poses rendered
EVERY_N = 5     # stride: take every 5th pose before applying the bound

# Thin the raw pose list, cap it, and render each selected pose to a data URI.
# NOTE(review): draw_pose receives raw `poses` entries here but `pose_df` rows
# in the grid plot above - confirm it accepts both.
pose_subset = poses[::EVERY_N][:N_POSES]
embedded_images = list(map(create_annotation_image, pose_subset))

In [None]:
# Same slice as the one rendered to images, so embedding rows and pictures
# line up by position.
pose_subset = poses[::EVERY_N][:N_POSES]
# Use roughly 10% of the poses actually selected as neighbours: the slice can
# hold fewer than N_POSES items. UMAP requires n_neighbors of at least 2.
n_neighbors_subset = max(2, int(0.1 * len(pose_subset)))
# random_state makes the layout reproducible across re-runs.
reducer = umap.UMAP(n_neighbors=n_neighbors_subset, min_dist=0.9, metric='cosine', random_state=42)
embedding = reducer.fit_transform([p["angle_vec"] for p in pose_subset])

In [None]:
# Embedding coordinates of every rendered pose
x, y = embedding[:, 0], embedding[:, 1]

# Bundle coordinates and data-URI images so Bokeh can reference them by column name
source = ColumnDataSource(data={"x": x, "y": y, "url": embedded_images})

# Render inline in the notebook
output_notebook()

# Square 1000 x 1000 pixel canvas for the image scatter
p = figure(width=1000, height=1000)

# Place each pose picture centred on its embedding coordinates
# (w and h are given in data units)
p.image_url(url="url", x="x", y="y", source=source, w=0.3, h=0.3, anchor="center")

# Display the figure and keep the notebook handle it returns
handle = show(p, notebook_handle=True)