In [None]:
import json
import mimetypes
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import umap
from sklearn import preprocessing

from rts.features.pose import compute_human_angles, reshape_keypoints

In [None]:
# Notebook-wide seaborn theme: white style, notebook context, and a default
# matplotlib figure size of (14, 10).
sns.set(
    context='notebook',
    style='white',
    rc={'figure.figsize': (14, 10)},
)


In [None]:
# Root directory that is walked recursively for pose JSON files.
# It can be overridden with the IOC_SEQUENCES_DIR environment variable so the
# notebook also runs on machines where the data lives elsewhere; the default
# is the original hard-coded location.
OUTDIR = os.environ.get("IOC_SEQUENCES_DIR", "/media/data/ioc/sequences/")

In [None]:
# Load every ``*.json`` file found under OUTDIR into `poses`, keyed by the
# file's full path.
poses = {}

for root, dirs, files in os.walk(OUTDIR):
    # Sorting `dirs` in place fixes the order in which os.walk descends, and
    # sorting `files` fixes the order within a directory. The insertion order
    # of `poses` (and everything derived from it) is then stable across runs.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith(".json"):
            continue
        fpath = os.path.join(root, file)
        # Decode explicitly as UTF-8 rather than relying on the locale default.
        with open(fpath, encoding="utf-8") as f:
            poses[fpath] = json.load(f)

# Number of JSON files loaded; shown as the cell output.
count = len(poses)
count

In [None]:
# Collect, per source file, the angle lists of all poses for which every
# angle could be computed (no None entries) at a confidence threshold of 0.8.
valid_poses = defaultdict(list)

for source_path, detections in poses.items():
    for detection in detections:
        keypoints = reshape_keypoints(detection['keypoints'])
        joint_angles = compute_human_angles(keypoints, min_confidence=0.8)
        # Keep only poses whose angle list is complete.
        if None not in joint_angles:
            valid_poses[source_path].append(joint_angles)


In [None]:
# Total number of valid poses summed over all files.
count = sum(map(len, valid_poses.values()))
count

In [None]:
# Keep only the first valid pose of each file and divide every angle by 180
# (the original note: normalize max = 180, min = 0).
single_pose = {
    source_path: [angle / 180 for angle in pose_list[0]]
    for source_path, pose_list in valid_poses.items()
}

In [None]:
# UMAP is stochastic: without a fixed seed the embedding, and every plot built
# on it, changes on each run. Pin the seed so "Restart Kernel -> Run All"
# reproduces the same layout.
# NOTE(review): umap-learn may run single-threaded when random_state is set;
# confirm the runtime is still acceptable on the full dataset.
reducer = umap.UMAP(random_state=42)

In [None]:
# The dict yields one column per file; transposing gives one row per file
# (index = JSON path) and one column per normalized angle.
df = pd.DataFrame(data=single_pose).transpose()

In [None]:
# Use every column as a feature. The file path lives in the DataFrame index
# (df is built from a path-keyed dict and then transposed), so column 0 is the
# first angle, not a filename, and must not be dropped.
pose_array = df.values

In [None]:
# Fit the reducer on the pose features and keep the resulting coordinates;
# the cells below read columns 0 and 1 of this array.
embedding = reducer.fit_transform(X=pose_array)

In [None]:
# Static overview: scatter the first embedding column against the second.
xs, ys = embedding[:, 0], embedding[:, 1]
plt.scatter(xs, ys)

# Same scale on both axes, adjusting the data limits rather than the box.
plt.gca().set_aspect(aspect='equal', adjustable='datalim')
plt.title(label='UMAP projection of the IOC dataset', fontsize=24);

In [None]:
# sequences.index = sequences.seq_id

# sports = []
# for fname in df.index:
#     if "SEQ" in fname:
#         seq = fname.split('/')[-1].split('.')[0]
#         sports.append(sequences.loc[seq].sport)
#     else:
#         sports.append("unknown")

In [None]:
# le = preprocessing.LabelEncoder()
# le.fit(sports)

# encoded_sports = le.transform(sports)

In [None]:
# plt.scatter(
#     embedding[:, 0],
#     embedding[:, 1],
#     c=encoded_sports
#     )
# plt.gca().set_aspect('equal', 'datalim')
# plt.title('UMAP projection of the IOC dataset', fontsize=24);

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, ImageURL
from bokeh.io import push_notebook
# Configure Bokeh to render plots inline in the notebook for later show() calls.
output_notebook()

In [None]:
import base64
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.io import push_notebook
from PIL import Image
import io

# 2-D embedding coordinates, one entry per row of df.
x = embedding[:, 0]
y = embedding[:, 1]

# Each pose JSON is expected to have a sibling ``.jpeg`` with the same stem.
# Swap only the file extension: str.replace('json', 'jpeg') would also rewrite
# any "json" that occurs in a directory or file name.
image_paths = [os.path.splitext(json_path)[0] + '.jpeg' for json_path in df.index]

def image_to_base64(image_path):
    """Return the file at ``image_path`` as a base64-encoded ``data:`` URI.

    The MIME type in the URI is guessed from the file extension, so a
    ``.jpeg`` file is labelled ``image/jpeg`` instead of always being declared
    as PNG. When the extension is unknown or does not map to an image type,
    ``image/png`` is used, which matches the previous behaviour.

    Parameters
    ----------
    image_path : str or path-like
        Path of the image file to embed.

    Returns
    -------
    str
        ``"data:<mime>;base64,<payload>"``.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    mime_type, _ = mimetypes.guess_type(os.fspath(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("ascii")
    return f"data:{mime_type};base64,{encoded}"

# Encode every image as a base64 data URI, in the same order as x and y.
urls = [image_to_base64(path) for path in image_paths]

# Columns are referenced by name ('x', 'y', 'url') in the image_url glyph.
source = ColumnDataSource(data=dict(x=x, y=y, url=urls))

# Render Bokeh output inline in the notebook.
output_notebook()

# Derive the initial viewport from the embedding instead of a fixed (-10, 10)
# window, which hides every point that falls outside that square.
x_min, x_max = float(x.min()), float(x.max())
y_min, y_max = float(y.min()), float(y.max())
# 5% margin on each side; fall back to 1.0 when all points share a coordinate.
x_pad = 0.05 * (x_max - x_min) or 1.0
y_pad = 0.05 * (y_max - y_min) or 1.0

p = figure(
    x_range=(x_min - x_pad, x_max + x_pad),
    y_range=(y_min - y_pad, y_max + y_pad),
    width=800,
    height=800,
)

# Draw each image centred on its embedding coordinate.
p.image_url(url='url', x='x', y='y', source=source, w=0.1, h=0.1, anchor="center")

# notebook_handle=True returns a handle to the rendered plot.
handle = show(p, notebook_handle=True)
