In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import ast
import numpy as np

In [None]:
from emv.features.pose import filter_poses, get_angle_feature_vector

In [None]:
df = pd.read_csv('poses.csv')

In [None]:
# read_csv returns list-valued columns as their string repr; turn 'keypoints'
# back into real Python lists.
df['keypoints'] = df['keypoints'].map(ast.literal_eval)
# The same conversion, currently disabled, for the other columns:
# df['angle_vec'] = df['angle_vec'].map(ast.literal_eval)
# df['angle_score'] = df['angle_score'].map(ast.literal_eval)
# df['bbox'] = df['bbox'].map(ast.literal_eval)

In [None]:
# Per row, count the keypoints whose x and y are both exactly 0.
def _count_zero_keypoints(keypoints):
    return sum(1 for kp in keypoints if kp[0] == 0 and kp[1] == 0)

df['num_zero_keypoints'] = df['keypoints'].apply(_count_zero_keypoints)

In [None]:
# Keep only rows that contain no (0, 0) keypoints. An explicit copy is taken so
# that adding columns to df_no_zero later operates on an independent frame
# instead of a slice of df (which triggers pandas' SettingWithCopyWarning).
df_no_zero = df[df['num_zero_keypoints'] == 0].copy()

In [None]:
# Compute the angle feature vector of every pose from its keypoints.
df_no_zero['angle_vec'] = df_no_zero['keypoints'].apply(get_angle_feature_vector)

In [None]:
# Groundwork for filling (0, 0) keypoints with a per-keypoint average:
# stack the keypoint lists of every row of df into one numpy array.
keypoint_vals = np.array(df['keypoints'].tolist())

In [None]:
# select first column x
# array layout: [[[x1, y1, c1], [x2, y2, c2], ...], ...]
# Indexing [:, :, 0] keeps the first two axes and takes element 0 of the last
# one, so x is 2D (rows of df x keypoints) and holds the x coordinates.
# NOTE(review): x is not used by any later cell in the visible notebook.

x = keypoint_vals[:, :, 0]

In [None]:
# this will potentially run for a long time
df_filtered = filter_poses(df_no_zero)

In [None]:
df.shape, df_filtered.shape

In [None]:
df_filtered.to_csv('poses_filtered.csv', index=False)

In [None]:
df_filtered = pd.read_csv('poses_filtered.csv')

In [None]:
def convert_string_to_array(array_str):
    """Parse the string form of a 1-D numeric array into a list of floats.

    Accepts both the numpy repr written by ``to_csv`` (space separated,
    possibly wrapped over several lines, e.g. ``"[1. 2.\\n 3.]"``) and the
    Python list repr (comma separated, e.g. ``"[1.0, 2.0, 3.0]"``).

    Args:
        array_str: Text of the array, optionally wrapped in square brackets
            and surrounded by whitespace.

    Returns:
        A list of floats in the order they appear; ``[]`` for an empty array.

    Raises:
        ValueError: If an element cannot be converted to float.
    """
    # Drop surrounding whitespace first so the brackets are actually at the ends.
    inner = array_str.strip().strip('[]')
    # Commas become separators too; split() already treats newlines as whitespace.
    tokens = inner.replace(',', ' ').split()
    return [float(token) for token in tokens]

In [None]:
df_filtered['angle_vec'] = df_filtered['angle_vec'].apply(convert_string_to_array)

In [None]:
# Rescale every value of each angle vector by 1/180.
# NOTE: 'angle_vec' is overwritten with its own scaled values, so running this
# cell a second time divides by 180 again.
def _scale_by_180(angles):
    return [angle / 180 for angle in angles]

df_filtered['angle_vec'] = df_filtered['angle_vec'].apply(_scale_by_180)

In [None]:
df_filtered.shape

In [None]:
df = filter_poses(df_filtered)
df.shape

In [None]:
df.to_csv('poses_filtered2.csv', index=False)

In [None]:
df = pd.read_csv('poses_filtered2.csv')

In [None]:
# df['angle_vec'] = df['angle_vec'].apply(convert_string_to_array)
df['angle_vec'] = df['angle_vec'].apply(ast.literal_eval)


In [None]:
df['keypoints'] = df['keypoints'].apply(ast.literal_eval)


In [None]:
from emv.db.dao import DataAccessObject
from sqlalchemy.sql import text

In [None]:
# Look up the sport of each pose's media item.
# A single DAO and query object are reused and every distinct media_id is
# queried only once, instead of one query (and one new DAO) per row of df.
dao = DataAccessObject()
media_query = text("""SELECT * FROM media WHERE media_id = :media_id""")

sport_by_media = {}
for media_id in df['media_id'].tolist():
    if media_id in sport_by_media:
        continue
    res = dao.fetch_all(media_query, {'media_id': media_id})
    if not res:
        # Fail with the offending id instead of a bare IndexError on res[0].
        raise LookupError(f"No media row found for media_id={media_id!r}")
    sport_by_media[media_id] = res[0]['metadata']['sport']

# One entry per row of df, in row order.
sports = [sport_by_media[media_id] for media_id in df['media_id'].tolist()]

In [None]:
df['sport'] = sports

In [None]:
# Drop non-sport media. The index is reset so that row labels are again
# 0..len(df)-1 and coincide with row positions in arrays built from df (such
# as the UMAP embedding), which later cells index with the iterrows() label.
df = df[df['sport'] != 'Non-Sport'].reset_index(drop=True)

In [None]:
data = np.array(df.angle_vec.values.tolist())

In [None]:
import umap

# 2D projection of the angle vectors.
# random_state pins the otherwise stochastic layout so that re-running the
# notebook reproduces the same embedding.
reducer = umap.UMAP(
    n_neighbors=10,      # Focus more on local structure
    min_dist=0.05,       # Tighter packing of points
    metric='cosine',     # Using cosine distance
    spread=1.5,          # More spread-out projection
    random_state=42,     # Reproducible layout
)
embedding = reducer.fit_transform(data)

In [None]:
import altair as alt

In [None]:
alt.data_transformers.disable_max_rows()

In [None]:
# Down-sample Tennis for plotting: keep a reproducible random sample of at
# most 6k Tennis poses together with all poses of every other sport.
is_tennis = df['sport'] == 'Tennis'
df_tennis = df[is_tennis]
df_no_tennis = df[~is_tennis]
# sample() raises if asked for more rows than exist, so cap the sample size.
n_tennis = min(6000, len(df_tennis))
df_combined = pd.concat([df_tennis.sample(n=n_tennis, random_state=42), df_no_tennis])

In [None]:
# `embedding` has one row per row of `df`, in df's order, whereas df_combined
# is a re-ordered subset of df. Look up each df_combined row's position in df
# and pick the matching embedding rows instead of assigning the raw columns
# (which fails on length mismatch or silently misaligns the coordinates).
positions = df.index.get_indexer(df_combined.index)
df_combined['x'] = embedding[positions, 0]
df_combined['y'] = embedding[positions, 1]

In [None]:
# 2D scatter of the embedding coloured by sport.
# The x/y columns live on df_combined (df has none), and only the columns the
# chart uses are handed to Altair so the list-valued columns are not serialised.
alt.Chart(df_combined[['x', 'y', 'sport']]).mark_point().encode(
    x='x',
    y='y',
    color=alt.Color('sport', scale=alt.Scale(scheme='category20')),
    tooltip=['sport']
).properties(
    width=800,
    height=800
).interactive()

In [None]:
# Run umap for a 3D plot.
# This rebinds `reducer` and `embedding`: from here on `embedding` has three
# columns and one row per row of df. random_state makes the layout reproducible.
reducer = umap.UMAP(n_components=3, random_state=42)
data = np.array(df.angle_vec.values.tolist())
embedding = reducer.fit_transform(data)

In [None]:
len(df_combined), len(df)

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# 3D scatter of the UMAP embedding, one marker per row of df.
# Colour and hover text are taken from df, the frame the embedding was fitted
# on, so that marker i, colour i and label i all describe the same pose.
sport_labels = df['sport'].astype('category')

fig = go.Figure(data=[go.Scatter3d(
    x=embedding[:, 0],
    y=embedding[:, 1],
    z=embedding[:, 2],
    mode='markers',
    # Hovertext: show the sport of the hovered pose
    text=df['sport'].tolist(),
    hoverinfo='text',
    marker=dict(
        size=5,
        color=sport_labels.cat.codes,
        opacity=0.8
    )
)])

# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

fig.show()


In [None]:
# build the same, but on hover show a corresponding stick-man pose
# separation in this case doesn't seem to be very good, need to check manually


In [None]:
from emv.api.models import Projection, MapProjectionFeatureCreate
from emv.db.queries import create_projection, create_map_projection_feature, get_media_by_id, get_all_projections
from emv.db.dao import DataAccessObject
from sqlalchemy.sql import text

In [None]:
projections = get_all_projections()

In [None]:
projections

In [None]:
# Pick the first projection whose model_name is 'openpifpaf_fast'
# (raises IndexError when no projection matches).
openpifpaf_projections = [p for p in projections if p['model_name'] == 'openpifpaf_fast']
projection = openpifpaf_projections[0]

In [None]:
projection['projection_id']

In [None]:
# TODO: Create new features for the map projection: each pose image needs to have its own feature and vector instead of just one per clip


In [None]:
from emv.db.queries import get_feature_by_media_id_and_type, create_feature, update_feature
from emv.api.models import Feature

In [None]:
df

In [None]:
added_features = {}

In [None]:
# Build one "pose_image" Feature per row of df and pass it to create_feature().
# NOTE(review): added_features is keyed by media_id, so when several rows share
# a media_id only the Feature of the last such row stays in the dict (each
# Feature is still passed to create_feature()).
# NOTE(review): re-running this cell calls create_feature() again for every
# row — presumably this stores duplicates; confirm before re-executing.
for i, row in df.iterrows():
    print(row.media_id, row.frame_number, row.sport)
    new_feature = Feature(
        feature_type="pose_image",
        version="1",
        model_name='PifPafModel.fast',
        model_params={
            'PifPafModel': 'fast',        
        },
        data={"frame": row.frame_number, 
            "keypoints": row.keypoints,
            "sport": row.sport,
        },
        media_id=row.media_id,
    )
    # Remember the Feature built for this media_id (last one wins).
    added_features[row.media_id] = new_feature

    create_feature(new_feature)


In [None]:
query = text("SELECT media_id, feature_id FROM feature WHERE feature_type = 'pose_image'")
feature_ids = DataAccessObject().fetch_all(query)

In [None]:
len(added_features)

In [None]:
# feature_ids rows look like {'media_id': 'ioc-SEQ42757036', 'feature_id': 1320223}.
# Build the media_id -> feature_id lookup. A media_id that occurs in several
# rows keeps only the feature_id of its last row.
media_to_feature = {}
for record in feature_ids:
    media_to_feature[record['media_id']] = record['feature_id']

In [None]:
# Store one map-projection entry per pose, using the 3D embedding coordinates.
# The rows of `embedding` follow df's row order, so they are addressed by
# position (enumerate) rather than by the index label from iterrows(), which
# no longer equals the position once rows have been filtered out of df.
assert len(df) == len(embedding), "embedding must have one row per row of df"

for pos, (_, row) in enumerate(df.iterrows()):
    print(pos, row.media_id, row.frame_number, row.sport)

    # NOTE(review): media_to_feature holds a single feature_id per media_id, so
    # all poses of the same media are linked to the same feature — confirm.
    create_map_projection_feature(MapProjectionFeatureCreate(
        projection_id=projection['projection_id'],
        media_id=row.media_id,
        atlas_order=-1,
        index_in_atlas=-1,
        coordinates=[embedding[pos, 0], embedding[pos, 1], embedding[pos, 2]],
        feature_id=media_to_feature[row.media_id]
    ))

In [None]:
len(df), embedding.shape

In [None]:
# Store one map-projection entry per pose, this time with an atlas placement.
# Each entry takes the media_id of the pose at position i in df (not the `row`
# left over from an earlier loop) and a scalar feature_id looked up through
# media_to_feature (feature_ids[i] is a whole result row, and its order is not
# tied to df's order).
# NOTE(review): max_tiles_per_atlas is not defined in the visible part of this
# notebook; it must be set before this cell can run.
# NOTE(review): the preceding loop already creates one entry per pose with
# atlas_order=-1; running both presumably stores every pose twice — confirm
# which of the two is intended.
assert len(df) == len(embedding), "embedding must have one row per row of df"

for i, media_id in enumerate(df['media_id'].tolist()):
    create_map_projection_feature(MapProjectionFeatureCreate(
        projection_id=projection['projection_id'],
        media_id=media_id,
        atlas_order=i // max_tiles_per_atlas,
        index_in_atlas=i % max_tiles_per_atlas,
        coordinates=[embedding[i, 0], embedding[i, 1], embedding[i, 2]],
        feature_id=media_to_feature[media_id]
    ))