# Before We Start
1. Python, Pandas, VS Code setup instructions 
2. Dataset link or simulated pose data provided 

# Step 0: Install & Import Modules

In [None]:
# Install stuff

In [6]:
# Import stuff
import pandas as pd
from pandas import DataFrame
import copy, json
from os import listdir

In [30]:
# Helper indexer that will convert from a joint number to the joint name, using COCO Pose Output Format
# This may or may not be helpful later for debugging
coord_to_name = ["head",
                 "hips",
                 "left_shoulder",
                 "left_elbow",
                 "left_hand",
                 "right_shoulder",
                 "right_elbow",
                 "right_hand",
                 "left_hip",
                 "left_knee",
                 "left_foot",
                 "right_hip",
                 "right_knee",
                 "right_foot",
                 "left_eye",
                 "right_eye",
                 "left_ear",
                 "right_ear"]

# Step 1: Load and examine pose data 

In [38]:
# Index downloaded pose data from OpenPoses.com
def index_poses(path="poses"):
    json_list = listdir(path)
    json_list.sort()        # Needed to prevent the JSONs from being in a seemingly random order
    return json_list

json_list = index_poses()

# Extract pose keypoints from all of the JSONs
def read_all_poses(json_list,path="poses"):
    pose_list = []
    for i in range(len(json_list)):
        item = path + "/" + json_list[i]
        with open(item) as file:
            data = json.load(file)
            points_list = list(data[0]["people"][0]["pose_keypoints_2d"])   # Hardcoded to fit the JSON scheme
            unneeded_3d_coord = 1.0   # the JSONs provide z-axis data, which we don't need, and it's all 1.0
            points_list = [i for i in points_list if i != unneeded_3d_coord]
            pose_list.append(points_list)
    return pose_list

pose_list = read_all_poses(json_list)

# Generate labels to use in the DataFrame
def generate_column_labels(coords=18):
    labels = []
    for i in range(coords):
        formatted_number = f"{i:02d}"           # Formats to 2 digit string representation of int, which will only work well to 99 points
        labels.append(formatted_number + "x")
        labels.append(formatted_number + "y")
    return labels

labels = generate_column_labels()

# Create DataFrame with proper coordinate labels
def prep_dataframe(labels):
    return DataFrame(columns=labels)

df = prep_dataframe(labels)

# Store each pose in the DataFrame
def store_pose(pose, labels, df):
    if len(pose) != len(labels):
        print("Error: Size mismatch between coordinates list and labels")
        return df
    temp_points = copy.copy(pose)   # Use a copy so that we don't potentially lose poses if we reuse the variable
    df.loc[len(df)] = temp_points   # Append to df. No need to return; it's an in-place edit

def store_all_poses(pose_list, labels, df):
    for pose in pose_list:
        store_pose(pose, labels, df)

store_all_poses(pose_list, labels, df)
print(df)


           00x         00y         01x         01y         02x         02y  \
0   416.970074  111.688862  395.498931  200.326142  343.197430  201.977769   
1   370.682954  112.752594  397.372456  176.547014  332.927073  176.547014   
2   298.092040  133.001079  306.938936  193.033584  253.857562  192.401663   
3   401.390829  113.188565  410.100915  170.675131  361.905107  172.417148   
4   371.250272  179.687440  349.656885  220.539793  288.378355  225.208634   
5   394.256385  157.521886  407.238060  220.807551  360.720392  210.530391   
6   343.242699  110.979140  382.994881  178.207095  321.028244  183.468413   
7   377.927925  105.643599  383.056230  183.707791  325.505256  189.975719   
8   380.413173  116.665346  384.598824  198.808747  324.953298  198.285540   
9   396.912812  110.173967  395.756666  207.868248  317.138783  206.134030   
10  405.833455  110.839554  388.264989  194.714816  337.826487  185.647220   
11  419.768267   98.331520  400.757826  180.134025  356.400129  

# Step 2: Clean and filter incomplete rows 

In [40]:
# Figure out which rows are incomplete so we know what we're about to get rid of
def print_empties(df):
    df_nan = df[df.isna().any(axis=1)] 
    if df_nan.empty:
        print("No incomplete rows")
    else:
        print(df_nan)

print_empties(df)

# Now, get rid of them
df.dropna(inplace=True)

No incomplete rows


# Step 3: Create derived features: speed, angle, joint distance 

In [51]:
# Grab x,y coordinates for a joint for a pose
def pack_coord(joint_no,pose_no, df):
    joint_str = f"{joint_no:02d}"
    coord = [df.iloc[[pose_no]][joint_str + "x"]]
    coord.append(df.iloc[[pose_no]][joint_str + "y"])
    coord.pop(0)
    return coord

# Calculate the movement vector for the two points
def compute_vect(p1, p2):
    return ([p1[0]-p2[0],p1[1]-p2[1]])

sample = pack_coord(0,0,df)     # This is generating a np.float64 instead of a float -- a minor mismatch
print(sample)

[0    111.688862
Name: 00y, dtype: float64]


# Step 4: Assign dance move labels 

# Step 5: Convert pose DataFrame to movement Vectors