# Evaluating Taekwondo Moves
System to recognize and evaluate video of performed taekwondo moves

**Instructions**: Run all cells. When prompted to choose a file, upload `VideoPose3D-2021-02-23.zip` at the first prompt and `IdealSet_NPYs&MP4s.zip` at the second.


### Get Existing Custom Datasets as of February 23, 2021 & Ideal Set Videos

In [None]:
# First upload prompt (Colab file picker); choose `VideoPose3D-2021-02-23.zip` here.
from google.colab import files
videopose3d = files.upload()

In [None]:
# `unzip_folder` is only defined further down in the Setup section, so calling it
# here fails with a NameError when the cells are run top to bottom. Extract with
# the standard library instead; /content is the base directory in Google Colab.
import zipfile
with zipfile.ZipFile('VideoPose3D-2021-02-23.zip') as archive:
    archive.extractall('/content')

In [None]:
# Second upload prompt; choose `IdealSet_NPYs&MP4s.zip` here.
ideals = files.upload()

In [None]:
# `unzip_folder` is only defined further down in the Setup section, so calling it
# here fails with a NameError when the cells are run top to bottom. Extract with
# the standard library instead; /content is the base directory in Google Colab.
import zipfile
with zipfile.ZipFile('IdealSet_NPYs&MP4s.zip') as archive:
    archive.extractall('/content')

## Setup

In [None]:
import sys
import os
from os.path import exists, join, basename, splitext
import time
import math
import pickle
import numpy as np
import cv2
import json
import random
from google.colab.patches import cv2_imshow
from datetime import datetime

In [None]:
# Upgrade scikit-learn; KMeans and KNeighborsClassifier are imported from it further down.
!pip install -U scikit-learn

In [None]:
def show_local_mp4_video(file_name, width=640, height=480):
    """
    Builds an inline HTML5 video player for a local .mp4 file
    :param file_name: path to the .mp4 file to embed
    :param width: value for the `width` attribute of the <video> tag
    :param height: value for the `height` attribute of the <video> tag
    :return: an IPython `HTML` object with the video embedded as a base64 data URI; it is only rendered when it is the last expression of a cell or is passed to `display`
    """
    import base64
    from IPython.display import HTML
    # read through a context manager so the file handle is closed as soon as the bytes are loaded
    with open(file_name, 'rb') as video_file:
        video_encoded = base64.b64encode(video_file.read())
    return HTML(data='''<video width="{0}" height="{1}" alt="test" controls>
                        <source src="data:video/mp4;base64,{2}" type="video/mp4" />
                      </video>'''.format(width, height, video_encoded.decode('ascii')))

In [None]:
def zip_folder(zip_folder_name, direc):
    """
    Zips up the given folder by shelling out to the `zip` command
    :param zip_folder_name: the name for the zip folder, ex. `smedaram.zip`
    :param direc: the directory for which all the files inside are zipped
    """
    # -r recurses into `direc`; -q keeps zip from listing every file it adds
    !zip -q -r "$zip_folder_name" "$direc"

In [None]:
def unzip_folder(zip_folder_name, direc='/content'):
    """
    Unzips the given .zip file by shelling out to the `unzip` command
    :param zip_folder_name: the .zip file to extract
    :param direc: the directory the contents are extracted into (passed to `unzip -d`); defaults to /content, which is the base directory in Google Colab
    """
    !unzip "$zip_folder_name" -d "$direc"

In [None]:
def remove_folder(folder_name):
    """
    Recursively deletes the given folder by shelling out to `rm -r`
    :param folder_name: the folder to delete
    """
    !rm -r "$folder_name"

In [None]:
def get_time():
    """
    Returns the current local time as a string with no spaces or colons,
    i.e. the default `datetime` text with ' ' replaced by '_' and ':' by '-'
    :return: the timestamp string
    """
    # str(datetime) is isoformat with a space separator, so ask for '_' directly
    stamp = datetime.now().isoformat(sep='_')
    return stamp.replace(':', '-')

In [None]:
# Maps the full name of each of the 10 moves to its short abbreviation.
# The first seven entries are the hand techniques, the last three the kicks
# (same split as the `hand_techniques` / `kicking_techniques` lists below).
abbreviations = {
    "High block": "hb",
    "Middle block (in-to-out)": "mbio",
    "Middle block (out-to-in)": "mboi",
    "Knife hand block": "khb",
    "Low block": "lb",
    "Punch (middle-level)": "p",
    "High punch (face-level)": "hp",
    "Front kick": "fk",
    "Round kick": "rk",
    "Side kick": "sk"
}

## Skeleton Representation

### Create Custom Datasets

In [None]:
# git_repo_url = 'https://github.com/facebookresearch/VideoPose3D.git'
# project_name = splitext(basename(git_repo_url))[0]
# if not exists(project_name):
#   # clone and install dependencies
#    !git clone -q --depth 1 $git_repo_url

In [None]:
# Upgrade pip itself before the dependency installs in the following cells.
!python3 -m pip install --upgrade pip

In [None]:
# Install dependencies: pin PyYAML to 5.1, then report the torch version,
# whether CUDA is available, and the gcc version of this runtime.
!pip install pyyaml==5.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

In [None]:
# install detectron2: (Colab has CUDA 10.1 + torch 1.8)
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
import torch
# The wheel index below is specific to cu101 / torch 1.8, so stop here if the runtime ships another torch version.
assert torch.__version__.startswith("1.8")   # need to manually install torch 1.8 if Colab changes its default version
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
# exit(0)  # After installation, you need to "restart runtime" in Colab. This line can also restart runtime

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [None]:
def create_custom_dataset(leadup, ip_folder, slow):
    """
    Creates custom dataset for all the files inside ip_folder
    :param leadup: the path to ip_folder; it is spliced after `../../` in the shell commands below, so it has to be relative to the notebook's working directory
    :param ip_folder: the name of the folder which contains all video files for which the custom dataset should be made
    :param slow: boolean for whether or not the video should be slowed down before its processed frame by frame; currently obsolete, since discovered the slowing down the video does little to change a blurry frame in and of itself
    :return: the name of the output folder (where the .npy and .mp4 files will be stored) and the name of the custom dataset
    """
    p = os.path.join(leadup, ip_folder)
    if slow == True:
        # write slowed-down copies into a sibling `<ip_folder>_slow` folder and process those instead
        oup_folder = f'{ip_folder}_slow'
        new_p = os.path.join(leadup, oup_folder)
        if not os.path.exists(new_p):
          os.makedirs(new_p)
        
        for f in os.listdir(p):
            # print(f)
            vidfn, ext = os.path.splitext(f)
            # NOTE(review): `slow_down_video` is not defined in the visible part of this notebook — confirm it exists before passing slow=True
            slow_down_video(os.path.join(p, f), os.path.join(new_p, f), speed=2.0)
    else:
        new_p = p
        oup_folder = ip_folder

    # the 2D keypoint outputs go into a folder in the current working directory (not under `leadup`)
    twod_outputs_folder = f'{oup_folder}_2d_outputs'
    if not os.path.exists(twod_outputs_folder):
        os.makedirs(twod_outputs_folder)
    # run VideoPose3D's infer_video_d2.py (keypoint R-CNN config) over the .mp4 files in new_p; paths are prefixed with ../../ because the command first cds into VideoPose3D/inference
    !cd VideoPose3D/inference && python3 infer_video_d2.py --cfg COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml --output-dir ../../"$twod_outputs_folder"  --image-ext mp4 ../../"$new_p"/
    # turn the 2D outputs into a custom dataset named `<oup_folder>_dataset` with prepare_data_2d_custom.py
    !cd VideoPose3D/data/ && python3 prepare_data_2d_custom.py -i ../../"$twod_outputs_folder" -o "$oup_folder"_dataset
    return oup_folder, f'{oup_folder}_dataset'

## Pose Classification

In [None]:
def reconstruct_video_helper(path, threed_outputs_folder, custom_dataset, video_fn):
    """
    Runs the command to reconstruct the given video into its 3D representation; creates a .npy (frame by frame position of joints) file and .mp4 video
    :param path: the path to the folder that contains the video
    :param threed_outputs_folder: the (path to the) folder where the output is stored
    :param custom_dataset: the name of the custom dataset which stores the 2D results from Detectron
    :param video_fn: the name of the video to be reconstructed, without the `.mp4` extension (the command appends it)
    """
    
    p = os.path.join(path, video_fn)
    print(p)
    # print(f'cd VideoPose3D/ && python run.py -d custom -k {custom_dataset} -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject {video_fn}.mp4 --viz-action custom --viz-camera 0 --viz-video ../{p}.mp4 --viz-output ../{threed_outputs_folder}/{video_fn}_si.mp4 --viz-export ../{threed_outputs_folder}/{video_fn}_si --viz-size 6')
    # the command runs from inside VideoPose3D/, hence the ../ in front of the input video and both outputs (`<video_fn>_si.mp4` and the `<video_fn>_si` export)
    !cd VideoPose3D/ && python run.py -d custom -k "$custom_dataset" -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject "$video_fn".mp4 --viz-action custom --viz-camera 0 --viz-video ../"$p".mp4 --viz-output ../"$threed_outputs_folder"/"$video_fn"_si.mp4 --viz-export ../"$threed_outputs_folder"/"$video_fn"_si --viz-size 6

def reconstruct_videos(leadup, prefix, dts, folder=None):
    """
    Calls `reconstruct_video_helper` once for every entry of the chosen video folder
    :param leadup: the path to the folder
    :param prefix: name of the folder when the videos are the training set (used only when `folder` is None)
    :param dts: the name of the custom dataset
    :param folder: name of the folder when the videos are not part of the training set; takes precedence over `prefix`
    :return: the path of the output folder, `<leadup>/<name>_3d_outputs`
    """
    # `folder` wins when given, otherwise fall back to the training-set prefix
    source_name = prefix if folder is None else folder
    videos_dir = os.path.join(leadup, source_name)
    threed_outputs_folder = os.path.join(leadup, f'{source_name}_3d_outputs')

    if not os.path.exists(threed_outputs_folder):
        os.makedirs(threed_outputs_folder)

    for entry in os.listdir(videos_dir):
        print(entry)
        # the helper expects the video name without its extension
        stem = os.path.splitext(entry)[0]
        reconstruct_video_helper(videos_dir, threed_outputs_folder, dts, stem)
    return threed_outputs_folder

### Generate Vocabulary Poses with K-means Clustering

In [None]:
# full names of the moves, split by technique type (same names as the keys of `abbreviations`)
hand_techniques = ["High block", "Middle block (in-to-out)", "Middle block (out-to-in)", "Knife hand block", "Low block", "Punch (middle-level)", "High punch (face-level)"]
kicking_techniques = ["Front kick", "Round kick", "Side kick"]

# number of vocabulary poses (k-means clusters) for hand and for kicking techniques
hand_vp = 10
kick_vp = 12

# name of the pickle file with the trained kmeans objects; `classify_videos` parses both counts back out of the `h<hand>k<kick>` part of this name
ks_name = f'ks-h{hand_vp}k{kick_vp}.p'

In [None]:
from sklearn.cluster import KMeans

# number of joints in the skeleton — presumably the 17-joint Human3.6M layout, going by the name; TODO confirm
h36m_num_joints = 17
# angle id -> [start, center, end] joint indices; `calc_angle` measures the angle at the
# center joint between the vectors pointing to the start and end joints.
# NOTE(review): the anatomical meaning of each index depends on the joint ordering of the
# 3D reconstruction output, which is not shown here — verify before relabelling.
h36m_joints = {
    0: [7,8,9],
    1: [14,8,11],
    2: [8,14,15],
    3: [14,15,16],
    4: [8,11,12],
    5: [11,12,13],
    6: [7,1,2],
    7: [1,2,3],
    8: [7,4,5],
    9: [4,5,6],
    10: [16,7,13],
    11: [3,7,6],
    12: [8,7,16],
    13: [8,7,13]
}
def calc_angle(angle_jgroup, coordinates):
    """
    Calculates all the angles as specified by angle_jgroup and coordinates
    :param angle_jgroup: a dictionary whose values are [start, center, end] joint indices; each angle is measured at the center joint
    :param coordinates: a numpy array which represents all the x,y,z coordinates of the joints, indexed by joint
    :return: a numpy array of angles between joints in radians, one per entry of angle_jgroup in iteration order (nan when a joint coincides with its center joint)
    """
    angles = []
    for key in angle_jgroup:
        s, c, e = angle_jgroup[key]
        s_hat = coordinates[s] - coordinates[c]
        e_hat = coordinates[e] - coordinates[c]

        cos_alpha = (s_hat @ e_hat) / (np.linalg.norm(s_hat) * np.linalg.norm(e_hat))
        # floating-point rounding can push the cosine of (anti)parallel vectors just outside
        # [-1, 1], which makes acos fail with a domain error; clamp first (np.clip keeps nan as nan)
        angles.append(float(np.arccos(np.clip(cos_alpha, -1.0, 1.0))))
    # build the array once instead of re-allocating it with np.append for every angle
    return np.array(angles, dtype=float)

def get_angles(data):
    """
    Calculates angles between joints for each frame in a given video, represented as `data`
    :param data: the loaded contents of a video's .npy file: a sequence of frames, each holding the coordinates of every joint
    :return: a numpy array of shape (number of frames, number of angles in `h36m_joints`); row i holds the angles of frame i in radians
    """
    ang = np.empty((len(data), len(h36m_joints)))
    for i, frame in enumerate(data):
        # write each frame's angles into its own row; the previous chained assignment
        # rebound `ang` to the per-frame vector, so the row write failed on the first frame
        ang[i] = calc_angle(h36m_joints, frame)
    return ang

In [None]:
def get_cluster_centroids(k, ips):
    """
    Fits k-means with `k` clusters on the joint angles of every frame of the given videos
    :param k: number of clusters
    :param ips: all the input files, a non-empty list of .npy files
    :return: scikit-learn's fitted kmeans object, which contains .cluster_centers_
    """
    # angles of the first video, followed by those of the remaining ones
    per_video = [get_angles(np.load(ips[0]))]
    per_video.extend(get_angles(np.load(path)) for path in ips[1:])
    # stack all frames of all videos into one sample matrix
    X = np.concatenate(per_video)

    # fixed random_state keeps the clustering reproducible between runs
    return KMeans(n_clusters=k, random_state=0).fit(X)

In [None]:
def get_ips(leadup, prefix):
    """
    Gets all the input filenames, as in all the 3D reconstruction .npy files within the folder
    :param leadup: the path to the folder
    :param prefix: the name of the folder (most often the prefix, if its the training set); `_3d_outputs` is appended to it
    :return: the list of all input filenames (including paths), sorted by file name
    """
    threed_outputs_folder = os.path.join(leadup, f'{prefix}_3d_outputs')
    # os.listdir returns entries in arbitrary order; sort so that callers which index the
    # result or pair it with a parallel list get the same order on every run
    ips = [
        os.path.join(threed_outputs_folder, f)
        for f in sorted(os.listdir(threed_outputs_folder))
        if os.path.splitext(f)[1] == '.npy'
    ]
    print(ips)
    return ips

### KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
def classify_videos(ips, tech, ks_name, new_ids=False):
    """
    Classifies all videos in `ips` according to whether its a hand or foot technique and to the kmeans objects containing vocabulary poses into one of the 20 possibilities (10 moves, each side)
    :param ips: list of strings for the names of all .npy files that should be classified into a technique
    :param tech: None, or a list of strings, either 'h' for hand technique or 'k' for kicking technique, that match up with the list of filenames in `ips`; None means every video uses the kicking-technique count #obsolete in sample runs since we just assume its the technique which requires the largest number of vocabulary poses (so kicking techniques)
    :param ks_name: the name of the pickle file where the kmeans objects from training are stored (containing the vocabulary poses for each of the 20 possibilities), ex. ks-h10k12.p
    :param new_ids: boolean for whether or not new identifiers should be generated, assumed to already be stored in `moveids.p`
    :return: a list of tuples, the first value is the string abbreviation for which move the given video has been classified into and the second a string `votes/total` for how many out of the total vocabulary poses led to that conclusion
    :raises ValueError: if an entry of `tech` is neither 'h' nor 'k'
    """
    path_to_ks = os.path.join(lu, ks_name)
    # parse the `h<hand>k<kick>` suffix from the file name only, so a '-' or '.p'
    # somewhere in the directory part cannot shift the slice
    ks_stem = os.path.splitext(os.path.basename(ks_name))[0]
    suffix = ks_stem[ks_stem.find('-') + 1:]

    npz_path = os.path.join(lu, f'{suffix}.npz')
    if not os.path.exists(npz_path):
        # NOTE(review): `get_data` is not defined in the visible part of this notebook — confirm it is available
        X, y, identifier2move, move2identifier = get_data(path_to_ks, suffix, new_identifiers=new_ids)
    else:
        with np.load(npz_path) as npzfile:
            X, y = npzfile['arr_0'], npzfile['arr_1']
        # NOTE: pickle executes code while loading; only use a `moveids.p` that comes from a trusted source
        with open(os.path.join(lu, 'moveids.p'), 'rb') as ids_file:
            identifier2move, move2identifier = pickle.load(ids_file)

    # neigh = KNeighborsClassifier(n_neighbors=5)
    neigh = KNeighborsClassifier(n_neighbors=10)
    # neigh = KNeighborsClassifier(n_neighbors=12)
    neigh.fit(X, y)

    final_res = []
    for i, ip in enumerate(ips):
        # number of vocabulary poses to extract from this video, read from the suffix
        if tech is None or tech[i] == 'k':
            num_vp = int(suffix[suffix.find('k') + 1:])
        elif tech[i] == 'h':
            num_vp = int(suffix[suffix.find('h') + 1:suffix.find('k')])
        else:
            # previously this fell through and reused (or never bound) num_vp
            raise ValueError(f"unknown technique code {tech[i]!r} for {ip}; expected 'h' or 'k'")

        sample_kmeans = get_cluster_centroids(num_vp, [ip])

        # classify every vocabulary pose of the sample in one batched call
        predictions = neigh.predict(sample_kmeans.cluster_centers_)

        # count how many poses voted for each move
        tally = {}
        for identifier in predictions:
            m = identifier2move[identifier]
            tally[m] = tally.get(m, 0) + 1

        # on a tie the move that received its first vote earliest wins
        maxvote_move = max(tally, key=tally.get)
        final_res.append((maxvote_move, f'{tally[maxvote_move]}/{sum(tally.values())}'))
    return final_res

## Temporal Alignment (DTW)

In [None]:
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw
def dtw_compare_samples(ips, oup):
    """
    Measures how far each video in `ips` is from the reference `oup` using fastdtw on their per-frame joint angles
    :param ips: the list of strings of the names of input .npy files
    :param oup: the path to the ideal .npy file for this given move
    :return: a list with one fastdtw distance per entry of `ips`, in the same order
    """
    # angles of the reference are computed once and reused for every comparison
    reference = get_angles(np.load(oup))
    # fastdtw returns (distance, path); only the distance is kept
    return [
        fastdtw(get_angles(np.load(sample)), reference, dist=euclidean)[0]
        for sample in ips
    ]


In [None]:
# Shallow-clone the project repository unless a folder with the repository's name already exists.
git_repo_url = 'https://github.com/SriMed/sr_eval_tkd_v2.git'
# repository name = last URL component without the `.git` extension
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # clone and install dependencies
   !git clone -q --depth 1 $git_repo_url

In [None]:
# folder of the cloned project repository; `classify_videos` and `convert2mp4` read this global to locate their files
lu = 'sr_eval_tkd_v2'

In [None]:
# make the conversion script executable so that `convert2mp4` can run it directly
!chmod u+x "$lu"/convert2mp4.sh

In [None]:
def convert2mp4(base):
  """
  Runs the repository's `convert2mp4.sh` script from inside the given directory
  :param base: the directory to cd into before running the script; the script is looked up at `../<lu>/convert2mp4.sh` relative to it, so `base` has to sit one level below the directory that contains the `lu` folder
  """
  # NOTE(review): what the script converts and where it writes is not visible here — presumably the files in `base`; confirm
  !cd "$base" && ./../"$lu"/convert2mp4.sh

In [None]:
from IPython.display import clear_output
def evaluate_video():
    """
    Asks for the name of a video, then
    1) ensures its in the necesssary directories and that it's a .mp4 file
    2) Creates a custom dataset
    3) Reconstructs the video
    4) Classifies the video
    5) Temporally aligns with DTW
    6) Shows the reconstructed video and prints the results
    :raises FileNotFoundError: if the video is neither at the entered path nor already inside its working folder
    """
    import shutil
    from IPython.display import display

    file = input('Enter the name of your video (ex. i/am/a/boss/test.mp4): ')  # assume in directory
    lu_pipe, tail = os.path.split(file)
    vidfn, ext = os.path.splitext(tail)

    if ext != ".mp4":
        convert2mp4(lu_pipe)
        # NOTE(review): assumes convert2mp4.sh writes `<name>.mp4` next to the source — confirm.
        # Without this the unconverted file would be moved and the .mp4-only pipeline would find nothing.
        converted = os.path.join(lu_pipe, f'{vidfn}.mp4')
        if os.path.exists(converted):
            file = converted

    # the pipeline works on folders, so the video gets a folder of its own named after it
    ip_f_path = os.path.join(lu_pipe, vidfn)
    os.makedirs(ip_f_path, exist_ok=True)
    # the original `!mv` was split over two lines, so it ran without arguments and nothing was moved
    destination = os.path.join(ip_f_path, os.path.basename(file))
    if os.path.exists(file):
        shutil.move(file, destination)
    elif not os.path.exists(destination):
        raise FileNotFoundError(f'Could not find the video {file}')

    oup_f, dts = create_custom_dataset(lu_pipe, vidfn, slow=False)
    threed_outputs_folder_path = reconstruct_videos(lu_pipe, None, dts, oup_f)
    video = show_local_mp4_video(f'{threed_outputs_folder_path}/{vidfn}_si.mp4')
    ips = get_ips(lu_pipe, oup_f)
    res = classify_videos(ips, None, ks_name)
    move = res[0][0]
    # the vote comes back as the string 'votes/total'; turn it into a fraction
    votes, _, total = res[0][1].partition('/')
    confidence = int(votes) / int(total)

    # wipe the logs of the steps above, then show the video: a bare HTML object inside a
    # function is never rendered, and anything displayed before clear_output would be erased
    clear_output()
    display(video)

    print(f"Your video was classified as a {move} with {confidence * 100}% confidence")
    oup = os.path.join('ideal_3d_outputs', f'{move}_ideal_si.npy')
    d = dtw_compare_samples(ips, oup)
    print(f"Relative to the exemplar, your score is {d}. The closer to 0, the better your execution. Try again to get closer to 0.")

In [None]:
# Entry point: prompts for a video path and runs the whole evaluation pipeline on it.
evaluate_video()