# Workout Detector
### The models built here will use keypoints extracted from PoseNet as features and the type of exercise as the label

## Imports

In [471]:
import os
import pandas as pd
import re
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

## Read in the training and test sets

In [457]:
# Load the train and test splits. Paths are relative to the notebook's
# working directory.
train_dataset = pd.read_csv("./training_set.csv")
test_dataset = pd.read_csv("./test_set.csv")

## Convert the keypoints into numpy arrays

In [458]:
# A single number inside a keypoint pair: optional minus sign, integer part,
# optional fractional part (a bare trailing dot such as "12." is accepted) and
# optional exponent.
_KEYPOINT_NUMBER = r"-?\d+(?:\.\d*)?(?:[eE][-+]?\d+)?"

# One bracketed "[x y]" pair; the two numbers are captured as groups so the
# sign is kept when they are converted to float.
_KEYPOINT_PAIR_RE = re.compile(
    r"\[\s*(" + _KEYPOINT_NUMBER + r")\s+(" + _KEYPOINT_NUMBER + r")\s*\]"
)


def convert_keypoints_to_np(keypoint_string):
    """Parse a textual list of ``[x y]`` pairs into a NumPy array.

    Every bracketed, whitespace-separated pair of numbers found in
    ``keypoint_string`` becomes one row of the result. Text that does not
    form such a pair is ignored.

    Parameters
    ----------
    keypoint_string : str or numpy.ndarray
        Text containing pairs such as ``"[[ 1.5 -2.25]\\n [ 3.0  4.75]]"``.
        (Presumably the printed form of a NumPy array stored in the CSV;
        confirm against the code that wrote the file.) A value that is
        already an ``ndarray`` is returned unchanged so the conversion cell
        can be re-run safely.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_pairs, 2)``. When no pair is found the result is
        an empty array of shape ``(0,)``.
    """
    # Already converted (e.g. the cell was executed twice): nothing to parse.
    if isinstance(keypoint_string, np.ndarray):
        return keypoint_string

    # findall() with two groups yields (x_text, y_text) tuples, sign included.
    pairs = [
        [float(x_text), float(y_text)]
        for x_text, y_text in _KEYPOINT_PAIR_RE.findall(keypoint_string)
    ]
    return np.array(pairs)

In [459]:
# Replace the raw keypoint text in both splits with parsed NumPy arrays.
train_dataset["keypoints"] = train_dataset["keypoints"].transform(convert_keypoints_to_np)
test_dataset["keypoints"] = test_dataset["keypoints"].transform(convert_keypoints_to_np)

## Encode labels as 0 or 1

In [460]:
# Learn the label -> integer mapping from the training labels only, then apply
# that same mapping to both splits.
le = preprocessing.LabelEncoder().fit(train_dataset["label"])

train_dataset["label"] = le.transform(train_dataset["label"])
test_dataset["label"] = le.transform(test_dataset["label"])

## Drop invalid keypoint rows

In [465]:
def drop_invalid_keypoint_rows(dataset, dataset_type, expected_shape=(17, 2)):
    """Return ``dataset`` without the rows whose keypoints have the wrong shape.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with a ``"keypoints"`` column. A row is kept only when its
        value has a ``shape`` attribute equal to ``expected_shape``.
    dataset_type : str
        Name used in the printed summary (e.g. ``"Train"``).
    expected_shape : tuple of int, optional
        Shape a valid keypoint array must have. Defaults to ``(17, 2)``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the valid rows in their original order, with the
        index reset to ``0..n-1``. The input frame is not modified.
    """
    expected_shape = tuple(expected_shape)

    # Iterate over the values themselves rather than looking them up by
    # label, so the check does not depend on what the frame's index is.
    # Values without a ``shape`` attribute count as invalid.
    is_valid = np.array(
        [getattr(keypoints, "shape", None) == expected_shape
         for keypoints in dataset["keypoints"]],
        dtype=bool,
    )
    missing_count = int((~is_valid).sum())

    print("The {} data had {} rows with invalid keypoints".format(dataset_type, missing_count))

    # Select the valid rows by position; this stays correct for non-default
    # or duplicated index labels.
    return dataset.iloc[np.flatnonzero(is_valid)].reset_index(drop=True)

In [466]:
# Remove training rows whose keypoints could not be retrieved
train_dataset = drop_invalid_keypoint_rows(dataset=train_dataset, dataset_type="Train")

# Remove test rows whose keypoints could not be retrieved
test_dataset = drop_invalid_keypoint_rows(dataset=test_dataset, dataset_type="Test")

The Train data had 0 rows with invalid keypoints
The Test data had 0 rows with invalid keypoints


## Stack keypoints to create 3D Feature vectors and create Train and Test Splits

In [469]:
def stack_keypoints(dataset, dataset_type):
    """Turn a dataset frame into a stacked feature array and a label array.

    The per-row arrays in the ``"keypoints"`` column are stacked along a new
    leading axis, and the ``"label"`` column is converted to a NumPy array.
    The shapes of both results are printed, tagged with ``dataset_type``.

    Returns
    -------
    tuple
        ``(features, labels)``.
    """
    keypoint_column = dataset["keypoints"].to_numpy()
    stacked = np.stack(keypoint_column, axis=0)
    print("Dimensions of the {} features: {}".format(dataset_type, stacked.shape))

    label_values = dataset["label"].to_numpy()
    print("Dimensions of the {} labels: {}".format(dataset_type, label_values.shape))

    return stacked, label_values

In [476]:
X_train, Y_train = stack_keypoints(dataset=train_dataset, dataset_type="Train")
X_test, Y_test = stack_keypoints(dataset=test_dataset, dataset_type="Test")

Dimensions of the Train features: (758, 17, 2)
Dimensions of the Train labels: (758,)
Dimensions of the Test features: (492, 17, 2)
Dimensions of the Test labels: (492,)


## Train an SVM on the dataset

In [510]:
def train_and_evaluate_svc(x_train, x_test, y_train, y_test):
    """Scale the keypoint features, fit a LinearSVC and print its accuracy.

    Steps, in order: shuffle each split with a fixed seed, standardise the
    features one last-axis slice at a time (scaler fitted on the training
    split only), flatten the last two axes into one feature vector per
    sample, fit the classifier, and print train and test accuracy.

    Parameters
    ----------
    x_train, x_test : numpy.ndarray
        Rank-3 feature arrays; the code indexes them as ``[:, :, i]``.
    y_train, y_test : array-like
        Labels aligned with the first axis of the matching feature array.

    Returns
    -------
    None
        Shapes and accuracies are printed, nothing is returned.
    """
    # Permute the samples with a fixed seed, keeping features and labels aligned
    x_train, y_train = shuffle(x_train, y_train, random_state=0)
    x_test, y_test = shuffle(x_test, y_test, random_state=0)

    # Standardise with StandardScaler, one slice of the last axis at a time.
    # Each scaler is fitted on the training slice and re-used for the test slice.
    for coord in range(x_train.shape[2]):
        coord_scaler = StandardScaler()
        x_train[:, :, coord] = coord_scaler.fit_transform(x_train[:, :, coord])
        x_test[:, :, coord] = coord_scaler.transform(x_test[:, :, coord])

    # Collapse the last two axes so every sample is a single flat vector
    train_width = x_train.shape[1] * x_train.shape[2]
    x_train = x_train.reshape((x_train.shape[0], train_width))
    test_width = x_test.shape[1] * x_test.shape[2]
    x_test = x_test.reshape((x_test.shape[0], test_width))
    print("Dimensions of the train set after modifying: {} and {}".format(x_train.shape, y_train.shape))
    print("Dimensions of the test set after modifying: {} and {}".format(x_test.shape, y_test.shape))

    # Fit the classifier, then report accuracy on both splits
    svc = LinearSVC(random_state=0, max_iter=100000)
    svc.fit(x_train, y_train)
    train_accuracy = svc.score(x_train, y_train)
    print("The train accuracy is {}".format(train_accuracy))
    test_accuracy = svc.score(x_test, y_test)
    print("The test accuracy is: {}".format(test_accuracy))

In [511]:
train_and_evaluate_svc(x_train=X_train, x_test=X_test, y_train=Y_train, y_test=Y_test)

Dimensions of the train set after modifying: (758, 34) and (758,)
Dimensions of the test set after modifying: (492, 34) and (492,)
The train accuracy is 1.0
The test accuracy is: 0.4166666666666667
