## Dataset setup

In [29]:
import angle_calc

dataset_location = "../dataset"
categories = {
    0: "back bowled",
    1: "back too low",
    2: "correct",
    3: "knees inward",
    4: "knees too far forward",
    5: "legs not far",
    6: "legs too far",
    7: "not deep enough",
    8: "too deep enough"
}

In [30]:
def preprocess_file(df):
    print(df)
    # get angles
    return angle_calc.get_angles_of_interest(df)


In [31]:
import pickle
import os
import numpy as np
import pandas as pd

import preprocess


def deserialize(file_path):
    # print(f"working on {file_path}...")
    with open(file_path, 'rb') as f:
        pose = pickle.load(f)

    return pose


def find_files(folder_dir):
    files = []
    for file in os.listdir(folder_dir):
        if file.endswith(".pose"):
            filepath = os.path.join(folder_dir, file)
            # print(f"FOUND FILE: {filepath}")
            if os.path.getsize(filepath) > 0:      
                files.append(os.path.join(folder_dir, file))
            else:
                print(F"ERR: {filepath} EMPTY, skipping...")

    return files


def convert_to_matrix(pose):
    new_pose = []
    for point in pose:
        new_pose.append(np.array([point['x'], point['y'], point['z']]))

    return new_pose


# oops
def find_deserialize_preprocess(folder_dir):
    files = find_files(folder_dir)
    _poses = []
    for file in files:
        _poses.append(preprocess_file(convert_to_matrix(deserialize(file))))

    print("DONE :O")
    return _poses


## Data collection
This generates X and Y, where each decision Y[i] corresponds to pose X[i]

In [32]:
import os

data_x = []
data_y = []

for category_id in categories.keys():
    # get path
    category_path = os.path.join(dataset_location, categories[category_id])
    poses = find_deserialize_preprocess(category_path)
    # print(len(poses))
    data_x += poses
    data_y += [category_id] * len(poses)
    # print(len(data_x))

ERR: ../dataset\back bowled\back_bowled_34.pose EMPTY, skipping...
[array([-0.38778007, -0.40843779, -0.27954069]), array([-0.38778955, -0.44825616, -0.28226671]), array([-0.38748425, -0.44903058, -0.28180858]), array([-0.38818043, -0.44907823, -0.28181481]), array([-0.4090409 , -0.44074738, -0.26289678]), array([-0.40926573, -0.44133449, -0.26429906]), array([-0.41025481, -0.44257694, -0.26295862]), array([-0.28099757, -0.45785168, -0.23962975]), array([-0.389557  , -0.42912197, -0.16888171]), array([-0.33784652, -0.39509657, -0.26338226]), array([-0.36701718, -0.3859551 , -0.24155733]), array([-0.10935171, -0.31099358, -0.22196719]), array([-0.35917568, -0.24686173, -0.06131463]), array([-0.16461962, -0.09690667, -0.32389101]), array([-0.39292321, -0.02228728, -0.15583961]), array([-0.32666668, -0.23050003, -0.37865973]), array([-0.39562184, -0.17147455, -0.36527896]), array([-0.36721483, -0.25992411, -0.39664707]), array([-0.38352868, -0.21568307, -0.39083546]), array([-0.35738683, 

# Training
000OooOOoo0oOoo0ooo  
Using SVM right now

In [39]:
from sklearn.model_selection import train_test_split
from sklearn import svm
# Shuffle and split data
data_x = np.array(data_x)
data_y = np.array(data_y)

# reshape into 2d
data_x_reshaped = data_x
# nsamples, nx, ny = data_x.shape
# data_x_reshaped = data_x.reshape((nsamples,nx*ny))

print(len(data_x))
# print(data_x)
print(len(data_y))
x_train, x_test, y_train, y_test = train_test_split(data_x_reshaped, data_y, test_size=0.33, shuffle=True)

clf = svm.LinearSVC(dual='auto', max_iter=80000)
clf.fit(x_train, y_train)

688
688


## Evaluation

In [40]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score

y_pred = clf.predict(x_test)
print("R^2     :", r2_score(y_test, y_pred))
print("MAE     :", mean_absolute_error(y_test,y_pred))
print("RMSE    :",np.sqrt(mean_squared_error(y_test, y_pred)))
print("Accuracy:", accuracy_score(y_test, y_pred))

R^2     : -0.4741244881892157
MAE     : 2.1008771929824563
RMSE    : 3.054332556946125
Accuracy: 0.34649122807017546
