## Dataset setup

In [41]:
dataset_location = "../dataset"
categories = {
    0: "back bowled",
    1: "back too low",
    2: "correct",
    3: "knees inward",
    4: "knees too far forward",
    5: "legs not far",
    6: "legs too far",
    7: "not deep enough",
    8: "too deep enough"
}

In [42]:
def preprocess_file(df):
    # Normalize data
    pose_mean = df.stack().mean()
    pose_std = df.stack().std()
    df = (df - pose_mean) / pose_std
    return df


In [43]:
import pickle
import os
import numpy as np
import pandas as pd

import preprocess


def deserialize(file_path):
    print(f"working on {file_path}...")
    with open(file_path, 'rb') as f:
        pose = pickle.load(f)

    return pose


def find_files(folder_dir):
    files = []
    for file in os.listdir(folder_dir):
        if file.endswith(".pose"):
            filepath = os.path.join(folder_dir, file)
            print(f"FOUND FILE: {filepath}")
            if os.path.getsize(filepath) > 0:      
                files.append(os.path.join(folder_dir, file))
            else:
                print(F"ERR: {filepath} EMPTY, skipping...")

    return files


def convert_to_df(pose):
    new_pose = []
    for point in pose:
        new_pose.append([point['x'], point['y'], point['z']])

    return pd.DataFrame(new_pose)


# oops
def find_deserialize_preprocess(folder_dir):
    files = find_files(folder_dir)
    _poses = []
    for file in files:
        _poses.append(preprocess_file(convert_to_df(deserialize(file))))

    print("DONE :O")
    return _poses


## Data collection
This generates X and Y, where each decision Y[i] corresponds to pose X[i]

In [47]:
import os

data_x = []
data_y = []

for category_id in categories.keys():
    # get path
    category_path = os.path.join(dataset_location, categories[category_id])
    poses = find_deserialize_preprocess(category_path)
    # print(len(poses))
    data_x += poses
    data_y += [category_id] * len(poses)
    # print(len(data_x))

FOUND FILE: ../dataset\back bowled\back_bowled_0.pose
FOUND FILE: ../dataset\back bowled\back_bowled_1.pose
FOUND FILE: ../dataset\back bowled\back_bowled_10.pose
FOUND FILE: ../dataset\back bowled\back_bowled_11.pose
FOUND FILE: ../dataset\back bowled\back_bowled_12.pose
FOUND FILE: ../dataset\back bowled\back_bowled_13.pose
FOUND FILE: ../dataset\back bowled\back_bowled_14.pose
FOUND FILE: ../dataset\back bowled\back_bowled_15.pose
FOUND FILE: ../dataset\back bowled\back_bowled_16.pose
FOUND FILE: ../dataset\back bowled\back_bowled_17.pose
FOUND FILE: ../dataset\back bowled\back_bowled_18.pose
FOUND FILE: ../dataset\back bowled\back_bowled_19.pose
FOUND FILE: ../dataset\back bowled\back_bowled_2.pose
FOUND FILE: ../dataset\back bowled\back_bowled_20.pose
FOUND FILE: ../dataset\back bowled\back_bowled_21.pose
FOUND FILE: ../dataset\back bowled\back_bowled_22.pose
FOUND FILE: ../dataset\back bowled\back_bowled_23.pose
FOUND FILE: ../dataset\back bowled\back_bowled_24.pose
FOUND FILE: .

# Training
000OooOOoo0oOoo0ooo  
Using SVM right now

In [48]:
from sklearn.model_selection import train_test_split
from sklearn import svm
# Shuffle and split data
data_x = np.array(data_x)
data_y = np.array(data_y)

# reshape into 2d
nsamples, nx, ny = data_x.shape
data_x_reshaped = data_x.reshape((nsamples,nx*ny))

print(len(data_x))
# print(data_x)
print(len(data_y))
x_train, x_test, y_train, y_test = train_test_split(data_x_reshaped, data_y, test_size=0.33, shuffle=True)

clf = svm.LinearSVC(dual='auto', max_iter=80000)
clf.fit(x_train, y_train)

688
688


## Evaluation

In [51]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score

y_pred = clf.predict(x_test)
print("R^2     :", r2_score(y_test, y_pred))
print("MAE     :", mean_absolute_error(y_test,y_pred))
print("RMSE    :",np.sqrt(mean_squared_error(y_test, y_pred)))
print("Accuracy:", accuracy_score(y_test, y_pred))

R^2     : 0.49680948040109385
MAE     : 0.7368421052631579
RMSE    : 1.796683103788366
Accuracy: 0.7675438596491229
