## Dataset setup

In [106]:
# Root folder of the dataset; it is joined with a category name to locate that category's files.
dataset_location = "../dataset"

# Integer class id -> category name. The names are also used as sub-folder names under
# `dataset_location`, so they have to match the directories on disk exactly.
categories = dict(enumerate([
    "back bowled",
    "back too low",
    "correct",
    "knees inward",
    "knees too far forward",
    "legs not far",
    "legs too far",
    "not deep enough",
    "too deep enough",
]))

In [107]:
def preprocess_file(df):
    """Standardize a pose frame with one mean and one standard deviation.

    The statistics are taken over every cell of ``df`` at once (not per column),
    so the relative scale between the columns is preserved.

    Args:
        df: numeric ``pandas.DataFrame`` holding one pose.

    Returns:
        A new DataFrame equal to ``(df - mean) / std``. When the standard
        deviation is zero or undefined (all cells equal, or fewer than two
        cells) the frame is only mean-centered, so the result contains zeros
        instead of NaN/inf produced by a division by zero.
    """
    # Flatten once and reuse it for both statistics.
    values = df.stack()
    pose_mean = values.mean()
    pose_std = values.std()

    # `not (std > 0)` is true for both 0 and NaN; dividing would poison every cell.
    if not pose_std > 0:
        return df - pose_mean

    return (df - pose_mean) / pose_std


In [108]:
import pickle
import os
import numpy as np
import pandas as pd

import preprocess


def deserialize(file_path):
    """Load and return the pickled object stored at ``file_path``.

    A progress line is printed before the file is opened.

    NOTE(review): ``pickle.load`` can run arbitrary code while loading, so this
    must only be pointed at pose files from a trusted source.
    """
    print(f"working on {file_path}...")
    with open(file_path, 'rb') as pose_file:
        return pickle.load(pose_file)


def find_files(folder_dir):
    """Return the paths of the non-empty ``.pose`` files directly inside ``folder_dir``.

    Every matching entry is reported on stdout; zero-byte files are reported as
    errors and left out of the result.

    Args:
        folder_dir: directory to scan (not recursive).

    Returns:
        List of ``os.path.join(folder_dir, name)`` paths, sorted by file name.

    Raises:
        OSError: if ``folder_dir`` cannot be listed.
    """
    files = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order identical from run to run and from machine to machine.
    for file in sorted(os.listdir(folder_dir)):
        if not file.endswith(".pose"):
            continue

        filepath = os.path.join(folder_dir, file)
        print(f"FOUND FILE: {filepath}")
        if os.path.getsize(filepath) > 0:
            files.append(filepath)
        else:
            print(f"ERR: {filepath} EMPTY, skipping...")

    return files


def convert_to_df(pose):
    """Build a DataFrame with one row per point of ``pose``.

    Each element of ``pose`` is indexed with the keys ``'x'``, ``'y'`` and
    ``'z'``; the three values become the row's columns in that order, under
    pandas' default integer column labels.
    """
    rows = [[point['x'], point['y'], point['z']] for point in pose]
    return pd.DataFrame(rows)


# oops
def find_deserialize_preprocess(folder_dir):
    """Load every usable pose file in ``folder_dir`` as a normalized DataFrame.

    Chains the helpers of this notebook: ``find_files`` lists the files, each one
    goes through ``deserialize`` and ``convert_to_df``, and the resulting frame is
    normalized by ``preprocess_file``. A completion message is printed at the end.

    Returns:
        List of DataFrames, in the order ``find_files`` returned the paths.
    """
    _poses = [
        preprocess_file(convert_to_df(deserialize(pose_path)))
        for pose_path in find_files(folder_dir)
    ]

    print("DONE :O")
    return _poses


## Data collection
This builds `data_x` and `data_y`, where the label `data_y[i]` is the category id of the pose `data_x[i]`.

In [109]:
import os

# Parallel lists: data_x[i] is a preprocessed pose and data_y[i] is its category id.
data_x = []
data_y = []

for category_id, category_name in categories.items():
    # Each category keeps its files in a sub-folder named after the category.
    category_path = os.path.join(dataset_location, category_name)
    poses = find_deserialize_preprocess(category_path)
    data_x.extend(poses)
    # One label per loaded pose keeps the two lists aligned.
    data_y.extend([category_id] * len(poses))
    # print(len(data_x))

FOUND FILE: ../dataset\back bowled\back_bowled_0.pose
FOUND FILE: ../dataset\back bowled\back_bowled_1.pose
FOUND FILE: ../dataset\back bowled\back_bowled_10.pose
FOUND FILE: ../dataset\back bowled\back_bowled_11.pose
FOUND FILE: ../dataset\back bowled\back_bowled_12.pose
FOUND FILE: ../dataset\back bowled\back_bowled_13.pose
FOUND FILE: ../dataset\back bowled\back_bowled_14.pose
FOUND FILE: ../dataset\back bowled\back_bowled_15.pose
FOUND FILE: ../dataset\back bowled\back_bowled_16.pose
FOUND FILE: ../dataset\back bowled\back_bowled_17.pose
FOUND FILE: ../dataset\back bowled\back_bowled_18.pose
FOUND FILE: ../dataset\back bowled\back_bowled_19.pose
FOUND FILE: ../dataset\back bowled\back_bowled_2.pose
FOUND FILE: ../dataset\back bowled\back_bowled_20.pose
FOUND FILE: ../dataset\back bowled\back_bowled_21.pose
FOUND FILE: ../dataset\back bowled\back_bowled_22.pose
FOUND FILE: ../dataset\back bowled\back_bowled_23.pose
FOUND FILE: ../dataset\back bowled\back_bowled_24.pose
FOUND FILE: .

# Training
000OooOOoo0oOoo0ooo  
The classifier is currently a linear SVM (`sklearn.svm.LinearSVC`).

In [110]:
import numpy
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn import svm
# Turn the collected Python lists into arrays (the shuffled split happens in train()).
data_x, data_y = np.array(data_x), np.array(data_y)

# Flatten every pose into a single feature row so the estimator gets a 2-D matrix.
# The unpacking doubles as a check that data_x is 3-D.
nsamples, nx, ny = data_x.shape
data_x_reshaped = data_x.reshape(nsamples, nx * ny)

# Sample count and label count; the two printed numbers should match.
print(data_x.shape[0])
print(data_y.shape[0])
def train(random_state=None, test_size=0.2):
    """Fit a ``LinearSVC`` on a shuffled train/test split of the notebook data.

    Reads the module-level ``data_x_reshaped`` (features) and ``data_y`` (labels).
    Features are standardized with a ``StandardScaler`` fitted on the training
    part only, so no test statistics leak into training.

    Args:
        random_state: seed forwarded to both the split and the classifier. The
            default ``None`` keeps the original unseeded behavior (a different
            split on every call); pass an int to make a run reproducible.
        test_size: fraction of the samples held out for testing.

    Returns:
        Tuple ``(model, x_test, y_test)``: the fitted classifier, the *scaled*
        test features and the matching test labels.

    NOTE(review): the fitted scaler is local to this function and is not
    returned, so the model can only be applied to features that were
    standardized the same way.
    """
    x_train, _x_test, y_train, _y_test = train_test_split(
        data_x_reshaped,
        data_y,
        test_size=test_size,
        shuffle=True,
        random_state=random_state,
    )

    # Fit the scaler on the training part only, then reuse it for the test part.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train.astype(np.float32))
    _x_test = scaler.transform(_x_test.astype(np.float32))

    # TODO: tune C (a RandomizedSearchCV over C = 1..15 was tried but is not enabled).
    _svc = svm.LinearSVC(dual='auto', max_iter=8000000, random_state=random_state)
    _svc.fit(x_train, y_train)
    return _svc, _x_test, _y_test

688
688


## Evaluation

In [111]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score, classification_report
# Train one model on a fresh random split and print per-class precision/recall/F1
# for the held-out part of that split.
svc, x_test, y_test = train()
y_pred = svc.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.47      0.64      0.54        11
           1       0.79      0.58      0.67        19
           2       0.79      0.58      0.67        19
           3       0.55      0.55      0.55        11
           4       0.62      0.67      0.64        12
           5       0.67      0.56      0.61        18
           6       0.71      1.00      0.83        15
           7       0.73      0.73      0.73        11
           8       0.79      0.86      0.83        22

    accuracy                           0.69       138
   macro avg       0.68      0.68      0.67       138
weighted avg       0.70      0.69      0.68       138



In [114]:
# Train repeatedly, each time on a new random split, and keep every model with its score.
# NOTE(review): every score is measured on that run's own test split and the best one
# is later selected by this score, so the winning accuracy is an optimistic estimate.
models = []
for attempt in range(400):
    svc, x_test, y_test = train()
    y_pred = svc.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    models.append((svc, acc))
    print(acc)

0.7318840579710145
0.7898550724637681
0.7463768115942029
0.6884057971014492
0.7391304347826086
0.7536231884057971
0.7246376811594203
0.7246376811594203
0.7753623188405797
0.7101449275362319
0.7101449275362319
0.7463768115942029
0.7536231884057971
0.7391304347826086
0.7246376811594203
0.7318840579710145
0.717391304347826
0.7536231884057971
0.6884057971014492
0.7463768115942029
0.7536231884057971
0.7391304347826086
0.7536231884057971
0.6884057971014492
0.7463768115942029
0.7318840579710145
0.7608695652173914
0.7681159420289855
0.7391304347826086
0.7101449275362319
0.7753623188405797
0.8115942028985508
0.7608695652173914
0.7028985507246377
0.7608695652173914
0.7608695652173914
0.7391304347826086
0.7536231884057971
0.7318840579710145
0.7318840579710145
0.6304347826086957
0.7898550724637681
0.7246376811594203
0.7463768115942029
0.7391304347826086
0.7391304347826086
0.717391304347826
0.7391304347826086
0.7536231884057971
0.7608695652173914
0.7608695652173914
0.7028985507246377
0.688405797101

In [115]:
from operator import itemgetter
# Select the (model, accuracy) pair with the highest accuracy; on ties the earliest run wins.
best_model = max(models, key=lambda entry: entry[1])
print(best_model)

(LinearSVC(dual='auto', max_iter=8000000), 0.8405797101449275)


In [117]:
import pickle

# Persist the winning classifier; the file name carries its accuracy as a whole percentage.
# NOTE(review): only the classifier is pickled. train() standardizes features with a
# scaler that is not saved here, so inference code has to reproduce that scaling.
accuracy_pct = int(best_model[1]*10000 // 100)
with open(f'pose_lsvc_acc{accuracy_pct}.model', 'wb') as model_file:
    pickle.dump(best_model[0], model_file)