## Load the Motion Data

Data is in x and y coordinates for each pixel. Each sample will be an array of 10 (frames) x 40 x 40 (capture window) x 2 (x and y) dimensions. 

In [87]:
import numpy as np
import os
import matplotlib
import pandas as pd
matplotlib.use('TkAgg') # For displaying animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.neural_network import MLPClassifier as mlpc
from sklearn import linear_model as lm
from sklearn import svm as svm
from helper_functions import *
from normalize_data import *
from integral_try import *
from numpy import array
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier
from sklearn import ensemble as ensemble
from sklearn.model_selection import train_test_split

In [67]:
data_dir = 'sonic_pi_face/data/'

# Get list of data files
data_files = get_data_files(data_dir)

# Load data into a dictionary
# Note: Checks for incomplete data
data = get_gesture_data(data_files)

# Normalize data across data samples and frames (x and y will be normalized independently)
# data = normalize_data(data)
print(np.shape(data['open-close']))

# Load first data sample from `open-close`
sample = data['open-close'][0]
print(np.shape(sample))

# calculate the integral image
iimage = i_image(sample[4])

# calculate the integral for the first quarter from the integral image
try_integral = get_integral(iimage,0,0,20,20)

print('Example integral of the 1st quarter of one frame: ' , try_integral)

Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_113507_waving-beauty-pageant-style.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_113523_waving-beauty-pageant-style.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_112555_slide-vertically.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_112621_slide-vertically.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_112807_slide-vertically.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_113118_slide-vertically.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-13_113226_slide-vertically.txt
Incomplete data sample found at  C:\Users\kle\Documents\sonic-face\sonic_pi_face\data\2017-06-12_135653

## Visualize horizontal motion across frames

In [68]:
sample = data['open-close'][121] # Fifth sample
frame = sample[5][:,:,0] # Middle frame
# Note: Second argument takes 0 for `x`, 1 for `y` coordinate
# display_frames(sample,0) 

## Create training and test data split

Create training and test data split for classification

In [69]:
def get_data(data, key):
    data_list = data[key]
    data_array = np.asarray(data_list)

    # Flatten array to n x 32000
    flattened_data = data_array.reshape((len(data_array),10*1600*2))
    return flattened_data

def scale(data, target_gesture):
    data *= (np.max(target_gesture) + np.abs(np.min(target_gesture))) - np.min(target_gesture)
    return data

In [70]:
# Load target gesture data
open_close_X = get_data(data, 'open-close')
open_close_Y = np.ones((len(open_close_X)))

In [71]:
# Load empty (no-gesture) data
empty_X = get_data(data, 'empty')
empty_Y = np.zeros(len(empty_X))

In [72]:
# Load other gesture
slide_horizontal_X = get_data(data,'slide-horizontally')
slide_horizontal_Y = np.zeros((len(slide_horizontal_X)))

In [73]:
beauty_pageant_X = get_data(data, 'waving-beauty-pageant-style')
beauty_pageant_Y = np.zeros((len(beauty_pageant_X)))

In [95]:
# X = np.vstack([open_close_X, empty_X, slide_horizontal_X, beauty_pageant_X])
Y = np.hstack([open_close_Y, empty_Y, slide_horizontal_Y, beauty_pageant_Y])

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

## Random Forest

In [94]:
clf_forest = RandomForestClassifier(n_estimators=10)
clf_forest = clf_forest.fit(X_train, y_train)
print(clf_forest.score(X_test,y_test))

0.896551724138


## Adaboost

In [75]:
clf_adaboost = ensemble.AdaBoostClassifier()
clf_adaboost = clf_adaboost.fit(X_train, y_train)

print(clf_adaboost.score(X_test,y_test))

0.956896551724


## Bagging

In [76]:
clf_bagging = ensemble.BaggingClassifier()
clf_bagging = clf_bagging.fit(X_train, y_train)
print(clf_bagging.score(X_test,y_test))


0.862068965517


In [77]:
clf_extra_tree = ensemble.ExtraTreesClassifier()
clf_extra_tree = clf_extra_tree.fit(X_train, y_train)
print(clf_extra_tree.score(X_test,y_test))
# print("Predictions:\n{}".format(clf_bagging.predict(X_test)))
# print("Actual:\n{}".format(y_test))

0.939655172414


## Gradient boosting

In [78]:
clf_gradient_boosting = ensemble.GradientBoostingClassifier()
clf_gradient_boosting = clf_gradient_boosting.fit(X_train, y_train)
print(clf_gradient_boosting.score(X_test,y_test))
# print("Predictions:\n{}".format(clf_bagging.predict(X_test)))
# print("Actual:\n{}".format(y_test))

0.905172413793


## Multilayer Perceptron

In [79]:
clf_mlpc = mlpc()
clf_mlpc = clf_mlpc.fit(X_train, y_train)
print(clf_mlpc.score(X_test,y_test))

0.922413793103


## SVM

In [85]:
clf_svm = svm.SVC(decision_function_shape='ovo')
clf_svm = clf_svm.fit(X_train, y_train)
print(clf_svm.score(X_test,y_test))

0.913793103448


## Logistic Regression

In [89]:
clf_logistic = lm.LogisticRegression(multi_class = 'ovr')
clf_logistic = clf_logistic.fit(X_train, y_train)
print(clf_logistic.score(X_test,y_test))

0.887931034483


## Pandas dataframes for organizing the data

In [80]:
# TODO: Convert all code to pandas for flexibility and display
label = pd.Series(['Open']*len(open_close_X))
df_open = pd.DataFrame(open_close_X)
df_open['label'] = label

label = pd.Series(['Empty']*len(empty_X))
df_empty = pd.DataFrame(empty_X)
df_empty['label'] = label

label = pd.Series(['Slide-H']*len(slide_horizontal_X))
df_slide_h = pd.DataFrame(slide_horizontal_X)
df_slide_h['label'] = label

df_X = pd.DataFrame()
df_X = df_X.append([df_open,df_empty,df_slide_h])

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    targets -- list of target names.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod["Target"] = df_mod[target_column].replace(map_to_int)

    return (df_mod, targets)

df2, targets = encode_target(df_X, "label")

## [WIP] Decision Tree Visualization


In [81]:
# FIXME: Test once number of features is less than 360

# Get features
features = df2.columns[:-2]
y = df2["Target"]
dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt.fit(X, y)

def visualize_tree(tree, feature_names):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecsisionTree.
    feature_names -- list of feature names.
    """
    with open("dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
    try:
        subprocess.check_call(command)
    except:
        exit("Could not run dot, ie graphviz, to "
             "produce visualization")
visualize_tree(dt,features)

ValueError: Number of labels=347 does not match number of samples=462

## SVM

## Logistic Regression

## Decision Jungle

## NN