## Load the Motion Data

Each pixel carries an x and a y value. Each sample is an array of shape 10 (frames) x 40 x 40 (capture window) x 2 (x and y).

In [None]:
import numpy as np
import os
import matplotlib
matplotlib.use('TkAgg') # For displaying animation
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.neural_network import MLPClassifier as mlpc
from sklearn import linear_model as lm
from sklearn import svm as svm
from helper_functions import *
from normalize_data import *
from integral_try import *
from numpy import array
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# %matplotlib notebook # Enable when using in notebook

In [None]:
# Directory that holds the recorded gesture data files
data_dir = 'sonic_pi_face/data/'

# Get list of data files
data_files = get_data_files(data_dir)

# Load data into a dictionary (indexed below by gesture name, e.g. 'open-close')
# Note: Checks for incomplete data
data = get_gesture_data(data_files)

# Normalize data across data samples and frames (x and y will be normalized independently)
# data = normalize_data(data)
# Shape of the whole 'open-close' collection
print(np.shape(data['open-close']))

# Load first data sample from `open-close`
sample = data['open-close'][0]
print(np.shape(sample))

# calculate the integral image of the entry at index 4 of the sample
# (presumably the 5th frame -- TODO confirm against the data layout)
iimage = i_image(sample[4])

# calculate the integral for the first quarter from the integral image
# NOTE(review): the corner arguments here are (0, 0, 20, 20); whether the end
# coordinates are inclusive depends on get_integral -- confirm.
try_integral = get_integral(iimage,0,0,20,20)

print('Example integral of the 1st quarter of one frame: ', try_integral)

## Visualize optical flow sample frame

In [None]:
# The gesture names are the keys of the data dictionary
gestures = list(data)
print(gestures) # List gestures
# Select a whole sample first (index 4 = 5th sample); indexing a frame here
# would make `sample[4]` below pick a row of a frame instead of a frame.
sample = data['open-close'][4] # 5th open-close sample
image = sample[4] # 5th frame of sample

## Visualize horizontal motion across frames

In [None]:
sample = data['open-close'][5] # Sample at index 5 (i.e. the sixth sample)
# Keep the returned object bound to a name
# (presumably the animation handle, which must stay referenced -- TODO confirm)
anim = display_frames(sample)
# anim.save('open-close.gif',dpi=80, writer='imagemagick')

## Extract features using integral image

In [None]:
# Build the reduced feature table with feature_extract's default settings.
# NOTE(review): the dimension count in the trailing comment is the original
# author's note; verify it against feature_extract.
df_red = feature_extract(data) # Dataframe with 32 (16 * 2 (x and y coordinates)) dimensions

## Feature optimization

In [None]:
def reduce_dimensions(sample, rows = 4, cols = 4):
    '''Reduce dimensions of images in `sample` using integral image.

    Every frame is divided into a `rows` x `cols` grid of sections and
    `get_integral` is evaluated once per section on the frame's integral
    image (`i_image`).

    Args
    ----
    sample -- iterable of frames (at most 10; the 40 x 40 capture window
              is assumed).
    rows -- number of grid divisions along the first coordinate.
    cols -- number of grid divisions along the second coordinate.

    Returns
    -------
    Array of shape (10, rows * cols, 2); sections are ordered with the
    second coordinate varying fastest.
    '''
    frame_size = 40  # side length of the capture window
    n_frames = 10    # frames per sample
    # Each axis gets its own step so non-square grids (rows != cols)
    # stay inside the frame.
    x_step = frame_size // rows
    y_step = frame_size // cols
    sections = [
        (x_step * i, y_step * j, x_step * (i + 1) - 1, y_step * (j + 1) - 1)
        for i in range(rows)
        for j in range(cols)
    ]

    features = np.zeros((n_frames, rows * cols, 2))
    for frame_ind, frame in enumerate(sample):
        integral_image = i_image(frame)
        for sect_ind, section in enumerate(sections):
            features[frame_ind, sect_ind] = get_integral(integral_image, *section)
    return features

In [None]:
# Compare random forest on various feature transformations
dims = [1,2,3,4,5] # Number of cols and rows
# Every (rows, cols) combination, rows varying slowest
grid = [(rows, cols) for rows in dims for cols in dims]
data_sets = []
for rows, cols in grid:
    # df_red is left bound to the last (5 x 5) extraction after the loop
    df_red = feature_extract(data,rows=rows,cols=cols)
    data_sets.append(df_red)

## Random forest classification

Create training and test data split for classification

In [None]:
def get_data(data, key):
    """Return the samples stored under `key` as one flattened row per sample.

    Args
    ----
    data -- mapping from gesture name to a list of samples.
    key -- gesture name to look up.

    Returns
    -------
    2-D array of shape (number of samples, 32000).
    """
    samples = np.asarray(data[key])
    n_samples = len(samples)
    # 10 frames * 1600 (40 x 40) pixels * 2 components per sample
    row_length = 10*1600*2
    return samples.reshape((n_samples, row_length))

def scale(data, target_gesture):
    """Multiply `data` in place by a factor derived from `target_gesture`.

    The factor is (max + |min|) - min of `target_gesture`. `data` itself is
    modified and returned.
    """
    lowest = np.min(target_gesture)
    highest = np.max(target_gesture)
    factor = (highest + np.abs(lowest)) - lowest
    data *= factor
    return data

In [None]:
# Target gesture: flattened samples, each labelled 1
open_close_X = get_data(data, 'open-close')
open_close_Y = np.ones(len(open_close_X))

In [None]:
# Empty (no-gesture) recordings: flattened samples, each labelled 0
empty_X = get_data(data, 'empty')
empty_Y = np.zeros((len(empty_X)))

In [None]:
# A different gesture, used as a second negative class (labelled 0)
slide_horizontal_X = get_data(data, 'slide-horizontally')
slide_horizontal_Y = np.zeros(len(slide_horizontal_X))

In [None]:
# Names of the gesture sets stacked below, in stacking order; only used in
# the score printout. These are the gesture keys, not variable names.
labels = ['open-close','empty','slide-horizontally']
X = np.vstack([open_close_X, empty_X, slide_horizontal_X])
Y = np.hstack([open_close_Y, empty_Y, slide_horizontal_Y])

# Split data into train and test (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Classify test data using random forest
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train, y_train)
print("Predictions:\n{}".format(clf.predict(X_test)))
print("Actual:\n{}".format(y_test))
print("Score for {}:\n{}".format(labels,clf.score(X_test,y_test)))

## Pandas dataframes for organizing the data

In [None]:
def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame (left unmodified; a copy is returned).
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- copy of df with an extra integer "Target" column.
    targets -- array of the unique target names; the integer code of a
               name is its position in this array.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    # `map` produces an integer column directly; `replace` on an object
    # column only does so through implicit downcasting.
    df_mod["Target"] = df_mod[target_column].map(map_to_int)

    return (df_mod, targets)

# Encode the "label" column of the most recently built feature table as integers
df2, targets = encode_target(df_red, "label")

## Select labels for training and testing

In [None]:
# Pick the 2nd and 3rd distinct target names (positions 1 and 2 of `targets`)
labels = targets[1:3]
# Keep only the rows whose label is one of the selected names
df3 = df2[df2['label'].isin(list(labels))]

## Multiclass Random Forest Classification

In [None]:
# Targets: the integer-encoded label column
Y = df3['Target']
# Features: every column except the raw label and its integer encoding
X = df3.drop(columns=['Target','label'])

In [None]:
# Hold out part of the data for evaluation (random_state fixes the split)
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)
# Train a 10-tree random forest, then score it on the held-out part
clf = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print("Predictions:\n{}".format(clf.predict(X_test)))
# Only the first 10 actual targets are printed
print("Actual:\n{}".format(y_test[:10]))
print("Score:\n{}".format(accuracy))

## [WIP] Decision Tree Visualization


In [None]:
# FIXME: Test once number of features is less than 360

# Get features
# The names must describe exactly the columns the tree is fitted on, so they
# are taken from X rather than by slicing df3 by position. A plain list is
# used because it is later handed to export_graphviz as `feature_names`.
features = list(X.columns)
y = df3["Target"]
dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt.fit(X, y)

def visualize_tree(tree, feature_names):
    """Create tree png using graphviz.

    Writes the tree to "dt.dot" and runs the `dot` executable to render
    "dt.png" in the current working directory.

    Args
    ----
    tree -- scikit-learn DecisionTree.
    feature_names -- list of feature names.

    Raises
    ------
    SystemExit -- if `dot` cannot be started or exits with an error.
    """
    # Imported here because the file's import section does not provide it.
    import subprocess

    with open("dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError) as err:
        # OSError: `dot` is missing or not executable;
        # CalledProcessError: `dot` ran but returned a non-zero status.
        # Raised directly instead of calling the interactive `exit` helper.
        raise SystemExit("Could not run dot, ie graphviz, to "
                         "produce visualization") from err
# Render the fitted decision tree (writes dt.dot and dt.png)
visualize_tree(dt,features)

## Random Forest

In [None]:
# Random forest with 10 trees: train, then print accuracy on the test split
clf_forest = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)
print(clf_forest.score(X_test, y_test))

## Adaboost

In [None]:
# AdaBoost over depth-3 decision trees, 10 boosting rounds
weak_learner = DecisionTreeClassifier(max_depth=3)
clf_adaboost = AdaBoostClassifier(weak_learner, n_estimators=10).fit(X_train, y_train)
print(clf_adaboost.score(X_test, y_test))

## Bagging

In [None]:
# Bagging classifier with default settings: train, then print test accuracy
clf_bagging = BaggingClassifier().fit(X_train, y_train)
print(clf_bagging.score(X_test, y_test))

## Extra Trees

In [None]:
# Extra-trees classifier with default settings: train, then print test accuracy
clf_extra_tree = ExtraTreesClassifier().fit(X_train, y_train)
print(clf_extra_tree.score(X_test, y_test))

## Gradient Boosting

In [None]:
# Gradient boosting with default settings: train, then print test accuracy
clf_gradient_boosting = GradientBoostingClassifier().fit(X_train, y_train)
print(clf_gradient_boosting.score(X_test, y_test))

## Multilayer Perceptron

In [None]:
# Multilayer perceptron (MLPClassifier, default settings): train, then print test accuracy
clf_mlpc = mlpc().fit(X_train, y_train)
print(clf_mlpc.score(X_test, y_test))

## SVM

In [None]:
# Support vector classifier with a one-vs-one decision function shape
clf_svm = svm.SVC(decision_function_shape='ovo').fit(X_train, y_train)
print(clf_svm.score(X_test, y_test))