## Load the Motion Data

The data holds an x and a y value for each pixel. Each sample is an array of shape 10 x 40 x 40 x 2: 10 frames, a 40 x 40 capture window, and the x and y values.

In [None]:
import numpy as np
import os
import matplotlib
import pandas as pd
# matplotlib.use('TkAgg') # For displaying animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from sklearn.tree import DecisionTreeClassifier, export_graphviz
from helper_functions import *
from normalize_data import *
from integral_try import *
from numpy import array
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
%matplotlib notebook

In [None]:
# Directory that holds the recorded gesture data files
data_dir = 'sonic_pi_face/data/'

# Get list of data files
data_files = get_data_files(data_dir)

# Load data into a dictionary keyed by gesture name (e.g. 'open-close')
# Note: Checks for incomplete data
data = get_gesture_data(data_files)

# Normalization is currently disabled; when enabled it normalizes across
# data samples and frames (x and y will be normalized independently)
# data = normalize_data(data)
print(np.shape(data['open-close']))

# Load first data sample from `open-close`
sample = data['open-close'][0]
print(np.shape(sample))

# Calculate the integral image of the frame at index 4 of the sample
iimage = i_image(sample[4])

# Calculate the integral over the region (0, 0, 20, 20) from the integral image
# NOTE(review): presumably the arguments are two corner coordinates of the
# region; confirm the convention against `get_integral` in `integral_try`.
try_integral = get_integral(iimage,0,0,20,20)

print('Example integral of the 1st quarter of one frame: ', try_integral)

In [None]:
# The gesture names are the keys of the `data` dictionary
gestures = list(data)
print(gestures)
# First recorded sample of the gesture at index 3 of that list
g1 = data[gestures[3]][0]

In [None]:
# Full slice over the first four axes of `g1`; this rebinds the module-level
# `sample` that later cells read
sample = g1[:,:,:,:]
# Frame at index 6 of the sample
image = sample[6]
# Display channel 0 of the frame (presumably the x values; confirm)
plt.imshow(image[...,0])
plt.show()

In [None]:
# NOTE(review): `image` is a raw frame (`sample[6]`), whereas the earlier
# `get_integral` call was given the output of `i_image(...)`. Confirm which
# kind of input `get_integral` expects.
get_integral(image,0,0,20,20)

In [None]:
def _grid_sections(height, width, rows, cols):
    """Return corner tuples for a `rows` x `cols` grid, in row-major order.

    Each tuple is (start0, start1, end0, end1), where the `0` values lie
    along an axis of length `height` and the `1` values along an axis of
    length `width`. For a 40 x 40 frame split 2 x 2 this yields
    (0, 0, 19, 19), (0, 19, 19, 39), (19, 0, 39, 19), (19, 19, 39, 39).
    """
    edges0 = [0] + [height * k // rows - 1 for k in range(1, rows + 1)]
    edges1 = [0] + [width * k // cols - 1 for k in range(1, cols + 1)]
    return [
        (edges0[r], edges1[c], edges0[r + 1], edges1[c + 1])
        for r in range(rows)
        for c in range(cols)
    ]


def reduce_dimensions(rows = 2, cols = 2, frames = None):
    """Reduce every frame to one integral feature per grid section.

    Args
    ----
    rows -- number of grid sections along the first frame axis.
    cols -- number of grid sections along the second frame axis.
    frames -- sequence of frames to reduce; when None, the module-level
              `sample` is used.

    Returns
    -------
    features -- array of shape (number of frames, rows * cols, 2) holding
                the value returned by `get_integral` for every section of
                every frame.
    """
    if frames is None:
        # Fall back to the notebook-level sample
        frames = sample

    n_frames = len(frames)
    features = np.zeros((n_frames, rows * cols, 2))
    if n_frames == 0:
        return features

    # All frames are assumed to share the shape of the first one.
    # NOTE(review): assumes the first two arguments after the image in
    # `get_integral` follow the same axis order as the frame; a square
    # frame hides any mix-up, so confirm before using non-square frames.
    height, width = np.shape(frames[0])[:2]
    sections = _grid_sections(height, width, rows, cols)

    for frame_ind, frame in enumerate(frames):
        integral_image = i_image(frame)
        for sect_ind, section in enumerate(sections):
            features[frame_ind][sect_ind] = get_integral(integral_image, *section)
    return features

In [None]:
# Sum of every entry in channel 0 of the integral image `iimage`
iimage[...,0].sum()

## Visualize horizontal motion across frames

In [None]:
sample = data['open-close'][121] # Sample at index 121 of `open-close`
frame = sample[5][:,:,0] # Frame at index 5, channel 0
# Note: Second argument takes 0 for `x`, 1 for `y` coordinate
# display_frames(sample,0)

## Random forest classification

Create training and test data split for classification

In [None]:
def get_data(data, key):
    """Return the samples stored under `key` as a flat 2-D array.

    Args
    ----
    data -- mapping from gesture name to a sequence of samples.
    key -- gesture name to look up in `data`.

    Returns
    -------
    Array with one row per sample and 32000 columns.
    """
    samples = np.asarray(data[key])
    n_samples = len(samples)

    # One row per sample: 10 * 1600 * 2 = 32000 values each
    return samples.reshape((n_samples, 10 * 1600 * 2))

def scale(data, target_gesture):
    """Multiply `data` in place by a factor derived from `target_gesture`.

    The factor is (max + |min|) - min of `target_gesture`. `data` itself is
    modified by the in-place multiplication and then returned.
    """
    lowest = np.min(target_gesture)
    highest = np.max(target_gesture)
    factor = (highest + np.abs(lowest)) - lowest
    data *= factor
    return data

In [None]:
# Load target gesture data; every `open-close` sample is labelled 1
open_close_X = get_data(data, 'open-close')
open_close_Y = np.ones((len(open_close_X)))

In [None]:
# Load empty (no-gesture) data; every `empty` sample is labelled 0
empty_X = get_data(data, 'empty')
empty_Y = np.zeros(len(empty_X))

In [None]:
# Load other gesture; every `slide-horizontally` sample is labelled 0
slide_horizontal_X = get_data(data,'slide-horizontally')
slide_horizontal_Y = np.zeros((len(slide_horizontal_X)))

In [None]:
# TODO: Evaluate mock data aids classification in practice
# create random mock data
# sham_X = np.random.randn(*open_close_X.shape)
# # scale as original data
# sham_X = scale(sham_X, open_close_X)
# sham_Y = np.zeros((len(sham_X)))

# Stack the three groups into one feature matrix and one label vector
# (1 for `open-close`, 0 for everything else)
X = np.vstack([open_close_X, empty_X, slide_horizontal_X])
Y = np.hstack([open_close_Y, empty_Y, slide_horizontal_Y])

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Classify test data using random forest
# (the forest has no `random_state`, so only the split is seeded)
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train, y_train)
# clf.score(X_test,y_test)
print("Predictions:\n{}".format(clf.predict(X_test)))
print("Actual:\n{}".format(y_test))

## Pandas dataframes for organizing the data

In [None]:
# TODO: Convert all code to pandas for flexibility and display
# One DataFrame per gesture: the flattened features plus a string `label` column
label = pd.Series(['Open']*len(open_close_X))
df_open = pd.DataFrame(open_close_X)
df_open['label'] = label

label = pd.Series(['Empty']*len(empty_X))
df_empty = pd.DataFrame(empty_X)
df_empty['label'] = label

label = pd.Series(['Slide-H']*len(slide_horizontal_X))
df_slide_h = pd.DataFrame(slide_horizontal_X)
df_slide_h['label'] = label

# Stack the per-gesture frames row-wise. `DataFrame.append` was removed in
# pandas 2.0, so use `pd.concat`; as before, each frame keeps its own index.
df_X = pd.concat([df_open, df_empty, df_slide_h])

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Each distinct value of `target_column` is numbered in order of first
    appearance, starting at 0. The input DataFrame is not modified.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- copy of df with the added integer Target column.
    targets -- array of the unique target names; the position of a name
               is its integer code.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    # `map` gives an integer column directly; `replace` on an object column
    # relies on silent downcasting, which newer pandas deprecates.
    df_mod["Target"] = df_mod[target_column].map(map_to_int)

    return (df_mod, targets)

# Encode the string `label` column of `df_X` as integers in a new `Target` column
df2, targets = encode_target(df_X, "label")

## Multiclass Random Forest Classification

In [None]:
# Feature matrix: every column except the string `label` and the integer
# `Target`. Keeping them would leak the answer into the features, and the
# string column cannot be fitted by the classifier.
X = df2.drop(columns=['label', 'Target'])
Y = df2['Target']
X.shape

In [None]:
# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Classify test data using random forest
# (the forest has no `random_state`, so only the split is seeded)
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train, y_train)
# clf.score(X_test,y_test)
print("Predictions:\n{}".format(clf.predict(X_test)))
print("Actual:\n{}".format(y_test))

## [WIP] Decision Tree Visualization


In [None]:
# FIXME: Test once number of features is less than 360

# Get features: every column except the last two (`label` and `Target`)
features = df2.columns[:-2]
y = df2["Target"]
dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
# Fit on the feature columns only, so the tree never sees the string label
# or the target itself and its inputs match `features`.
dt.fit(df2[features], y)

def visualize_tree(tree, feature_names):
    """Create tree png using graphviz.

    Writes the tree to `dt.dot` in the current directory, then runs the
    `dot` command to render it to `dt.png`.

    Args
    ----
    tree -- scikit-learn DecisionTree.
    feature_names -- list of feature names.

    Raises
    ------
    SystemExit -- if `dot` cannot be started or exits with an error.
    """
    # Imported here because `subprocess` is not among this file's explicit imports
    import subprocess

    with open("dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError) as err:
        # Only failures to launch or run `dot` are reported as a graphviz
        # problem; other errors propagate unchanged.
        raise SystemExit("Could not run dot, ie graphviz, to "
                         "produce visualization") from err
# Render the fitted decision tree `dt` to `dt.png`, labelling nodes with `features`
visualize_tree(dt,features)