## Load the Motion Data

The data holds x and y values for each pixel. Each sample is an array of shape 10 (frames) x 40 x 40 (capture window) x 2 (x and y).

In [None]:
import numpy as np
import os
import matplotlib
matplotlib.use('TkAgg') # For displaying animation
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.neural_network import MLPClassifier as mlpc
from sklearn import linear_model as lm
from sklearn import svm as svm
from helper_functions import *
from normalize_data import *
from numpy import array
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split

%matplotlib notebook

## Load raw data for preprocessing

In [None]:
# Location of the raw gesture recordings (a relative path, so it is resolved
# against the notebook's current working directory).
RAW_DATA_DIR = 'sonic_pi_face/data/'

# Get list of data files
# (get_data_files is pulled in by one of the star imports above.)
data_files = get_data_files(RAW_DATA_DIR)

# Load data into a dictionary
# Note: Checks for incomplete data
# Later cells index the result by gesture name, e.g. data['open-close'].
data = get_gesture_data(data_files)

## Visualize optical flow sample frame (Optional)

In [None]:
# Collect the gesture names; presumably `data` is a dict, so iterating it
# yields its keys.
gestures = list(data)
print(gestures) # List gestures
sample = data['open-close'][3] # Index 3, i.e. the 4th open-close sample
image = sample[4] # Index 4, i.e. the 5th frame of that sample
# Uncomment to display the selected frame:
# plt.imshow(image)
# plt.show()

## Visualize horizontal motion across frames (Optional)

In [None]:
sample = data['open-close'][5] # Index 5, i.e. the 6th open-close sample
# Uncomment to animate the frames of `sample`:
# anim = display_frames(sample)

### Feature optimization for hyperparameter search

In [None]:
# Load all pre-processed data sets if available; otherwise build them from
# the raw gesture data and save them for the next run.
data_sets = []
DATA_DIR = 'data'

# Values passed as `divs` to the feature-set helpers below.
divs=[4,10,20]


def _numeric_name_key(name):
    """Return a sort key ordering file names by the digits they contain."""
    digits = ''.join(ch for ch in name if ch in '0123456789')
    # Fall back on the name itself so the order is fully deterministic.
    return int(digits or 0), name


csv_names = []
if os.path.isdir(DATA_DIR):
    # os.listdir returns entries in arbitrary order, but later cells select a
    # feature set by position (data_sets[0]), so the order is fixed here.
    # NOTE(review): assumes the CSV names embed the grid size, so that numeric
    # ordering matches `divs` -- confirm against save_data_sets.
    csv_names = sorted(
        (name for name in os.listdir(DATA_DIR) if name.endswith('.csv')),
        key=_numeric_name_key,
    )

if csv_names:
    for csv_name in csv_names:
        df = pd.read_csv(os.path.join(DATA_DIR, csv_name))
        # Drop the saved index column when present; tolerate files without it.
        df = df.drop('Unnamed: 0', axis=1, errors='ignore')
        data_sets.append(df)
else:
    # Generate data sets.
    if os.path.isdir(DATA_DIR):
        # The directory exists but holds nothing loadable; previously this
        # left `data_sets` empty and broke the cells that follow.
        # NOTE(review): confirm save_data_sets copes with an existing directory.
        print("No .csv files found in {}\nPreprocessing data for "
            "optimization.".format(os.path.join(os.getcwd(),DATA_DIR)))
    else:
        print("Directory not found at {}\nPreprocessing data for "
            "optimization.".format(os.path.join(os.getcwd(),DATA_DIR)))
    data_sets = get_feature_sets(data,divs=divs)
    save_data_sets(data_sets,divs=divs)

### Use random forests for optimization.

In [None]:
# Compare the feature sets in `data_sets` (one per entry of `divs`) using the
# random-forest method. The result is kept as `ax` -- presumably a matplotlib
# axes object; verify against helper_functions.
ax = optimize_feature_dimensions(data_sets,divs,method='rf') # also use method='ada'

In [None]:
# NOTE: from here on `data` no longer refers to the raw gesture dictionary
# loaded earlier but to one of the pre-processed feature sets, so the
# visualization cells above should not be re-run after this cell.
# NOTE(review): index 0 is assumed to be the 4x4 set -- confirm the order of
# `data_sets` matches `divs`.
data = data_sets[0] # Choose 4x4 features
gestures=['open-close','empty','slide-horizontally']

# Keep only the rows whose label is one of the selected gestures.
data = data[data['label'].isin(gestures)]
# Not displayed: a notebook only renders the last expression of a cell.
data.head()
# Later cells read a 'Target' column from the returned frame.
data, targets = encode_target(data, 'label')
# data
# X = data.drop(['Target','label'], axis=1)
# Y = data['Target']

In [None]:
# Sanity check: show the distinct values of the 'label' column.
data['label'].unique()

In [None]:
# Split the selected gestures into train and test sets. The `gestures` list
# chosen earlier is reused instead of repeating the literal, so the selection
# is defined in exactly one place (this cell already depended on it below).
X, Y = class_split(data,gestures=gestures)
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Limit to `gestures` entries
data = data[data['label'].isin(gestures)]
data, targets = encode_target(data, 'label')

# NOTE: X and Y are rebound here, after the split above was taken, so
# X_train/X_test/y_train/y_test still come from the class_split output.
X = data.drop(['Target','label'], axis=1)
Y = data['Target']

### Extract features using integral image

In [None]:
# NOTE(review): at this point `data` is the feature DataFrame chosen from
# `data_sets`, not the raw gesture dictionary -- confirm feature_extract is
# meant to be applied to it.
df_red = feature_extract(data,cols=4,rows=4) # Dataframe with 32 (16 * 2 (x and y coordinates)) dimensions

### Pandas dataframes for organizing the data

In [None]:
# Encode the "label" column of the reduced features. The following cells read
# a 'Target' column from `df2` and slice `targets` to pick labels.
df2, targets = encode_target(df_red, "label")

### Select labels for training and testing

In [None]:
# Take the entries at positions 1 and 2 of `targets` (slice 1:3), i.e. at most
# two labels.
labels = targets[1:3]
# Keep only the rows whose 'label' is one of those.
df3 = df2[df2['label'].isin(list(labels))]

### Multiclass Random Forest Classification

In [None]:
# Features: every column except 'Target' and 'label'.
X = df3.drop(['Target','label'], axis=1)
# Classes: the 'Target' column.
Y = df3['Target']

In [None]:
# Split data into train and test (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Classify test data using random forest
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
accuracy = clf.score(X_test,y_test)

# Print predictions and ground truth for the whole test set and in the same
# array form, so they can be compared entry by entry (previously only the
# first 10 actual values were shown next to all predictions).
print("Predictions:\n{}".format(predictions))
print("Actual:\n{}".format(np.asarray(y_test)))
print("Score:\n{}".format(accuracy))

## Random forest classification using flattened arrays

Create training and test data split for classification.

*Note: Using numpy arrays here.*

In [None]:
# Gestures classified in this experiment. The third entry is spelled
# 'slide-horizontally', matching the label used by the earlier cells
# (it was 'slide_horizontal' here, which does not match that label).
flat_gestures = ['open-close','empty','slide-horizontally']
X, Y = class_split(data,gestures=flat_gestures)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Classify test data using random forest
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train, y_train)

print("Predictions:\n{}".format(clf.predict(X_test)))
print("Actual:\n{}".format(y_test))
# Report the gestures actually used above rather than `labels`, which belongs
# to the label-selection cell of a different experiment.
print("Score for {}:\n{}".format(flat_gestures,clf.score(X_test,y_test)))

## [WIP] Decision Tree Visualization


In [None]:
# Get features
# Build the feature matrix from df3 right here instead of relying on the
# global `X`, which the preceding cell rebinds to the class_split output and
# which therefore no longer lines up with df3["Target"].
X_tree = df3.drop(['Target','label'], axis=1)
# Take the names from the matrix itself rather than by column position.
features = X_tree.columns
y = df3["Target"]
dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt.fit(X_tree, y)

visualize_tree(dt,features)

## Random Forest

In [None]:
# Fit a 10-tree random forest on the current train split (fit returns the
# estimator itself) and print its score on the test split.
clf_forest = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)
print(clf_forest.score(X_test, y_test))

## AdaBoost

In [None]:
# NOTE(review): the trailing comma unpacks a one-element iterable, unlike the
# bagging and extra-trees cells, which keep the return value as is -- confirm
# what adaboost() returns.
clf_adaboost, = adaboost(data)

## Bagging

In [None]:
# Run the project's bagging helper on `data` and keep its return value.
clf_bagging = bagging(data)

## Extra Trees

In [None]:
# Run the project's extra-trees helper on `data` and keep its return value.
clf_extra_tree = extra_trees(data)

## Gradient Boosting

In [None]:
# Fit a gradient boosting classifier with default settings on the current
# train split (fit returns the estimator itself) and print its test score.
clf_gradient_boosting = GradientBoostingClassifier().fit(X_train, y_train)
print(clf_gradient_boosting.score(X_test, y_test))

## Multilayer Perceptron

In [None]:
# Fit a multilayer perceptron with default settings (`mlpc` is the alias
# given to MLPClassifier in the import cell) and print its test score.
clf_mlpc = mlpc().fit(X_train, y_train)
print(clf_mlpc.score(X_test, y_test))

## SVM

In [None]:
# Fit a support vector classifier with decision_function_shape='ovo' on the
# current train split and print its test score.
clf_svm = svm.SVC(decision_function_shape='ovo').fit(X_train, y_train)
print(clf_svm.score(X_test, y_test))