# Gesture Classification with Ensemble Methods

In [None]:
import os
import numpy as np
import matplotlib
matplotlib.use('TkAgg') # For displaying animation
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.neural_network import MLPClassifier as mlpc
from sklearn import linear_model as lm
from sklearn import svm as svm
from helper_functions import *
from normalize_data import *
from numpy import array
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split

%matplotlib notebook

## Load the Motion Data

Data is stored as x and y coordinates for each pixel. Each sample is an array with dimensions 10 (frames) x 40 x 40 (capture window) x 2 (x and y).

## Load raw data for preprocessing

In [5]:
# Directory that holds the raw gesture recordings (relative to the
# notebook's working directory).
RAW_DATA_DIR = 'sonic_pi_face/data/'

# Get list of data files
# NOTE(review): get_data_files is not defined in this notebook; presumably
# it comes from one of the star imports above -- confirm.
data_files = get_data_files(RAW_DATA_DIR)

# Load data into a dictionary
# Note: Checks for incomplete data
# Later cells index the result by gesture name, e.g. data_dict['open-close'].
data_dict = get_gesture_data(data_files)

## Visualize optical flow (Optional)

### Visualize individual frame (Optional)

In [None]:
# Iterating data_dict yields its keys, which are the gesture names.
gestures = list(data_dict)
print(gestures) # List gestures
sample = data_dict['open-close'][3] # Open-close sample at index 3 (the 4th sample; indices are zero-based)
image = sample[4] # 5th frame of sample
# Uncomment to display the selected frame.
# plt.imshow(image)
# plt.show()

### Visualize horizontal motion across frames (Optional)

In [7]:
sample = data_dict['open-close'][5] # Sample at index 5 (the sixth sample; indices are zero-based)
# Uncomment to animate the frames of the sample.
# NOTE(review): display_frames is not defined in this notebook; presumably
# it comes from one of the star imports -- confirm.
# anim = display_frames(sample)

## Feature Engineering

Find features that improve sample classification.

### WIP - Histogram of Gradients

In [None]:
# FIXME: Complete HoG feature selection
# Plot normalized histograms of the x and y components for two gestures
# ('open-close' and 'empty') so their distributions can be compared.
# `density=True` replaces the `normed=True` keyword, which newer matplotlib
# releases no longer accept.
data_open_close = np.asarray(data_dict['open-close'])
# The last axis holds the two components: index 0 is x, index 1 is y.
x_values = data_open_close[...,0].flatten()
y_values = data_open_close[...,1].flatten()
plt.hist(x_values,bins=20, density=True)
plt.subplots()  # start a new figure for the next histogram
plt.hist(y_values,bins=20, density=True)
data_empty = np.asarray(data_dict['empty'])
x_values = data_empty[...,0].flatten()
y_values = data_empty[...,1].flatten()
plt.subplots()
plt.hist(x_values,bins=20,density=True)
plt.subplots()
plt.hist(y_values,bins=20,density=True)
plt.show()

### Feature optimization

In [None]:
# Load all pre-processed data sets if available.
data_sets = []
DATA_DIR = 'data'

# Number of rows and columns to permute for optical flow feature extraction
divs=[4,10,20]

if os.path.exists(DATA_DIR):
    # os.listdir returns entries in arbitrary order. Sort them so that
    # data_sets[i] refers to the same file on every run and on every machine.
    # NOTE(review): sorting is lexicographic; confirm that the resulting
    # order matches the order of `divs` expected by later cells.
    for file in sorted(os.listdir(DATA_DIR)):
        if file.endswith('.csv'):
            df = pd.read_csv(os.path.join(DATA_DIR,file))
            # 'Unnamed: 0' is presumably the index column written when the
            # sets were saved. Ignore it when absent instead of raising.
            df = df.drop('Unnamed: 0',axis=1,errors='ignore')
            data_sets.append(df)
else:
    # Generate data sets.
    print("Directory not found at {}\nPreprocessing data for "
        "optimization.".format(os.path.join(os.getcwd(),DATA_DIR)))
    data_sets = make_feature_sets(data_dict,divs=divs)
    save_data_sets(data_sets,divs=divs)

### Feature reduction with integral image

Integral image for fast feature evaluation.

#### Use random forests for comparing feature reduction levels. (Optional)

In [None]:
# Example: Reduce the features of one data set.
# Dataframe with 32 (16 * 2 (x and y coordinates)) dimensions
# (cols=4 and rows=4 give the 16 cells; df_red is not used again in this cell).
df_red = feature_extract(data_dict,cols=4,rows=4)

# Display comparison of feature reduction levels.
# IPython magic: switch to the inline backend before plotting. It must stay
# ahead of the plotting call below.
%matplotlib inline
ax = optimize_feature_dimensions(data_sets,divs,method='rf') # also use method='ada'
plt.show()

## Hyper-parameter Optimization with Random Search

Initialize random search module.

In [None]:
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV

# Search space for the randomized hyper-parameter search. Each entry is
# either a list of candidate values or a scipy integer distribution;
# sp_randint(low, high) covers the integers low .. high - 1.
param_dist = dict(
    max_depth=[3, None],
    max_features=sp_randint(5, 25),
    min_samples_split=sp_randint(2, 11),
    min_samples_leaf=sp_randint(1, 11),
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

# Number of parameter settings the search will try.
n_iter_search = 60

### Split data

In [None]:
# Prepare data
# Index 0 is simply the first feature set that was loaded or generated.
# Change the index to pick the middle (or best-scoring) set instead.
data = data_sets[0]
gestures=['open-close','empty','waving-beauty-pageant-style']

# Pandas dataframe `data`
# Keep only the rows for the chosen gestures. The explicit copy makes the
# filtered frame independent of data_sets[0], so any later in-place edits
# cannot touch the cached set or trigger chained-assignment warnings.
data = data[data['label'].isin(gestures)].copy()
data, targets = encode_target(data, 'label') # Encode target column

# Split into features and target
X, y = class_split(data,gestures=gestures)
# Fixed seed so the train/test partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

### Multiclass Random Forest Classification

In [None]:
# Classify test data using random forest
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train, y_train)
accuracy = clf.score(X_test,y_test)

# Show the same first 10 test samples for predictions and ground truth so
# the two printouts can be compared side by side.
print("Predictions:\n{}".format(clf.predict(X_test)[:10]))
print("Actual:\n{}".format(y_test[:10]))
print("Score:\n{}".format(accuracy))

#FIXME
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search)

# Tune on the training split only: fitting the search on the full X / y
# would let the held-out test samples influence hyper-parameter selection.
random_search.fit(X_train.values, y_train.values)
print("RandomizedSearchCV evaluated %d candidates"
      " parameter settings." % (n_iter_search))
# Summarize the cross-validation results of the search.
report(random_search.cv_results_)

### Adaboost

In [None]:
# AdaBoost over depth-3 decision trees, limited to 10 estimators.
clf_adaboost = AdaBoostClassifier(DecisionTreeClassifier(
        max_depth=3), n_estimators=10)
clf_adaboost = clf_adaboost.fit(X_train, y_train)
accuracy = clf_adaboost.score(X_test, y_test)
# Show the same first 10 test samples for predictions and ground truth so
# the two printouts can be compared side by side.
print("Predictions:\n{}".format(clf_adaboost.predict(X_test)[:10]))
print("Actual:\n{}".format(y_test[:10]))
print("Score:\n{}".format(accuracy))

### Bagging

In [None]:
# Bagging ensemble with the library's default settings.
clf_bagging = BaggingClassifier()
# fit() returns the estimator itself, so the name does not need rebinding.
clf_bagging.fit(X=X_train, y=y_train)
# Report accuracy on the held-out split.
print(clf_bagging.score(X=X_test, y=y_test))

### Extra Trees

In [None]:
# Extra-trees ensemble with the library's default settings.
clf_extra_tree = ExtraTreesClassifier()
# fit() returns the estimator itself, so the name does not need rebinding.
clf_extra_tree.fit(X=X_train, y=y_train)
# Report accuracy on the held-out split.
print(clf_extra_tree.score(X=X_test, y=y_test))

### Gradient Boosting

In [None]:
# Gradient boosting with the library's default settings.
clf_gradient_boosting = GradientBoostingClassifier()
# fit() returns the estimator itself, so the name does not need rebinding.
clf_gradient_boosting.fit(X=X_train, y=y_train)
# Report accuracy on the held-out split.
print(clf_gradient_boosting.score(X=X_test, y=y_test))
# Uncomment to compare predictions with the ground truth.
# print("Predictions:\n{}".format(clf_gradient_boosting.predict(X_test)))
# print("Actual:\n{}".format(y_test))

### Multilayer Perceptron

In [None]:
# Multilayer perceptron with the library's default settings.
clf_mlpc = mlpc()
# fit() returns the estimator itself, so the name does not need rebinding.
clf_mlpc.fit(X=X_train, y=y_train)
# Report accuracy on the held-out split.
print(clf_mlpc.score(X=X_test, y=y_test))

### SVM

In [None]:
# Support vector classifier using the one-vs-one decision function shape.
clf_svm = svm.SVC(decision_function_shape='ovo')
# fit() returns the estimator itself, so the name does not need rebinding.
clf_svm.fit(X=X_train, y=y_train)
# Report accuracy on the held-out split.
print(clf_svm.score(X=X_test, y=y_test))