# Make-o-Matic Gesture Recognition

## Part 3: Machine Learning

2017 by Thomas Lidy, TU Wien

### Requirements

Python 2.7

pip install -r requirements.txt

Tested on OS: Ubuntu 16.04.3 LTS

In [12]:
import numpy as np
import pandas as pd
import json
import time # for time measuring
import datetime # for time printing

from scipy import stats
from scipy.signal import resample
from collections import Counter # for majority vote
from collections import OrderedDict # for color palette

# Machine Learning
from sklearn import preprocessing, svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [13]:
def str_to_int(string):
    '''Convert a prefixed ID such as "G01" into its integer part (1).

    The first character is dropped and the remainder is parsed with int().
    '''
    numeric_part = string[1:]
    return int(numeric_part)

In [14]:
def timestr(seconds):
    '''Format a duration given in seconds as a human-readable string.

    Only the integer part of `seconds` is used (fractions are truncated).
    The text is whatever str(datetime.timedelta) produces, i.e. "H:MM:SS"
    and, from 24 hours on, prefixed with the number of days ("1 day, 1:01:01").

    :return: string
    '''
    whole_seconds = int(seconds)
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

## Read Meta-Data

In [15]:
# Input file locations for the experiment data and its descriptive meta-data

# original (raw) experiment export - currently not used
#csv_file = 'data/EXPORT_09042017173622.csv'

# preprocessed experiment export - this is the one read further below
csv_file = 'data/EXPORT_09042017173622_preprocessed.csv'


# json files that translate gesture, parcours and mutation IDs into long text
#gestures_file = 'data/gestures.json' # this is the file edited manually by us to conform to json
gestures_file = 'data/gestures.json.orig' # presumably the original, unedited file (one JSON object per line) - TODO confirm
parcours_file = 'data/parcours.json'
mutations_file = 'data/mutations.json'

# the order here determines the order of the resulting list `dataframes`
files = (gestures_file, parcours_file, mutations_file)
dataframes = []

# NOTE THAT THESE JSON FILES ARE NOT VALID JSON DOCUMENTS AS A WHOLE:
# each line is a JSON string on its own, so we need to parse the files line by line and combine THEM into a list

In [16]:
def get_oid(oid_dict):
    '''Return the bare object id stored under the '$oid' key.

    Example: {u'$oid': u'589c8ed31337b5ab1e1be121'} -> u'589c8ed31337b5ab1e1be121'
    '''
    oid = oid_dict['$oid']
    return oid

In [17]:
# Load the description tables (gestures, parcours, mutations) into `dataframes`.
# Every line of these files is parsed as a stand-alone JSON document.
for filename in files:
    with open(filename) as f:
        # strip the line break and decode each line right away
        lines = [json.loads(line.rstrip('\n')) for line in f]

    # one DataFrame row per parsed line
    df = pd.DataFrame.from_dict(lines)

    # reduce the nested {'$oid': ...} value to the plain id string
    df['_id'] = df['_id'].apply(get_oid)

    # the 'id' column (e.g. 'G01') becomes the index, converted to int (e.g. 1)
    df = df.set_index('id')
    df.index = df.index.map(str_to_int)

    dataframes.append(df)

In [18]:
(gestures_df, parcours_df, mutations_df) = tuple(dataframes)

In [19]:
gestures_df

Unnamed: 0,_id,isGarbage,isNesture,name,slug
1,58a23a22d826756404709446,,,Single Rotation klein rechtsrum,rssr
2,58a23a22d826756404709447,,,Single Rotation klein linksrum,rssl
3,58a23a22d826756404709448,,,Oszillierende Rotation klein rechtsrum,rosr
4,58a23a22d826756404709449,,,Oszillierende Rotation klein linksrum,rosl
5,58a23a22d82675640470944a,,,Single Rotation groß rechtsrum,rsbr
6,58a23a22d82675640470944b,,,Single Rotation groß linksrum,rsbl
7,58a23a22d82675640470944c,,,Oszillierende Rotation groß rechtsrum,robr
8,58a23a22d82675640470944d,,,Oszillierende Rotation groß linksrum,robl
9,58a23a22d82675640470944e,,,Kontinuierliche Rotation groß rechtsrum,rcbr
10,58a23a22d82675640470944f,,,Kontinuierliche Rotation groß linksrum,rcbl


In [21]:
# "positive" gestures to recognize (not nestures)
gestures_pos = gestures_df[gestures_df['isNesture'] != True].index.tolist()
gestures_pos

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

In [22]:
# "negative" gestures (nestures)
gestures_neg = gestures_df[gestures_df['isNesture'] == True].index.tolist()
nestures = gestures_neg # synonym
gestures_neg

[14, 15, 16, 17, 18]

#### Define handy function shortcut

In [35]:
def gesture_name(gesture_id):
    '''Look up the 'name' entry of a gesture in gestures_df.

    :param gesture_id: index value of gestures_df, or None
    :return: the value in the 'name' column for that index; None if gesture_id is None
    '''
    if gesture_id is not None:
        return gestures_df.loc[gesture_id, 'name']
    return None

## Read Experiment Data

In [37]:
# Experiment Data
data = pd.read_csv(csv_file)

  interactivity=interactivity, compiler=compiler, result=result)


In [38]:
data.head(10)

Unnamed: 0,Trainset,Experiment,Subject,TimeStamp,RFID,GRASP_A,GRASP_B,GRASP_C,AX,AY,AZ,EX,EY,EZ,Parcours,Parcours_Step,Mutation,Host,Host/Spot,Gesture
0,_TRAINSET14022017094616,1,Andreas,0,0,781,8,797,0.06,-0.02,-0.1,216.8125,9.0625,-81.9375,101,1,151,8,,15
1,_TRAINSET14022017094616,1,Andreas,29001,0,782,0,799,0.09,-0.04,-0.11,217.0625,9.0625,-81.9375,101,1,151,8,,15
2,_TRAINSET14022017094616,1,Andreas,46136,0,782,6,798,0.12,-0.09,0.09,217.4375,9.125,-81.875,101,1,151,8,,15
3,_TRAINSET14022017094616,1,Andreas,74902,0,784,7,798,0.08,-0.08,0.03,217.625,9.125,-81.8125,101,1,151,8,,15
4,_TRAINSET14022017094616,1,Andreas,97663,0,781,0,798,0.07,-0.09,0.04,217.9375,9.1875,-81.75,101,1,151,8,,15
5,_TRAINSET14022017094616,1,Andreas,116448,0,784,4,800,0.12,-0.06,-0.03,218.3125,9.25,-81.75,101,1,151,8,,15
6,_TRAINSET14022017094616,1,Andreas,148753,0,783,0,798,0.21,-0.04,0.03,218.5,9.3125,-81.75,101,1,151,8,,15
7,_TRAINSET14022017094616,1,Andreas,167422,0,784,2,798,0.18,-0.1,-0.08,218.6875,9.375,-81.75,101,1,151,8,,15
8,_TRAINSET14022017094616,1,Andreas,187481,0,782,4,799,0.15,-0.18,-0.03,219.0,9.4375,-81.75,101,1,151,8,,15
9,_TRAINSET14022017094616,1,Andreas,213733,0,784,13,799,0.15,-0.18,-0.17,219.125,9.4375,-81.75,101,1,151,8,,15


### Iterate through the data

In [39]:
# see gestures per Parcours
group_by = ('Subject','Experiment','Trainset','Parcours')
data.groupby(group_by)['Gesture'].unique()

Subject  Experiment  Trainset                 Parcours
Alfred   2           _TRAINSET14022017144824  101              [15, 1, 17]
                     _TRAINSET14022017144923  102              [15, 2, 17]
                     _TRAINSET14022017145122  103               [15, 1, 2]
                     _TRAINSET14022017145237  104               [15, 2, 1]
                     _TRAINSET14022017145434  107              [15, 1, 17]
                     _TRAINSET14022017145514  108              [15, 2, 17]
                     _TRAINSET14022017145629  109               [15, 1, 2]
                     _TRAINSET14022017145751  110               [15, 2, 1]
                     _TRAINSET14022017145913  113              [15, 1, 17]
                     _TRAINSET14022017145944  114              [15, 2, 17]
                     _TRAINSET14022017150026  115               [15, 1, 2]
                     _TRAINSET14022017150110  116               [15, 2, 1]
                     _TRAINSET14022017150614 

In [40]:
# see gestures per Parcours Step
group_by = ('Subject','Experiment','Trainset','Parcours','Parcours_Step')
data.groupby(group_by)['Gesture'].unique()

Subject  Experiment  Trainset                 Parcours  Parcours_Step
Alfred   2           _TRAINSET14022017144824  101       1                [15]
                                                        2                 [1]
                                                        3                [17]
                                                        4                 [1]
                                                        5                [17]
                                                        6                 [1]
                                                        7                [17]
                                                        8                 [1]
                                                        9                [17]
                                                        10                [1]
                     _TRAINSET14022017144923  102       1                [15]
                                                        2               

## Replace Nestures

In [41]:
replace_nestures = True

In [42]:
group_by = ('Subject','Experiment','Trainset','Parcours','Mutation','Gesture')
group_df = data.groupby(group_by)
print "Originally", len(group_df), "individual gesture blocks"

Originally 2045 individual gesture blocks


In [43]:
data.groupby(group_by).count().head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,TimeStamp,RFID,GRASP_A,GRASP_B,GRASP_C,AX,AY,AZ,EX,EY,EZ,Parcours_Step,Host,Host/Spot
Subject,Experiment,Trainset,Parcours,Mutation,Gesture,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Alfred,2,_TRAINSET14022017144824,101,101,1,268,268,268,268,268,268,268,268,268,268,268,268,268,268
Alfred,2,_TRAINSET14022017144824,101,151,15,142,142,142,142,142,142,142,142,142,142,142,142,142,0
Alfred,2,_TRAINSET14022017144824,101,152,17,310,310,310,310,310,310,310,310,310,310,310,310,310,0
Alfred,2,_TRAINSET14022017144923,102,102,2,204,204,204,204,204,204,204,204,204,204,204,204,204,204
Alfred,2,_TRAINSET14022017144923,102,151,15,142,142,142,142,142,142,142,142,142,142,142,142,142,0
Alfred,2,_TRAINSET14022017144923,102,153,17,185,185,185,185,185,185,185,185,185,185,185,185,185,0
Alfred,2,_TRAINSET14022017145122,103,101,1,305,305,305,305,305,305,305,305,305,305,305,305,305,305
Alfred,2,_TRAINSET14022017145122,103,102,2,282,282,282,282,282,282,282,282,282,282,282,282,282,282
Alfred,2,_TRAINSET14022017145122,103,151,15,153,153,153,153,153,153,153,153,153,153,153,153,153,0
Alfred,2,_TRAINSET14022017145237,104,103,1,325,325,325,325,325,325,325,325,325,325,325,325,325,325


In [44]:
# Therefore group by PARCOURS:
# grouping keys that subdivide the data by Subject, Experiment, Trainset and Parcours.
# A list (not a tuple) is used on purpose: pandas deprecated interpreting a tuple as
# several grouping keys and newer versions treat a tuple as ONE single key.
group_by = ['Subject', 'Experiment', 'Trainset', 'Parcours']

In [45]:
# Step 1: replace ALL Nestures by NaN
if replace_nestures:
    # make a copy of the complete data before altering anything
    data_nonest = data.copy()
    idx_nestures = data_nonest['Gesture'].isin(nestures)
    # replace nestures by NaN
    data_nonest.loc[idx_nestures,'Gesture'] = np.nan
    print data_nonest.head()

                  Trainset  Experiment  Subject  TimeStamp          RFID  \
0  _TRAINSET14022017094616           1  Andreas          0  000000000000   
1  _TRAINSET14022017094616           1  Andreas      29001  000000000000   
2  _TRAINSET14022017094616           1  Andreas      46136  000000000000   
3  _TRAINSET14022017094616           1  Andreas      74902  000000000000   
4  _TRAINSET14022017094616           1  Andreas      97663  000000000000   

   GRASP_A  GRASP_B  GRASP_C    AX    AY    AZ        EX      EY       EZ  \
0      781        8      797  0.06 -0.02 -0.10  216.8125  9.0625 -81.9375   
1      782        0      799  0.09 -0.04 -0.11  217.0625  9.0625 -81.9375   
2      782        6      798  0.12 -0.09  0.09  217.4375  9.1250 -81.8750   
3      784        7      798  0.08 -0.08  0.03  217.6250  9.1250 -81.8125   
4      781        0      798  0.07 -0.09  0.04  217.9375  9.1875 -81.7500   

   Parcours  Parcours_Step  Mutation  Host Host/Spot  Gesture  
0       101     

In [46]:
# now we can use the Forward FILL and Backward FILL methods of Pandas
# to replace the NaNs by the values that come before or after

# BUT: we shall not do that across Parcours/Experiments!

In [47]:
# GROUPBY lets us apply the fill only within one PARCOURS (the keys in group_by),
# so gesture labels are never carried over from one parcours/experiment to another

if replace_nestures:
    # BACKWARD FILL: every NaN (former nesture) takes the value of the next valid row in the same group
    data_nonest = data_nonest.groupby(group_by).bfill()

    # in case there would be NaNs left, do also a FORWARD FILL
    # (disabled; NOTE(review): the line below refers to `data`, not `data_nonest` - adapt before enabling)
    #data = data.groupby(group_by).ffill()
    
    print "Replaced Nestures by filling with neighboured Gestures!"
    print np.isnan(data_nonest['Gesture']).sum(), "NaN values remaining. Should be 0."
    # NOTE: bfill applies to ALL COLUMNS, not only 'Gesture', so NaNs in other columns get filled as well.
    # The count tables printed in this notebook show this for 'Host/Spot': 0 values for nesture rows
    # before this step, fully populated afterwards.
    # TODO double-check any side effects!
    
    # introducing NaNs caused the Gesture column to be converted from int to float,
    # so we convert back to int (this conversion fails if any NaN is still left in the column)

    data_nonest['Gesture'] = data_nonest['Gesture'].astype(int)

Replaced Nestures by filling with neighboured Gestures!
0 NaN values remaining. Should be 0.


In [48]:
# check via groupby:
group_by = ('Subject','Experiment','Trainset','Parcours','Gesture')
group_df = data_nonest.groupby(group_by)
print "After nesture replacement", len(group_df), "individual gesture blocks"

After nesture replacement 720 individual gesture blocks


In [49]:
group_df.count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,TimeStamp,RFID,GRASP_A,GRASP_B,GRASP_C,AX,AY,AZ,EX,EY,EZ,Parcours_Step,Mutation,Host,Host/Spot
Subject,Experiment,Trainset,Parcours,Gesture,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Alfred,2,_TRAINSET14022017144824,101,1,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720
Alfred,2,_TRAINSET14022017144923,102,2,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531
Alfred,2,_TRAINSET14022017145122,103,1,458,458,458,458,458,458,458,458,458,458,458,458,458,458,458
Alfred,2,_TRAINSET14022017145122,103,2,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282
Alfred,2,_TRAINSET14022017145237,104,1,325,325,325,325,325,325,325,325,325,325,325,325,325,325,325
Alfred,2,_TRAINSET14022017145237,104,2,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476
Alfred,2,_TRAINSET14022017145434,107,1,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576
Alfred,2,_TRAINSET14022017145514,108,2,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518
Alfred,2,_TRAINSET14022017145629,109,1,292,292,292,292,292,292,292,292,292,292,292,292,292,292,292
Alfred,2,_TRAINSET14022017145629,109,2,234,234,234,234,234,234,234,234,234,234,234,234,234,234,234


In [50]:
# keep original data in a variable
data_orig = data

In [51]:
# from here on we use data again for data_nonest

if replace_nestures:
    data = data_nonest

## Data Pre-Processing Part I

### Which Sensor Parameters to use?

In [52]:
include_GRASP = True

if include_GRASP:
    params = ['AX', 'AY', 'AZ', 'EX', 'EY', 'EZ', 'GRASP_A', 'GRASP_B', 'GRASP_C']
else:
    params = ['AX', 'AY', 'AZ', 'EX', 'EY', 'EZ']

# TODO add RFID?

### Global Normalize?

#### Normalize Parameter columns to -1, 1

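Here the normalization is done globally, i.e. over all parameter columns of the whole dataset at once. If `normalize_global` is set to False, normalization can instead be applied locally (per gesture block) later.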

In [53]:
normalize_global = False
# normalize_global means we normalize all parameter columns at once, globally => NO LATER TREATMENT

In [54]:
data[params].head()

Unnamed: 0,AX,AY,AZ,EX,EY,EZ,GRASP_A,GRASP_B,GRASP_C
0,0.06,-0.02,-0.1,216.8125,9.0625,-81.9375,781,8,797
1,0.09,-0.04,-0.11,217.0625,9.0625,-81.9375,782,0,799
2,0.12,-0.09,0.09,217.4375,9.125,-81.875,782,6,798
3,0.08,-0.08,0.03,217.625,9.125,-81.8125,784,7,798
4,0.07,-0.09,0.04,217.9375,9.1875,-81.75,781,0,798


In [55]:
if normalize_global:
    # normalize to -1, 1
    data[params] = preprocessing.minmax_scale(data[params], feature_range=(-1, 1), axis=0, copy=False)

In [56]:
data[params].head()

Unnamed: 0,AX,AY,AZ,EX,EY,EZ,GRASP_A,GRASP_B,GRASP_C
0,0.06,-0.02,-0.1,216.8125,9.0625,-81.9375,781,8,797
1,0.09,-0.04,-0.11,217.0625,9.0625,-81.9375,782,0,799
2,0.12,-0.09,0.09,217.4375,9.125,-81.875,782,6,798
3,0.08,-0.08,0.03,217.625,9.125,-81.8125,784,7,798
4,0.07,-0.09,0.04,217.9375,9.1875,-81.75,781,0,798


## Get Isolated Gestures

### Groupings for each Gesture (by Subject, Experiment, Trainset, Parcours and Mutation)

to be further processed for learning

In [100]:
# GET INDIVIDUAL GESTURES
# group the data by Subject, Experiment, Trainset, Parcours (, Mutation) and Gesture,
# so that every group is one gesture block
#
# The keys are given as a list (not a tuple): pandas deprecated interpreting a tuple as
# several grouping keys and newer versions treat a tuple as ONE single key.

if replace_nestures:
    # NOTE: Mutation HAS to be left out here! Otherwise the gestures that were merged
    # by replacing the nestures would still end up in SEPARATE groups.
    group_by = ['Subject', 'Experiment', 'Trainset', 'Parcours', 'Gesture']
else:
    group_by = ['Subject', 'Experiment', 'Trainset', 'Parcours', 'Mutation', 'Gesture']

group_df = data.groupby(group_by)
group_df.mean().head(100)  # mean is not meaningful here as aggregation - just to print the structure of the data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,TimeStamp,GRASP_A,GRASP_B,GRASP_C,AX,AY,AZ,EX,EY,EZ,Parcours_Step,Mutation,Host
Subject,Experiment,Trainset,Parcours,Gesture,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Alfred,2,_TRAINSET14022017144824,101,1,9568344.52,819.60,768.27,818.40,0.11,0.03,-0.21,181.01,31.66,-91.94,4.66,132.82,8.00
Alfred,2,_TRAINSET14022017144923,102,2,7422825.58,819.11,768.63,814.25,0.01,0.01,-0.10,172.92,30.76,-108.31,4.90,132.87,8.00
Alfred,2,_TRAINSET14022017145122,103,1,8082322.41,812.21,748.25,811.67,-0.01,-0.06,-0.13,180.24,34.25,-101.92,4.12,117.70,8.00
Alfred,2,_TRAINSET14022017145122,103,2,13709221.39,819.00,762.88,823.39,0.05,-0.08,-0.15,153.22,34.29,-110.01,7.12,102.00,8.00
Alfred,2,_TRAINSET14022017145237,104,1,14005074.12,837.17,691.34,840.79,0.01,-0.11,-0.20,193.21,31.54,-79.57,7.01,103.00,8.00
Alfred,2,_TRAINSET14022017145237,104,2,9481434.70,827.71,684.75,832.52,0.07,-0.02,-0.10,189.96,31.64,-88.26,4.83,115.36,8.00
Alfred,2,_TRAINSET14022017145434,107,1,8033683.30,764.59,616.94,785.85,0.02,0.01,-0.21,133.34,18.91,-61.03,4.76,135.89,8.00
Alfred,2,_TRAINSET14022017145514,108,2,7227369.08,773.50,681.71,781.35,-0.18,-0.13,-0.18,120.67,25.54,-64.99,4.80,134.78,8.00
Alfred,2,_TRAINSET14022017145629,109,1,6125707.72,788.98,669.30,777.66,0.01,0.14,-0.19,141.98,15.16,-50.17,4.57,119.36,8.00
Alfred,2,_TRAINSET14022017145629,109,2,9324114.25,799.34,676.02,785.18,-0.13,-0.17,-0.11,131.61,16.49,-52.85,6.84,108.00,8.00


In [101]:
print len(group_df), "individual gesture blocks"

720 individual gesture blocks


## Get Gesture Data: 1 Block per each individual Gesture

We put each time series that belongs to one particular gesture in one particular parcours into a dictionary.
For every gesture, the dictionary holds a list of such time series blocks.

In [59]:
# Iterate over group_df and collect every gesture block individually.
# -> group_data is the DataFrame holding the rows of one single group (one gesture block)

# number of blocks seen; stays 0 if group_df is empty
i = 0
# dictionary: gesture number -> list of sub-datasets (one DataFrame per block), to train ML
gesture_exp_dict = {}

for i, (name_tuple, group_data) in enumerate(group_df, 1):
    # the gesture is the last element of the group key, as defined in group_by above
    gesture = name_tuple[-1]

    # create the list on first sight of this gesture, then add the block
    # NOTE: group_data still contains ALL data columns; it is reduced to params later
    gesture_exp_dict.setdefault(gesture, []).append(group_data)

# single pre-formatted argument: prints the same text under Python 2 and Python 3
print("DONE: %d gesture blocks" % i)

DONE: 720 gesture blocks


In [60]:
# How many data blocks = training examples do we have for each gesture
for gest in sorted(gesture_exp_dict.keys()):
    print "G", gest, '\t', len(gesture_exp_dict[gest]), "training data blocks", '\t', gesture_name(gest) 

G 1 	120 training data blocks 	Single Rotation klein rechtsrum
G 2 	120 training data blocks 	Single Rotation klein linksrum
G 3 	81 training data blocks 	Oszillierende Rotation klein rechtsrum
G 4 	81 training data blocks 	Oszillierende Rotation klein linksrum
G 5 	60 training data blocks 	Single Rotation groß rechtsrum
G 6 	60 training data blocks 	Single Rotation groß linksrum
G 7 	20 training data blocks 	Oszillierende Rotation groß rechtsrum
G 8 	20 training data blocks 	Oszillierende Rotation groß linksrum
G 9 	20 training data blocks 	Kontinuierliche Rotation groß rechtsrum
G 10 	20 training data blocks 	Kontinuierliche Rotation groß linksrum
G 11 	66 training data blocks 	LinearMovement Single
G 12 	32 training data blocks 	LinearMovement Oszillierend
G 13 	20 training data blocks 	Drücken


In [61]:
# how many data points (= samples or timesteps) does each data block have?

data_sizes = {} # collect per gesture in dict
data_sizes_total = [] # collect all in list

print "average data length (number of samples) per gesture:"

for gest in sorted(gesture_exp_dict.keys()):
    print "G", gest, ':\t', 
    data_sizes[gest] = []
    for datablock in gesture_exp_dict[gest]:
        size = datablock.shape[0]
        #print size,
        # TODO data_sizes ...
        data_sizes[gest].append(size)
        data_sizes_total.append(size)
    print int(np.mean(data_sizes[gest]))

average data length (number of samples) per gesture:
G 1 :	650
G 2 :	577
G 3 :	1365
G 4 :	1302
G 5 :	687
G 6 :	561
G 7 :	2861
G 8 :	2413
G 9 :	464
G 10 :	466
G 11 :	1027
G 12 :	1965
G 13 :	905


In [62]:
print min(data_sizes_total), max(data_sizes_total)

168 4124


In [63]:
# average data length (number of samples)
print "Average data length (number of samples) of all gestures"
avg_data_len = int(np.mean(data_sizes_total))
avg_data_len

Average data length (number of samples) of all gestures


988

In [65]:
samples = avg_data_len

In [67]:
# timestamp delta
#max(timestamps2) / len(signal_resampled2) 

In [68]:
# TODO compute sampling rate across all input, not just this one
#sampling_rate = 1.0 / (max(timestamps2) / 1000000.0/ len(signal_resampled2)) # / 1000 = ms to sec
#sampling_rate

NameError: name 'timestamps2' is not defined

In [69]:
# TEMPORARY WORKAROUND:

sampling_rate = 55.131949359557268

# (from testing.ipynb)

# TODO compute real overall sampling rate

## Pre-Processing of the Signals

### Reduce Data to desired parameter columns

In [64]:
# in the group_df iteration before, we kept all data columns 
# now we ITERATE over the gesture_exp_dict again, retaining only the parameter columns

gesture_dict_params = {}
n_datablocks = 0

for g in sorted(gesture_exp_dict.keys()):
    print "G" + str(g) +'\t',
        
    #initalize empty list for this gesture
    gesture_dict_params[g] = [] 
            
    for datablock in gesture_exp_dict[g]:

        # reduce to params columns
        datablock_params = datablock[params] # .T # prevously: # transpose: 9 data rows with params, cols is time series
        
        # add data to new gesture dict
        gesture_dict_params[g].append(datablock_params)
        
        n_datablocks += 1
    
    print len(gesture_dict_params[g]), "data blocks"
print

G1	120 data blocks
G2	120 data blocks
G3	81 data blocks
G4	81 data blocks
G5	60 data blocks
G6	60 data blocks
G7	20 data blocks
G8	20 data blocks
G9	20 data blocks
G10	20 data blocks
G11	66 data blocks
G12	32 data blocks
G13	20 data blocks



### Low-Pass Filter - Testing

removing high frequencies (little fluctuations which are probably not relevant)

In [70]:
# source code from https://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units

from scipy.signal import butter, lfilter, freqz

def butter_lowpass(cutoff, fs, order=5):
    '''Design a digital Butterworth low-pass filter.

    cutoff: cutoff frequency in Hz
    fs: sampling rate in Hz
    order: filter order
    returns: the (b, a) coefficient pair produced by scipy.signal.butter
    '''
    # butter() expects the cutoff relative to the Nyquist frequency (half the sampling rate)
    nyquist = 0.5 * fs
    relative_cutoff = cutoff / nyquist
    return butter(order, relative_cutoff, btype='low', analog=False)

def butter_lowpass_filter(data, cutoff, fs, order=5, axis=-1):
    '''Apply a Butterworth low-pass filter to `data`.

    data: signal array
    cutoff: cutoff frequency in Hz
    fs: sampling rate in Hz
    order: filter order
    axis: axis of `data` that represents time, i.e. along which the filter is run.
          Defaults to -1 (the last axis), which is the lfilter default and the previous behaviour.
          Pass axis=0 for data with one row per time step.
    returns: the filtered signal, same shape as `data`
    '''
    b, a = butter_lowpass(cutoff, fs, order=order)
    return lfilter(b, a, data, axis=axis)

In [71]:
# Filter settings
#fs = 30.0       # sample rate, Hz
fs = sampling_rate   # determined before by average time delta # TODO improve its computation

# CHOOSE HERE desired cutoff frequency of the filter Hz
order = 1 #3 #5 #6

cutoff = 4 #Hz
#cutoff = 3.667 
#cutoff = 1.3
#cutoff = 0.667 
#cutoff = 0.5
#cutoff = 0.33

In [73]:
def preprocess_signal(testdata,
                      normalize=False,
                      resampling=False, n_samples=None, timestamps=None, window='hann',
                      filtering=False):
    '''Optionally normalize, resample and low-pass filter one signal block.

    All steps treat axis 0 of `testdata` as the time axis (one row per time step).

    testdata: signal block (1-D signal or 2-D array/DataFrame with one column per parameter)
    normalize: if True, min/max scale every column to the range [-1, 1]
    resampling: if True, resample the block along time to `n_samples` samples
    n_samples: target number of samples; None keeps the length of the input
    timestamps: optional sample positions of the input rows, passed to resample() as `t`;
                they belong to the INPUT signal, so their length has to match the input length
    window: window specification passed to resample() (None = no window)
    filtering: if True, apply the Butterworth low-pass filter, configured through the
               module-level settings `cutoff`, `fs` and `order`
    returns: the processed block; the unchanged input object if no step is enabled
    '''

    # Min/max normalization
    # Note: to do it the fully right way, the minmax scaling should be done on all training data coherently
    # (currently it is done per block) and the same scaling values (min and max) should be reused here
    # see http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    if normalize:
        testdata = preprocessing.minmax_scale(testdata, feature_range=(-1, 1), axis=0)

    # Time resampling
    if resampling:
        if n_samples is None:
            # without a FIXED number of samples, the length of the input signal is kept
            n_samples = testdata.shape[0]

        if timestamps is None:
            testdata = resample(testdata, num=n_samples, window=window)
        else:
            # with timestamps, resample() additionally returns the resampled positions, which are not needed here.
            # The caller's `window` is honoured here as well (it used to be hard-coded to 'hann').
            testdata, _ = resample(testdata, num=n_samples, t=timestamps, window=window)

    if filtering:
        # The filter helper works along the LAST axis, but time is axis 0 here.
        # Transposing puts time last for filtering and restores the layout afterwards
        # (a no-op for 1-D signals), so each parameter is filtered over time.
        testdata = butter_lowpass_filter(np.asarray(testdata).T, cutoff, fs, order).T

    return testdata

## Feature Calculation

### Zero Crossing Rate

In [74]:
def calc_zero_crossings(datablock, normalized=False):
    '''Compute the zero-crossing rate of every column of `datablock`.

    datablock: pandas DataFrame; consecutive rows are compared, so each column is one signal
    normalized: if True, the per-column rate is additionally divided by the number of rows
    returns: one value per column - the mean of the absolute sign changes between consecutive rows
    '''
    # 1 where the value is negative, 0 otherwise
    negative_flags = np.signbit(datablock).astype(int)
    # 1 wherever the sign differs from the previous row (first row has no predecessor -> NaN)
    sign_changes = negative_flags.diff(axis=0).abs()
    zcr = sign_changes.mean(axis=0)

    if not normalized:
        return zcr
    return zcr / datablock.shape[0]

### Statistical Features

In [75]:
# Calc statistical features

def calc_statistical_features(matrix, axis=0):
    '''Compute 7 summary statistics along `axis` of a 2-D input.

    Output columns: mean, variance, skewness, median, minimum, maximum, Pearson kurtosis.
    NaN results are replaced by 0.

    matrix: 2-D data
    axis: axis to aggregate over (0 or 1)
    returns: array of shape (size of the other axis, 7)
    '''
    # the output has one row per entry of the axis that is NOT aggregated
    n_signals = matrix.shape[int(not axis)]

    statistics = (
        np.mean(matrix, axis=axis),
        np.var(matrix, axis=axis, dtype=np.float64),
        stats.skew(matrix, axis=axis),
        np.median(matrix, axis=axis),
        np.min(matrix, axis=axis),
        np.max(matrix, axis=axis),
        stats.kurtosis(matrix, axis=axis, fisher=False),  # Pearson's kurtosis, as Matlab calculates it
    )

    result = np.zeros((n_signals, 7))
    for column, values in enumerate(statistics):
        result[:, column] = values

    result[np.isnan(result)] = 0
    return result

### Function to compute All features

In [76]:
def calc_all_features(in_data, calc_derivative=False, calc_zerocrossings=False):
    '''Build one flat feature vector for a signal block.

    The vector always starts with the flattened statistical features of `in_data`
    (computed along axis 0). Optional parts are appended in this order:
    statistical features of the gradient along axis 0, then the zero-crossing values.

    in_data: signal block with time along axis 0
             (has to be a pandas DataFrame if calc_zerocrossings is True)
    calc_derivative: also add the statistics of the derivative of all signals
    calc_zerocrossings: also add the zero-crossing features
    returns: 1-D feature array
    '''
    parts = [calc_statistical_features(in_data, axis=0).flatten()]

    if calc_derivative:
        # derivative of all signals, summarised by the same statistics
        derivative = np.gradient(in_data, axis=0)
        parts.append(calc_statistical_features(derivative, axis=0).flatten())

    if calc_zerocrossings:
        parts.append(calc_zero_crossings(in_data))

    return np.concatenate(parts)

## Start Feature Calculation

#### Set Options here:

In [77]:
# OPTIONS:

# either/or:
use_lowpassfilter = False
use_normalized = True 
use_resampled = True 
# if both are False, unresampled unnormalized input is used

# other options: # True is better for all
exclude_non_gestures = True
calc_derivative = True
calc_zerocrossings = True

In [79]:
# Select which gesture numbers go into the feature computation
if exclude_non_gestures:
    # only the "positive" gestures (no nestures)
    gestures_to_process = gestures_pos
else:
    # all gestures for which data blocks exist.
    # gesture_dict_params is used because `input_dict` (an alias of it) is only assigned
    # in a LATER cell, so referring to input_dict here fails on a top-to-bottom run.
    gestures_to_process = list(gesture_dict_params)

In [80]:
# NEW!!!!!
# we added preprocess_signal() function below, thats why we need to use the original gesture_dict as input
# WE DONT USE THE BATCH PROCESSED INPUT ANYMORE

input_dict = gesture_dict_params # non resampled

In [81]:
# COMPUTE FEATURES
# LOOP over all gesture data to create features

# initialize feature output for training data as a list
train_list = []
train_classes_num = []

for gest in sorted(gestures_to_process):
    print "G", gest, ':\t', len(input_dict[gest]), "examples"
    
    for in_data in input_dict[gest]:
        #print datablock.shape, 
        
        #if use_resampled:
        #    # resampled data has already extracted the param columns
        #    in_data = datablock
        #else:
        #    # for non-resampled we have to get the relevant data columns and transpose
        #    in_data = datablock[params].T
        
        # preprocessing
        in_data = preprocess_signal(in_data, use_normalized, 
                                    use_resampled, samples, timestamps=None, window=None, # 'hann'
                                    filtering=use_lowpassfilter)
                
        # convert to dataframe cause we use pandas .diff() in ZCR computation
        in_data = pd.DataFrame(in_data, columns=params)

        # calculate features
        features = calc_all_features(in_data, calc_derivative, calc_zerocrossings)

        # append to output list
        train_list.append(features)
        
        # store class (gesture number) for these features
        train_classes_num.append(gest)

G 1 :	120 examples
G 2 :	120 examples
G 3 :	81 examples
G 4 :	81 examples
G 5 :	60 examples
G 6 :	60 examples
G 7 :	20 examples
G 8 :	20 examples
G 9 :	20 examples
G 10 :	20 examples
G 11 :	66 examples
G 12 :	32 examples
G 13 :	20 examples


In [82]:
features.shape

(135,)

## Machine Learning

### Prepare Training Data

In [83]:
print "Training data:", len(train_list), "examples"

Training data: 720 examples


In [84]:
# make feature array from feature list (ALL training data)

train_data = np.array(train_list)
#del train_list
train_data.shape

(720, 135)

In [85]:
# verify if the training categories (gesture numbers) have the same length
len(train_classes_num)

720

### Standardize

Zero-mean unit-variance Standardization

In [86]:
# ad-hoc scaling
# train_data = preprocessing.scale(train_data,axis=0)
# axis=0 means independently standardize each feature, otherwise (if 1) standardize each sample

In [87]:
# we now use the StandardScaler class to keep the mean and variance for later
# (the continuous prediction further below re-applies them via standardizer.transform())
# NOTE(review): the scaler is fitted on ALL examples here, i.e. before the train/test
# split that follows, so the test examples contribute to the scaling statistics.
# Consider fitting on the training part only for a stricter evaluation.
standardizer = preprocessing.StandardScaler()
train_data = standardizer.fit_transform(train_data)

### Train/Test Set Split

In [88]:
# split the data into train/test set

# fraction of all examples that is held out as test set
testset_size = 0.25

# sklearn >= 0.18
# use random_state to avoid that the results fluctuate randomly
# (the split is stratified by the gesture classes passed as second argument to split())
splitter = StratifiedShuffleSplit(n_splits=1, test_size=testset_size, random_state=0) 
splits = splitter.split(train_data, train_classes_num)

# Note: this for loop is only executed once, if n_splits==1
# After the loop, train_set/test_set and train_classes/test_classes hold that single split.
for train_index, test_index in splits:
    #print "TRAIN INDEX:", train_index
    #print "TEST INDEX:", test_index
    
    # split the data
    train_set = train_data[train_index]
    test_set = train_data[test_index]
    
    # and the numeric classes (groundtruth)
    # (train_classes_num is a plain list, so it is converted to an array for index selection)
    train_classes = np.array(train_classes_num)[train_index]
    test_classes = np.array(train_classes_num)[test_index]
    
    print "TRAIN SIZE:", train_set.shape
    print "TEST SIZE:", test_set.shape
    

TRAIN SIZE: (540, 135)
TEST SIZE: (180, 135)


## 1) Gesture Recognition - isolated (+ independent of host)

### ML Algorithm: SVM

Support Vector Machines

In [89]:
# try 3 different SVM kernels
kernels = ['linear','poly','rbf']

In [90]:
# Train one One-vs-Rest SVM per kernel on the train split.
# models maps kernel name -> fitted classifier, so the evaluation cells can pick any of them.
models = {}

for kernel in kernels:
    # trailing comma: the training time is printed on the same line
    print "SVM", kernel,
    
    # TRAIN 
    start_time = time.time() # measure time

    model = OneVsRestClassifier(SVC(kernel=kernel)) #, degree=degree)) #, n_jobs=-1)  # n_jobs = n cpus, -1 = all
    # full set
    #model.fit(train_data, train_classes_num)
    # train set
    model.fit(train_set, train_classes)
    
    # store in dict
    models[kernel] = model

    end_time = time.time()
    # timestr() only uses whole seconds, so short trainings are shown as 0:00:00
    print "Training time:", timestr(end_time - start_time)

SVM linear Training time: 0:00:00
SVM poly Training time: 0:00:00
SVM rbf Training time: 0:00:00


#### Verification on Train Set (just for plausibility)

In [91]:
# predict on train set
# NOTE: `model` is the variable left over from the training loop, i.e. the classifier
# of the LAST kernel in `kernels` ('rbf'); use models[<kernel>] to check another one.
pred_train = model.predict(train_set)
pred_train

array([ 6,  2,  1,  5,  9,  4, 10,  1,  5, 11,  3, 12,  4,  7, 12, 12,  2,
        2,  1,  1,  1,  6,  1,  6,  3, 11, 12,  3,  2,  4,  4,  1,  1,  9,
        4,  1,  1, 11, 11,  1, 11, 13,  4,  1,  2,  4,  2,  8,  6,  4, 10,
        6,  9,  5,  4,  5, 11,  7,  1,  5,  8,  3, 11,  6,  3, 11,  6, 11,
        6, 13,  1, 11,  3,  1, 11,  1, 11, 13,  5, 13,  3,  3,  1,  6,  1,
        1,  2,  8, 12,  2,  1,  2, 12,  1,  2,  3,  3,  4,  1, 10,  6, 11,
        2,  1,  4,  2, 11,  2,  2,  1,  5,  6,  3, 11,  6,  3,  1,  5, 12,
        9,  2,  1,  1,  2,  3,  2,  2,  4, 11,  2,  6,  4,  1, 11,  9,  1,
       12,  2,  8,  6,  1,  1, 11,  7, 11, 12,  5,  6, 11,  3,  1, 12, 10,
        3,  3,  2,  3,  5, 13,  4,  4,  4,  1,  2,  2,  6,  3,  5, 10,  5,
        2,  1,  1, 13, 11,  2, 12,  5,  1,  3, 11, 10, 11,  5,  6,  5,  3,
        1,  4,  3,  5,  6, 10,  4,  5,  3,  1,  9, 11,  2,  8,  6,  4,  3,
       11,  6,  6, 11,  2, 11,  4,  1,  1,  4,  2,  2, 12, 11,  9,  2,  7,
        6,  3, 11,  1,  1

In [92]:
train_classes

array([ 6,  2,  1,  5,  9,  4, 10,  1,  5, 11,  3, 12,  4,  7, 12, 12,  1,
        2,  1,  1,  1,  5,  1,  6,  3, 11, 12,  3,  2,  4,  4,  1,  1,  9,
        4,  1,  1, 11, 11,  1, 11, 13,  4,  1,  2,  4,  2,  8,  6,  4, 10,
        6,  9,  5,  4,  5, 11,  7,  1,  5,  8,  3, 11,  6,  3, 11,  6, 11,
        6, 13,  1, 11,  3,  1, 11,  1, 11, 13,  5, 13,  3,  3,  1,  6,  1,
        1,  2,  8, 12,  2,  1,  2, 12,  1,  2,  3,  3,  4,  1, 10,  6, 11,
        2,  1,  4,  2, 11,  2,  2,  1,  5,  6,  3, 11,  6,  3,  1,  5, 12,
        9,  2,  1,  1,  2,  3,  2,  2,  4, 11,  2,  6,  4,  1, 11,  9,  1,
       12,  2,  8,  6,  1,  1, 11,  7, 11, 12,  5,  6, 11,  3,  1, 12, 10,
        3,  3,  2,  3,  5, 13,  4,  4,  4,  1,  2,  2,  6,  3,  5, 10,  5,
        2,  1,  1, 13, 11,  2, 12,  5,  1,  3, 11, 10, 11,  5,  6,  5,  3,
        1,  4,  3,  5,  6, 10,  4,  5,  3,  1,  9, 11,  2,  8,  6,  4,  3,
       11,  6,  6, 11,  2, 11,  4,  1,  1,  4,  2,  2, 12, 11,  9,  2,  7,
        1,  3, 11,  1,  1

In [93]:
# Accuracy on train set (manual computation)
np.sum(pred_train == train_classes) * 1.0 / len(train_classes)

0.98703703703703705

In [94]:
# Accuracy on train set (using scikit-learn)
accuracy_score(train_classes, pred_train)

0.98703703703703705

## Evaluation

### Evaluation - Overall

In [95]:
result_ov = pd.DataFrame(index=kernels, columns=['Accuracy','Precision','Recall','F-Measure'])

In [96]:
# Fill one row of result_ov per kernel with the scores of that kernel's model on the test split.
for k in kernels:
    # predict on TEST set
    pred_test = models[k].predict(test_set) 
    
    # Accuracy, Precision, Recall and F-Measure on TEST set
    # (Precision, Recall and F-Measure are macro-averaged over the gesture classes)
    result_ov.loc[k,'Accuracy'] = accuracy_score(test_classes, pred_test)
    result_ov.loc[k,'Precision'] = precision_score(test_classes, pred_test, average='macro')
    result_ov.loc[k,'Recall'] = recall_score(test_classes, pred_test, average='macro')
    result_ov.loc[k,'F-Measure'] = f1_score(test_classes, pred_test, average='macro')

In [97]:
pd.options.display.float_format = '{:,.2f}'.format
result_ov*100

Unnamed: 0,Accuracy,Precision,Recall,F-Measure
linear,70.0,74.94,72.49,73.09
poly,81.11,85.64,87.37,85.88
rbf,81.11,83.39,87.88,84.74


### Evaluation - Per Gesture

In [98]:
# manual selection which one was the best one
best_model = models['poly']
pred_test = best_model.predict(test_set) 

In [99]:
# TODO check if the sorting of precision_score etc. is really in this order!!
labels = sorted(np.unique(test_classes))
gesture_names = [gesture_name(l) for l in labels]

In [None]:
# nice result dataframe
columns = ['Gesture','N_train','N_test','Precision','Recall','F1']
result_df = pd.DataFrame(index=labels,columns=columns)
result_df['Gesture'] = gesture_names

In [None]:
# number of train / test instances
values, counts = np.unique(train_classes, return_counts=True)
result_df['N_train'] = pd.Series(counts, index=values)
values, counts = np.unique(test_classes, return_counts=True)
result_df['N_test'] = pd.Series(counts, index=values)

In [None]:
# per class evaluation
# average=None returns one score per class. The scores are assigned positionally to
# result_df, whose index is `labels`, so labels=labels is passed explicitly: this pins
# both the order and the number of returned scores to the rows of result_df, even if
# pred_test contains a gesture that does not occur in test_classes.
result_df['Precision'] = precision_score(test_classes, pred_test, labels=labels, average=None) * 100
result_df['Recall'] = recall_score(test_classes, pred_test, labels=labels, average=None) * 100
result_df['F1'] = f1_score(test_classes, pred_test, labels=labels, average=None) * 100

In [None]:
result_df

In [None]:
# compare average P, R and F to overall P, R and F above (same)
result_df.mean(axis=0)

In [None]:
# Confusion Matrix
conf = confusion_matrix(test_classes, pred_test, labels=labels) # labels defines the order
labels_long = gestures_df.loc[labels,'name']
conf_df = pd.DataFrame(conf, index=labels_long, columns=labels)
conf_df

## 2) Continuous Time Series Prediction

What is our input stream?

The data of 1 trainset, because after each trainset, the TimeStamp is reset.

In [None]:
# a) loop over each Trainset
#group_by = ['Subject','Experiment','Trainset']

# b) use Experiment as the block where we do predictions (means it includes timestamp resets!!)
# The grouping keys are given as a list: that is the documented way to group by several
# columns, whereas a tuple may be interpreted as one single key by newer pandas versions.
# The group names obtained when iterating over group_df are still tuples.
group_by = ['Subject','Experiment']

group_df = data.groupby(group_by)
group_df.max().head(50) 

In [None]:
print len(group_df), "Experiments / Trainsets"

In [None]:
# iterate over each Trainset
# (for testing only the FIRST group is taken: the loop is left via break right away,
#  so name_tuple and group_data keep the values of that first group for the cells below)
i =0
for name_tuple, group_data in group_df:
    i += 1
    #print str(name_tuple)
    
    # the group name has 3 entries when grouping includes Trainset, otherwise 2
    if len(name_tuple) == 3:
        subject, exp, trainset = name_tuple
    elif len(name_tuple) == 2:
        subject, exp = name_tuple
        trainset = None
    
    break # for testing we just do 1 loop
    

In [None]:
name_tuple

In [None]:
group_data['TimeStamp'].min()

In [None]:
group_data['TimeStamp'].max()

In [None]:
# Only checked when grouping per Trainset (3 entries in the group name):
# a whole Experiment includes timestamp resets, so the check would fail there.
if len(name_tuple) == 3:
    # check if TimeStamps are strictly increasing: every difference between consecutive
    # rows must be > 0 ([1:] skips the first diff entry, which has no predecessor)
    if not np.all(group_data['TimeStamp'].diff()[1:] > 0):
        raise ValueError("Time Stamps are not monotonously increasing!")

In [None]:
# set these to None so that plot title is not shown wrongly
parcours = None
mutation = None
gesture = None

In [None]:
# which gestures appear in this Experiment or Trainset
group_data['Gesture'].unique()

In [None]:
one_plot(group_data,'TimeStamp')

In [None]:
grid_plot(group_data)

### Pre-Process the Data - Testing

the same way as it was done for training set

In [None]:
pd.options.display.float_format = '{:,.5f}'.format

In [None]:
# get the relevant columns out of group_data

In [None]:
timestamps = group_data['TimeStamp'].tolist()

In [None]:
test_gestures = group_data['Gesture'].tolist()

In [None]:
# 9 parameters columns
testdata = group_data[params]
testdata.shape

In [None]:
# Min/max normalization
# Note: to do it the fully right way, the minmax scaling should be done on all training data coherently
# (currently its done per training block) and the same scaling values (min and max) should be reused here
# see http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
# TODO store minmax_scale from training data and reapply same scaling here

if use_normalized:
    testdata = preprocessing.minmax_scale(testdata, feature_range=(-1, 1), axis=0, copy=make_copy)

In [None]:
# convert to dataframe cause plot needs column names
testdata = pd.DataFrame(testdata, columns=params)

grid_plot(testdata)

In [None]:
testdata.head(15)

In [None]:
# time resample

n_samples = len(timestamps)  

# NOTE: testdata_res and timestamps2 are only defined when use_resampled is True;
# the inspection cells that follow rely on them.
if use_resampled:
    # the number of samples stays the same
    # but we use the original timestamps to re-align the signal
    # (with t given, resample returns both the resampled data and the new time stamps)
    testdata_res, timestamps2 = resample(testdata, num=n_samples, t=timestamps)
    
    # convert to dataframe cause plot needs column names
    testdata_res = pd.DataFrame(testdata_res, columns=params)

In [None]:
timestamps[:15]

In [None]:
timestamps2[:15]

In [None]:
# timestamps are now equidistant
timestamps2[1:15] - timestamps2[:14]

In [None]:
testdata_res.head(15)

In [None]:
# debug check whether the values have been altered -> OK
#testdata == testdata_res

In [None]:
# overwrite testdata with testdata_res for subsequent coherent usage
#testdata = testdata_res

In [None]:
grid_plot(testdata)

### Continuous Prediction

In [None]:
# for our window_size (= signal length of input to Machine Learning)
# we take the average signal length of the trained gestures
window_size = avg_data_len 
window_size

In [None]:
# PREDICTION RESOLUTION
# how quickly do we step forward (in samples)

# for now we choose half the window_size
# - explicit integer floor division, so the step is a whole number of samples
#   independent of the Python version's division semantics
# - at least 1, because a step of 0 would never advance the prediction loop
step_size = max(1, int(window_size) // 2)

# can be set smaller for higher resolution

# TODO: set in milliseconds - convert back to sample length

step_size

In [None]:
# TODO: align with preprocess_signal function used in training data above

def preprocess_signal_continuous(testdata, normalize=False, resampling=False, timestamps=None, filtering=False):
    '''Pre-process one continuous block of signal data; the enabled steps are applied in this order:

    1) normalize:  min/max scaling of each column (axis=0) to the range (-1, 1)
    2) resampling: scipy.signal.resample to the same number of samples
    3) filtering:  low-pass filter via butter_lowpass_filter(), using the
                   notebook-level settings cutoff, fs and order

    :param testdata: signal block with one row per sample (DataFrame or 2D array)
    :param normalize: if True, apply min/max normalization
    :param resampling: if True, apply time resampling
    :param timestamps: optional time stamps, one per row of testdata; only used for resampling
    :param filtering: if True, apply the low-pass filter
    :return: the processed signal block (the unchanged input if no step is enabled)
    '''

    # Min/max normalization
    # Note: to do it the fully right way, the minmax scaling should be done on all training data coherently
    # (currently its done per training block) and the same scaling values (min and max) should be reused here
    # see http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    if normalize:
        testdata = preprocessing.minmax_scale(testdata, feature_range=(-1, 1), axis=0)

    # Time Resampling
    if resampling:
        # the number of samples stays the same
        n_samples = testdata.shape[0] # must match len(timestamps)

        if timestamps is None:
            # without time stamps resample() returns the resampled data only,
            # so there is nothing to unpack
            testdata = resample(testdata, num=n_samples) #, window='hann')
        else:
            # with time stamps resample() returns (data, new time stamps);
            # the new time stamps are not needed here
            testdata, _ = resample(testdata, num=n_samples, t=timestamps) #, window='hann')

    if filtering:
        # filter the signal block with low-pass filter
        testdata = butter_lowpass_filter(testdata, cutoff, fs, order)

    return testdata

In [None]:
# PREDICTION LOOP OVER 1 TRAINING INPUT BLOCK

def continuous_prediction(testdata, window_size, step_size, gestures=None):
    '''Slide a window over a continuous signal block and predict one gesture per window.

    For each window the features are computed with calc_all_features(), standardized with
    the fitted `standardizer` and classified with `best_model` (all notebook-level names).
    The groundtruth of a window is the gesture label occurring most often inside it.

    :param testdata: signal block with one row per sample; must support row slicing
        with [start:stop] and be accepted by calc_all_features()
    :param window_size: window length in samples (must be > 0)
    :param step_size: number of samples the window is moved forward each step (must be > 0)
    :param gestures: groundtruth gesture label per row of testdata; if None, the
        notebook-level list `test_gestures` is used (previous behaviour)
    :return: tuple (test_groundtruth, predictions), two lists with one entry per window;
        both are empty if testdata is shorter than window_size
    :raises ValueError: if window_size or step_size is not positive
    '''
    if window_size <= 0:
        raise ValueError("window_size must be positive, got %r" % (window_size,))
    if step_size <= 0:
        # a non-positive step would never move the window forward (endless loop)
        raise ValueError("step_size must be positive, got %r" % (step_size,))

    if gestures is None:
        gestures = test_gestures

    n_samples = testdata.shape[0]

    # output
    test_groundtruth = [] # we create the groundtruth to compare with here
    predictions = []  # predictions are collected here

    pos = 0
    # '<=' so that the last full window, ending exactly at the final sample, is included
    while pos <= (n_samples - window_size):
        window_end = pos + window_size

        # cut a window out of the incoming signal
        signal = testdata[pos:window_end]

        # to get the "correct" gesture for that window, we cut the same part of the gesture information
        # and do a majority vote to say which gesture is pre-dominant in this window
        gt_gesture = Counter(gestures[pos:window_end]).most_common()[0][0]

        # calc features
        features = calc_all_features(signal, calc_derivative, calc_zerocrossings)

        # reshape to row vector for standardize and predict below (= single input sample)
        features = features.reshape(1, -1)

        # STANDARDIZE features, the same way as done in training (reusing those mean and var)
        features = standardizer.transform(features)

        # ML prediction of gesture
        pred_gesture = best_model.predict(features)[0]

        # add to groundtruth and prediction list
        test_groundtruth.append(gt_gesture)
        predictions.append(pred_gesture)

        # step forward
        pos += step_size

    return test_groundtruth, predictions

In [None]:
# LOOP over ALL Experiments or Trainsets
# Each group is preprocessed and run through the sliding-window prediction;
# groundtruth and predictions of all groups are concatenated for the evaluation below.

i = 0
n_groups = len(group_df)

test_groundtruth_all = [] # we create the groundtruth to compare with here
predictions_all = []  # predictions are collected here

for name_tuple, group_data in group_df:
    
    i += 1
    # progress output (trailing comma: the following prints continue on the same line)
    print "Experiment", i, "/", n_groups, ":", str(name_tuple), group_data.shape,
    
    # just metadata
    if len(name_tuple) == 3:
        subject, exp, trainset = name_tuple
    elif len(name_tuple) == 2:
        subject, exp = name_tuple
        trainset = None
    
    # get signals, timestamps and gesture groundtruth
    # NOTE: test_gestures is read as a notebook-level name inside continuous_prediction(),
    # so it has to be set here before that function is called
    timestamps = group_data['TimeStamp'].tolist()
    test_gestures = group_data['Gesture'].tolist()
    testdata = group_data[params]
    
    # preprocess testdata
    print "Preprocessing ...",
    testdata = preprocess_signal_continuous(testdata, use_normalized, use_resampled, timestamps, use_lowpassfilter)
    #print testdata.shape
    
    # convert to dataframe cause we use pandas .diff() in ZCR computation
    testdata = pd.DataFrame(testdata, columns=params)
    
    print "Prediction:", 
    test_groundtruth, predictions = continuous_prediction(testdata, window_size, step_size)
    print len(predictions), "predictions"
    
    # collect the results of this group
    test_groundtruth_all.extend(test_groundtruth)
    predictions_all.extend(predictions)
    

In [None]:
n_samples

In [None]:
gesture_name(11)

In [None]:
signal.shape

In [None]:
features.shape

In [None]:
n_samples

In [None]:
print len(predictions_all), "predictions"

In [None]:
print "collected true gestures include:"
np.unique(test_groundtruth_all).tolist()

In [None]:
print "predicted gestures include:"
np.unique(predictions_all).tolist()

In [None]:
pd.DataFrame({'groundt':test_groundtruth_all, 'pred':predictions_all})

In [None]:
# Overall scores of the continuous prediction, one row per measure.
# NOTE: this re-uses (overwrites) the name result_ov of the per-kernel table from part 1.
result_ov = pd.DataFrame(columns=['result']) #columns=['Accuracy','Precision','Recall','F-Measure'])

# Accuracy, Precision, Recall and F-Measure over all windows of all groups
# (Precision, Recall and F-Measure are macro-averaged over the gesture classes)
result_ov.loc['Accuracy'] = accuracy_score(test_groundtruth_all, predictions_all)
result_ov.loc['Precision'] = precision_score(test_groundtruth_all, predictions_all, average='macro')
result_ov.loc['Recall'] = recall_score(test_groundtruth_all, predictions_all, average='macro')
result_ov.loc['F-Measure'] = f1_score(test_groundtruth_all, predictions_all, average='macro')
result_ov

#### Confusion Matrix

In [None]:
conf = confusion_matrix(test_groundtruth_all, predictions_all, labels=labels) # labels defines the order

In [None]:
labels_long = gestures_df.loc[labels,'name']
conf_df = pd.DataFrame(conf, index=labels_long, columns=labels)
conf_df

In [None]:
labels_long