# Make-o-Matic Gesture Recognition

## Part 2: Testing

2017 by Thomas Lidy, TU Wien

### Requirements

Python 2.7

pip install -r requirements.txt

Tested on OS: Ubuntu 16.04.3 LTS

In [1]:
import numpy as np
import pandas as pd
import json
import time # for time measuring
import datetime # for time printing

from scipy import stats
from scipy.signal import resample
from collections import Counter # for majority vote
from collections import OrderedDict # for color palette

# plotting
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

# Machine Learning
from sklearn import preprocessing, svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
def str_to_int(string):
    '''Turn an ID string such as "G01" into the integer 1.

    The first character (the type prefix) is dropped and the remainder
    is parsed as an int.
    '''
    numeric_part = string[1:]
    return int(numeric_part)

In [3]:
def timestr(seconds):
    '''Format a duration given in seconds as an H:MM:SS string.

    Only the integer part of seconds is used; a fractional part
    (e.g. milliseconds) is cut off before formatting.
    :return: string
    '''
    whole_seconds = int(seconds)
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

## Read Data

In [4]:
# main data: path of the CSV file holding the experiment recordings

# original input
#csv_file = 'data/EXPORT_09042017173622.csv'

# preprocessed input
csv_file = 'data/EXPORT_09042017173622_preprocessed.csv'


# json files to translate gesture, parcours and mutation IDs into long text
#gestures_file = 'data/gestures.json' # this is the file edited manually by us to conform to json
gestures_file = 'data/gestures.json.orig' # NOTE(review): presumably the original, unedited file (one JSON document per line) - confirm
parcours_file = 'data/parcours.json'
mutations_file = 'data/mutations.json'

# the order of `files` determines the order of the DataFrames collected in `dataframes`
files = (gestures_file, parcours_file, mutations_file)
dataframes = []

# NOTE THAT THESE JSON FILES ARE NOT JSON CONFORM
# each line is a json string on its own, so we need to process the json line by line and combine them into a list

In [5]:
def get_oid(oid_dict):
    '''Return the bare id stored under the "$oid" key of oid_dict.

    Example: {u'$oid': u'589c8ed31337b5ab1e1be121'} gives u'589c8ed31337b5ab1e1be121'.
    '''
    oid = oid_dict['$oid']
    return oid

In [6]:
# get meta-files with descriptions of gestures, parcours and mutations
# Load the meta-files (descriptions of gestures, parcours and mutations).
# Every line of a file is parsed as a JSON document of its own; one DataFrame
# per file is appended to `dataframes`, in the order given by `files`.
for filename in files:
    with open(filename) as f:
        lines = [line.rstrip('\n') for line in f]   # .decode("utf-8")

    # parse each line separately
    lines = [json.loads(line) for line in lines]
    
    # convert list of json lines into Dataframe
    df = pd.DataFrame.from_dict(lines)
    
    # replace the {'$oid': ...} wrapper in the '_id' column by the plain id string
    df['_id'] = df['_id'].apply(get_oid)
    
    # use the 'id' column as the index
    df.set_index('id', inplace=True)
    
    # convert index (ID) from string like 'G01' to int
    df.index = df.index.map(str_to_int)
    
    dataframes.append(df)

In [7]:
(gestures_df, parcours_df, mutations_df) = tuple(dataframes)

In [8]:
gestures_df

Unnamed: 0,_id,isGarbage,isNesture,name,slug
1,58a23a22d826756404709446,,,Single Rotation klein rechtsrum,rssr
2,58a23a22d826756404709447,,,Single Rotation klein linksrum,rssl
3,58a23a22d826756404709448,,,Oszillierende Rotation klein rechtsrum,rosr
4,58a23a22d826756404709449,,,Oszillierende Rotation klein linksrum,rosl
5,58a23a22d82675640470944a,,,Single Rotation groß rechtsrum,rsbr
6,58a23a22d82675640470944b,,,Single Rotation groß linksrum,rsbl
7,58a23a22d82675640470944c,,,Oszillierende Rotation groß rechtsrum,robr
8,58a23a22d82675640470944d,,,Oszillierende Rotation groß linksrum,robl
9,58a23a22d82675640470944e,,,Kontinuierliche Rotation groß rechtsrum,rcbr
10,58a23a22d82675640470944f,,,Kontinuierliche Rotation groß linksrum,rcbl


In [9]:
#gestures_df.to_csv('data/gestures.csv', sep=';', encoding='utf-8')

In [10]:
# "positive" gestures to recognize (not nestures)
gestures_pos = gestures_df[gestures_df['isNesture'] != True].index.tolist()
gestures_pos

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

In [11]:
# "negative" gestures (nestures)
gestures_neg = gestures_df[gestures_df['isNesture'] == True].index.tolist()
nestures = gestures_neg # synonym
gestures_neg

[14, 15, 16, 17, 18]

In [12]:
# show first 10 parcours
parcours_df.head(10)

Unnamed: 0,_id,comment,exercises,pose
101,589c897d1337b5ab1e1be118,Zwinge X locker rechtsdrehen einzeln 5x,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
102,589c8ba71337b5ab1e1be11c,Zwinge X locker linksdrehen einzeln 5x,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
103,589c8dbd1337b5ab1e1be11f,Zwinge X locker rechts- und linksdrehen einzel...,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
104,589c8ed31337b5ab1e1be121,Zwinge X fest links- und rechtsdrehen einzeln 5x,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
105,589c8fcc1337b5ab1e1be125,Zwinge X oszilierend linksdrehen,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
106,589c901a1337b5ab1e1be127,Zwinge X oszilierend rechtsdrehen,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
107,589c90e31337b5ab1e1be12c,Zwinge Y locker rechtsdrehen einzeln 5x,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
108,589c911c1337b5ab1e1be12e,Zwinge Y locker linksdrehen einzeln 5x,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
109,589c91961337b5ab1e1be132,Zwinge Y locker rechts- und linksdrehen einzel...,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...
110,589c92351337b5ab1e1be134,Zwinge Y fest links- und rechtsdrehen einzeln 5x,"[{u'signal': {u'beep': True}, u'mutation': {u'...",{u'start': u'Host 008 ist auf Werkbank fixiert...


In [13]:
parcours_df.loc[101,'exercises']

[{u'mutation': {u'id': u'M151'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M101'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M152'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M101'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M152'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M101'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M152'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M101'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M152'}, u'signal': {u'beep': True}},
 {u'mutation': {u'id': u'M101'}, u'signal': {u'beep': True}}]

In [14]:
parcours_df.loc[101,'pose']

{u'start': u'Host 008 ist auf Werkbank fixiert, Griffachse X, untere Spannbacke fixiert (Xcm), Gewinde genau in Mitte. Subject steht frontal vor Werkbank, etwas links von Host, F\xfc\xdfe in Schulterbreite, Arme h\xe4ngen locker hinunter.'}

In [15]:
# show first 10 mutations
mutations_df.head(10)

Unnamed: 0,_id,hands,params,slug
101,58a24ffa5097dc5998d3c5d0,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaaaaaabaaaba
102,58a24ffa5097dc5998d3c5d1,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaaaaaabababa
103,58a24ffa5097dc5998d3c5d2,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaaaaabbaaaba
104,58a24ffa5097dc5998d3c5d3,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaaaaabbababa
105,58a24ffa5097dc5998d3c5d4,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaaaaacbaabba
106,58a24ffa5097dc5998d3c5d5,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaaaaadbabbba
107,58a24ffa5097dc5998d3c5d6,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaabaaabaaaba
108,58a24ffa5097dc5998d3c5d7,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaabaaabababa
109,58a24ffa5097dc5998d3c5d8,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaabaabbaaaba
110,58a24ffa5097dc5998d3c5d9,"{u'right': {u'host': {u'spot': {u'id': u'A', u...","[{u'slug': u'm0', u'value': u'stehend', u'labe...",aaabaabbababa


In [16]:
mutations_df.loc[151,'hands']

{u'right': {u'gesture': {u'id': u'G15'},
  u'host': {u'id': u'H008', u'name': u'Schraubzwinge klein'},
  u'instruction': u'zu Griff f\xfchren, greifen mit Faust'}}

In [17]:
mutations_df.loc[151,'params']

nan

In [18]:
mutations_df.loc[152,'hands']

{u'right': {u'gesture': {u'id': u'G17'},
  u'host': {u'id': u'H008', u'name': u'Schraubzwinge klein'},
  u'instruction': u'Umgreifen (Hand l\xf6sen, 90\xb0 Linksdrehung, Host mit Faust-Handhaltung erneut greifen)'}}

In [19]:
mutations_df.loc[101,'hands']

{u'right': {u'gesture': {u'id': u'G01'},
  u'host': {u'id': u'H008',
   u'name': u'Schraubzwinge klein',
   u'spot': {u'id': u'A', u'name': u'Drehgriff'}},
  u'instruction': u'Zwinge X locker rechtsdrehen einzeln'}}

In [20]:
mutations_df.loc[101,'params']

[{u'label': u'K\xf6rperhaltung', u'slug': u'm0', u'value': u'stehend'},
 {u'label': u'Spot', u'slug': u'm1', u'value': u'Drehgriff'},
 {u'label': u'Position Spot', u'slug': u'm2', u'value': u'mittig vor Subject'},
 {u'label': u'Orientierung Spot', u'slug': u'm3', u'value': u'Griffachse X'},
 {u'label': u'ausf\xfchrende Hand', u'slug': u'm4', u'value': u'Rechte'},
 {u'label': u'Handhaltung', u'slug': u'm5', u'value': u'Faust'},
 {u'label': u'Widerstand', u'slug': u'm6', u'value': u'keiner'},
 {u'label': u'Drehweise',
  u'slug': u'm7',
  u'value': u'ganze Hand mitbewegen, ganze Hand umgreifen'},
 {u'label': u'Drehwinkel', u'slug': u'm8', u'value': u'90\xb0'},
 {u'label': u'Drehrichtung',
  u'slug': u'm9',
  u'value': u'rechtsrum (schlie\xdfen)'},
 {u'label': u'Bewegungsablauf', u'slug': u'm10', u'value': u'einzeln'},
 {u'label': u'2. Hand', u'slug': u'm11', u'value': u'ruht woanders'}]

In [21]:
mutations_df.loc[101,'hands']['right']['gesture']['id']

u'G01'

In [22]:
mutations_df.loc[101,'params']

[{u'label': u'K\xf6rperhaltung', u'slug': u'm0', u'value': u'stehend'},
 {u'label': u'Spot', u'slug': u'm1', u'value': u'Drehgriff'},
 {u'label': u'Position Spot', u'slug': u'm2', u'value': u'mittig vor Subject'},
 {u'label': u'Orientierung Spot', u'slug': u'm3', u'value': u'Griffachse X'},
 {u'label': u'ausf\xfchrende Hand', u'slug': u'm4', u'value': u'Rechte'},
 {u'label': u'Handhaltung', u'slug': u'm5', u'value': u'Faust'},
 {u'label': u'Widerstand', u'slug': u'm6', u'value': u'keiner'},
 {u'label': u'Drehweise',
  u'slug': u'm7',
  u'value': u'ganze Hand mitbewegen, ganze Hand umgreifen'},
 {u'label': u'Drehwinkel', u'slug': u'm8', u'value': u'90\xb0'},
 {u'label': u'Drehrichtung',
  u'slug': u'm9',
  u'value': u'rechtsrum (schlie\xdfen)'},
 {u'label': u'Bewegungsablauf', u'slug': u'm10', u'value': u'einzeln'},
 {u'label': u'2. Hand', u'slug': u'm11', u'value': u'ruht woanders'}]

In [23]:
mutations_df.loc[151]

_id                                58a250245097dc5998d3c657
hands     {u'right': {u'host': {u'id': u'H008', u'name':...
params                                                  NaN
slug                                        Start - Greifen
Name: 151, dtype: object

### Define handy function shortcuts

In [24]:
def gesture_name(gesture_id):
    '''Look up the 'name' of a gesture in gestures_df by its integer ID.

    Returns None when gesture_id is None.
    '''
    if gesture_id is not None:
        return gestures_df.loc[gesture_id,'name']
    return None

In [25]:
def get_mutation_gesture(mutation_id):
    '''Return the integer gesture ID of the right hand in a mutation.

    mutation_id is the integer index label of the mutation in mutations_df.
    The gesture is stored there as a string like "G01" and is converted
    to an int with str_to_int.
    '''
    # label-based lookup with .loc (the .ix indexer is deprecated and no longer
    # available in newer pandas versions)
    hands = mutations_df.loc[mutation_id,'hands']
    gest_str = hands['right']['gesture']['id']
    return str_to_int(gest_str)

## Read Experiment Data

In [26]:
# Experiment Data
data = pd.read_csv(csv_file)

  interactivity=interactivity, compiler=compiler, result=result)


In [27]:
data.shape

(711360, 20)

In [28]:
data.columns

Index([u'Trainset', u'Experiment', u'Subject', u'TimeStamp', u'RFID',
       u'GRASP_A', u'GRASP_B', u'GRASP_C', u'AX', u'AY', u'AZ', u'EX', u'EY',
       u'EZ', u'Parcours', u'Parcours_Step', u'Mutation', u'Host',
       u'Host/Spot', u'Gesture'],
      dtype='object')

In [29]:
data.dtypes

Trainset          object
Experiment         int64
Subject           object
TimeStamp          int64
RFID              object
GRASP_A            int64
GRASP_B            int64
GRASP_C            int64
AX               float64
AY               float64
AZ               float64
EX               float64
EY               float64
EZ               float64
Parcours           int64
Parcours_Step      int64
Mutation           int64
Host               int64
Host/Spot         object
Gesture            int64
dtype: object

In [30]:
data.head(10)

Unnamed: 0,Trainset,Experiment,Subject,TimeStamp,RFID,GRASP_A,GRASP_B,GRASP_C,AX,AY,AZ,EX,EY,EZ,Parcours,Parcours_Step,Mutation,Host,Host/Spot,Gesture
0,_TRAINSET14022017094616,1,Andreas,0,0,781,8,797,0.06,-0.02,-0.1,216.8125,9.0625,-81.9375,101,1,151,8,,15
1,_TRAINSET14022017094616,1,Andreas,29001,0,782,0,799,0.09,-0.04,-0.11,217.0625,9.0625,-81.9375,101,1,151,8,,15
2,_TRAINSET14022017094616,1,Andreas,46136,0,782,6,798,0.12,-0.09,0.09,217.4375,9.125,-81.875,101,1,151,8,,15
3,_TRAINSET14022017094616,1,Andreas,74902,0,784,7,798,0.08,-0.08,0.03,217.625,9.125,-81.8125,101,1,151,8,,15
4,_TRAINSET14022017094616,1,Andreas,97663,0,781,0,798,0.07,-0.09,0.04,217.9375,9.1875,-81.75,101,1,151,8,,15
5,_TRAINSET14022017094616,1,Andreas,116448,0,784,4,800,0.12,-0.06,-0.03,218.3125,9.25,-81.75,101,1,151,8,,15
6,_TRAINSET14022017094616,1,Andreas,148753,0,783,0,798,0.21,-0.04,0.03,218.5,9.3125,-81.75,101,1,151,8,,15
7,_TRAINSET14022017094616,1,Andreas,167422,0,784,2,798,0.18,-0.1,-0.08,218.6875,9.375,-81.75,101,1,151,8,,15
8,_TRAINSET14022017094616,1,Andreas,187481,0,782,4,799,0.15,-0.18,-0.03,219.0,9.4375,-81.75,101,1,151,8,,15
9,_TRAINSET14022017094616,1,Andreas,213733,0,784,13,799,0.15,-0.18,-0.17,219.125,9.4375,-81.75,101,1,151,8,,15


## Some Data Stats

In [31]:
# how many subjects
subjects = data['Subject'].unique().tolist()
subjects 

['Andreas', 'Alfred', 'Claudia', 'Dominik']

In [32]:
# how many RFID ids
data['RFID'].unique().tolist()

['000000000000',
 '09006734114B',
 '0',
 '05003DD5CD20',
 '760057911BAB',
 '13005E4BB7B1',
 '1400135B8AD6',
 '14001486BE38',
 '7600577D0D51',
 0,
 '140014CD1AD7',
 '280015E55981']

In [33]:
# how many experiments
experiments = data['Experiment'].unique().tolist()
experiments

[1, 2, 3, 4, 5, 6, 7, 8]

In [34]:
# how many training sets
len(data['Trainset'].unique().tolist())

539

In [35]:
# how many parcours
len(data['Parcours'].unique())

116

In [36]:
data['Parcours'].unique()

array([101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
       114, 115, 116, 117, 118, 401, 402, 403, 201, 202, 203, 204, 205,
       206, 207, 208, 211, 212, 213, 216, 217, 218, 221, 222, 223, 226,
       227, 228, 701, 702, 705, 706, 709, 710, 713, 714, 801, 802, 601,
       602, 603, 606, 607, 608, 611, 612, 613, 616, 617, 618, 621, 622,
       623, 501, 502, 504, 503, 301, 302, 306, 307, 308, 311, 312, 316,
       317, 318, 321, 322, 326, 327, 328, 119, 120, 121, 231, 232, 234,
       233, 236, 235, 717, 718, 719, 720, 721, 722, 723, 724, 626, 627,
       628, 629, 630, 331, 332, 333, 334, 335, 336, 901, 902, 903])

In [37]:
# how many parcours steps
data['Parcours_Step'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [38]:
# how many mutations
len(data['Mutation'].unique())

176

In [39]:
# how many gestures
len(data['Gesture'].unique())

17

In [None]:
# which gestures exist
sorted(data['Gesture'].unique())

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]

In [None]:
data.head()

In [None]:
# how many hosts
len(data['Host'].unique())

In [None]:
# which Host Spots
data['Host/Spot'].unique().tolist()

## Look into the Data

In [None]:
data.columns

### Get Experiment data: Filter for one experiment of one subject (just for testing)

In [None]:
def get_experiment(data,experiment,trainset,subject,parcours,mutation,gesture):
    '''Return the rows of data that match all of the given filter values.

    Each argument after data filters the column of the same meaning
    (Experiment, Trainset, Subject, Parcours, Mutation, Gesture).
    Passing None for an argument omits that filter (returning more data).
    If every filter is None, an unfiltered copy of data is returned.

    :return: DataFrame with the matching rows
    '''
    # Each clause refers to a parameter of THIS function via '@name', so the
    # filter values never depend on notebook-level variables.
    candidates = (
        ('Experiment==@experiment', experiment),
        ('Trainset==@trainset', trainset),
        ('Subject==@subject', subject),
        ('Parcours==@parcours', parcours),
        ('Mutation==@mutation', mutation),
        ('Gesture==@gesture', gesture),
    )
    # TODO filter by Parcours_Step u/o Host

    # compare against None explicitly so that falsy values like 0 still filter
    clauses = [clause for clause, value in candidates if value is not None]

    if not clauses:
        # DataFrame.query rejects an empty expression
        return data.copy()

    query = " and ".join(clauses)
    #print query
    return data.query(query)

### Get 1 Experiment

In [None]:
# get experiment data (setting one to None does not filter by it)
exp = 1
subject = 'Andreas'
trainset = None
parcours = None # 102
mutation = None
gesture = None #2 # 1

mydata = get_experiment(data,exp,trainset,subject,parcours,mutation,gesture)
mydata.shape

In [None]:
# if we queried without Parcours, use this to get all the Parcours
parcours = mydata['Parcours'].unique().tolist()
print len(parcours), "Parcours"

In [None]:
# how many training sets included in this experiment
trainsets = mydata['Trainset'].unique().tolist()
print len(trainsets), "Train sets"

In [None]:
# print first few
trainsets[:5]

In [None]:
# how often does the Timestamp jump, i.e. start at 0 again?
sum(mydata['TimeStamp'].diff() < 0)

In [None]:
# -> seems right!

### Get 1 Trainset

In [None]:
# get experiment data (setting one to None does not filter by it)
exp = 1
subject = 'Andreas'
trainset = '_TRAINSET14022017094616'
parcours = None # 102
mutation = None
gesture = None #2 # 1

mydata = get_experiment(data,exp,trainset,subject,parcours,mutation,gesture)
mydata.shape

In [None]:
# which parcours
mydata['Parcours'].unique().tolist()

In [None]:
# which parcours-steps
mydata['Parcours_Step'].unique().tolist()

In [None]:
group_by = ('Subject','Experiment','Trainset','Parcours','Parcours_Step')
mydata.groupby(group_by).size()

In [None]:
# which gestures in each Parcours_Step
for idx, group_df in mydata.groupby(group_by):
    parcours_step = idx[-1] # last entry in index is the Parcours_Step
    print parcours_step, np.unique(group_df['Gesture']), len(group_df)

In [None]:
# which mutations
mut_list = mydata['Mutation'].unique().tolist()
mut_list

In [None]:
# which gestures
gest_list = mydata['Gesture'].unique().tolist()
gest_list

In [None]:
# which mutation corresponds to which gesture
for mut in mut_list:
    gest = get_mutation_gesture(mut)
    print mut, ":", gest, gesture_name(gest)

In [None]:
# which RFIDs
mydata['RFID'].unique().tolist()

### Get 1 Gesture

In [None]:
# get data for 1 gesture
mutation = None
parcours = 101
gesture = 15 
mydata = get_experiment(data,exp,trainset,subject,parcours,mutation,gesture)
mydata.head()

In [None]:
### Check TimeStep deltas
# investigate time stamp delta mean and variance
mydata['TimeStamp'].diff().describe()

In [None]:
print "Average Time Stamp delta:", mydata['TimeStamp'].diff().mean()

In [None]:
# how often does the Timestamp jump, i.e. start at 0 again?
sum(mydata['TimeStamp'].diff() < 0)

In [None]:
# -> should be 0: within a gesture there should be no TimeStamp reset!

### Plot timeline of sensor data of 1 Trainset

In [None]:
params = ['AX', 'AY', 'AZ', 'EX', 'EY', 'EZ', 'GRASP_A', 'GRASP_B', 'GRASP_C']
# TODO add RFID?

In [None]:
def one_plot(mydata,var):
    '''Plot one column of mydata against the sample position (0, 1, 2, ...).

    The plot title is built from the notebook-level variables exp, subject,
    trainset, parcours, mutation and gesture, i.e. it shows whatever these
    are set to at the time of the call.
    '''
    values = mydata[var]
    positions = range(len(values))   # alternative x axis: mydata['TimeStamp']
    plt.plot(positions, values)

    title_fields = (str(exp),subject,trainset,str(parcours),str(mutation),str(gesture),gesture_name(gesture))
    plt.title("E%s %s %s P%s M%s G%s %s" % title_fields)

In [None]:
def grid_plot(mydata, title=True):
    '''Plot every column listed in params in its own subplot of a 3x3 grid.

    title=True builds the figure title from the notebook-level variables
    exp, subject, trainset, parcours, mutation and gesture; any other value
    except None is passed on as the figure title; None suppresses the title.
    The subplots are filled column by column.
    '''
    n_rows, n_cols = 3, 3
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 10)) #, sharex=True, sharey=True)

    if title is True:
        title_fields = (str(exp),subject,trainset,str(parcours),str(mutation),str(gesture),gesture_name(gesture))
        title = "E%s %s %s P%s M%s G%s %s" % title_fields

    if title is not None:
        fig.suptitle(title, fontsize=16)

    for position, column in enumerate(params):
        series = mydata[column]
        sample_idx = range(len(series))   # alternative x axis: mydata['TimeStamp']

        # column-major placement: run down one grid column before moving right
        grid_col, grid_row = divmod(position, n_rows)
        subplot = axes[grid_row, grid_col]
        subplot.plot(sample_idx, series)
        subplot.set_title(column)
    

In [None]:
# show data for 1 PARCOURS
exp = 1
subject = 'Andreas'
trainset = trainsets[0]
parcours = 101
mutation = None
gesture = None 

mydata = get_experiment(data,exp,trainset,subject,parcours,mutation,gesture)

grid_plot(mydata)

In [None]:
var = 'TimeStamp'
one_plot(mydata,var)
print "Avg. TimeStamp delta:", mydata['TimeStamp'].diff().mean()

#### = Nice continuous data

### Plot Gestures (== Parcours Steps) inside 1 Trainset in different colors:

In [None]:
# which / how many gestures are in this parcours?
gest_list = mydata['Gesture'].unique().tolist()

for g in gest_list:
    print "G", g, gesture_name(g)

In [None]:
trainset

In [None]:
var = 'EY'
mydata.head()

In [None]:
# define color map
n_gestures = len(gestures_df)
colors = matplotlib.cm.jet(np.linspace(0, 1, n_gestures))

In [None]:
#print "Plot of gestures in 1 Parcours by different colours:"

# one group per parcours step; a list of keys is used because newer pandas
# versions interpret a tuple as ONE single key
group_data = mydata.groupby(['Parcours','Parcours_Step'])

plt.figure(figsize=(16,8))

for idx, group_df in group_data:
    gest_list = group_df['Gesture'].unique()
    if len(gest_list) != 1:
        raise ValueError("More than 1 gesture in parcours step:" + str(gest_list))
        
    gest = gest_list[0]
    # colors holds one row per entry of gestures_df (positions 0..n-1) while the
    # gesture IDs start at 1: use the position of the ID in gestures_df, not the
    # ID itself, otherwise the highest ID would point past the end of colors
    color = tuple(colors[gestures_df.index.get_loc(gest)])
    plt.plot(group_df['TimeStamp'], group_df[var], color=color, label=str(gest))

# re-sorting labels in an OrderedDict as they would be repeated otherwise
handles, labels = plt.gca().get_legend_handles_labels()
by_label = OrderedDict(zip(labels, handles))
plt.legend(by_label.values(), by_label.keys())  # loc='upper left')
plt.title("Gestures in 1 Parcours by color")
# 'pass' as last statement keeps the notebook from echoing the return value of plt.title
pass

In [None]:
one_plot(mydata,var)

### Plot 1 Gesture

In [None]:
# get data for 1 GESTURE

mutation = None
gesture = 1

mydata = get_experiment(data,exp,trainset,subject,parcours,mutation,gesture)

grid_plot(mydata)

In [None]:

one_plot(mydata,'EY')

In [None]:
var = 'TimeStamp'
one_plot(mydata,var)
print "Avg. TimeStamp delta:", mydata['TimeStamp'].diff().mean()

#### = Cuts in the data! PROBLEM!!!

#### CONCLUSION: USE PARCOURS DATA - but it may contain multiple gestures - how to group them??

## Solution: Replace Nestures

In [None]:
replace_nestures = True

In [None]:
group_by = ('Subject','Experiment','Trainset','Parcours','Mutation','Gesture')
group_df = data.groupby(group_by)
print "Originally", len(group_df), "individual gesture blocks"

In [None]:
data.groupby(group_by).count().head(20)

In [None]:
# Therefore Group by PARCOURS
# group data nicely, subdivided by Subject, Experiment, Trainset, Parcours
group_by = ('Subject','Experiment','Trainset','Parcours')

In [None]:
# just show groups first
data.groupby(group_by).count().head(20)

In [None]:
# CHECK before: how many / which gestures are in each Parcours

for name_tuple, group_data in data.groupby(group_by):
    gest_list = group_data['Gesture'].unique().tolist()
    print name_tuple, ":", gest_list

In [None]:
# Step 1: replace ALL Nestures by NaN
if replace_nestures:
    # make a copy of the complete data before altering anything
    data_nonest = data.copy()
    idx_nestures = data_nonest['Gesture'].isin(nestures)
    # replace nestures by NaN
    data_nonest.loc[idx_nestures,'Gesture'] = np.nan
    print data_nonest.head()

In [None]:
data.head()

In [None]:
data_nonest.head()

In [None]:
# CHECK after: which gestures are in each Parcours
if replace_nestures:
    for name_tuple, group_data in data_nonest.groupby(group_by):
        print name_tuple, ":", group_data['Gesture'].unique().tolist()
        
    #if len(gest_list) != 1:
    #    raise ValueError("Parcours-Step has more than 1 gesture!")

In [None]:
# now we can use the Forward FILL and Backward FILL methods of Pandas
# to replace the NaNs by the values that come before or after

# BUT: we shall not do that across Parcours/Experiments!

In [None]:
# GROUPBY helps us here to apply the fill methods only within a PARCOURS

if replace_nestures:
    # BACKWARD FILL within each group: every NaN (a former Nesture) takes the
    # value of the next Gesture that follows it inside the same group.
    # Only the 'Gesture' column is filled, so NaNs in all other columns stay
    # untouched and data_nonest keeps all of its columns.
    # list(group_by): a list of column names is treated as multiple keys,
    # whereas newer pandas versions interpret a tuple as ONE single key.
    data_nonest['Gesture'] = data_nonest.groupby(list(group_by))['Gesture'].bfill()

    # in case there would be NaNs left, do also a FORWARD FILL
    #data_nonest['Gesture'] = data_nonest.groupby(list(group_by))['Gesture'].ffill()
    
    print("Replaced Nestures by filling with neighboured Gestures!")
    print("%s NaN values remaining. Should be 0." % np.isnan(data_nonest['Gesture']).sum())

In [None]:
# NOTE: bfill applies to ALL COLUMNS! so there might be other columns affected by this!
# TODO double-check any side effects!

In [None]:
# adding NaNs cause the Gesture column to be converted from int to float
# we convert back to int
if replace_nestures:
    data_nonest['Gesture'] = data_nonest['Gesture'].astype(int)

In [None]:
max_num_gest = 0

# CHECK after: which gestures are in each Parcours
if replace_nestures:
    for name_tuple, group_data in data_nonest.groupby(group_by):
        gest_list = group_data['Gesture'].unique().tolist()
        print name_tuple, ":", gest_list
        l = len(gest_list)
        max_num_gest = max(l,max_num_gest)

In [None]:
max_num_gest

In [None]:
# Nesture -> NaN has been correctly replaced by Gesture 1:
data_nonest.head()

In [None]:
# check via groupby:
group_by = ('Subject','Experiment','Trainset','Parcours','Gesture')
group_df = data_nonest.groupby(group_by)
print "After nesture replacement", len(group_df), "individual gesture blocks"

In [None]:
group_df.count()

In [None]:
# keep original data in a variable
data_orig = data

In [None]:
# from here on we use data again for data_nonest

if replace_nestures:
    data = data_nonest

## Data Pre-Processing Part I

### Which Sensor Parameters to use?

In [None]:
include_GRASP = True

if include_GRASP:
    params = ['AX', 'AY', 'AZ', 'EX', 'EY', 'EZ', 'GRASP_A', 'GRASP_B', 'GRASP_C']
else:
    params = ['AX', 'AY', 'AZ', 'EX', 'EY', 'EZ']

# TODO add RFID?

### Normalize Parameter columns to -1, 1

Here the normalization is done globally. If `normalize_global` is set to False, there is an option to do it locally later.

In [None]:
normalize_global = False
# normalize_global means we normalize all parameter columns at once, globally => NO LATER TREATMENT

In [None]:
data[params].head()

In [None]:
if normalize_global:
    # normalize to -1, 1
    data[params] = preprocessing.minmax_scale(data[params], feature_range=(-1, 1), axis=0, copy=False)

In [None]:
data[params].head()

## Groupings for each Gesture (by Subject, Experiment, Trainset, Parcours and Mutation)

to be further processed for learning

In [None]:
# A) GET INDIVIDUAL GESTURES 
# group data nicely, subdivided by Subject, Experiment, Trainset, Parcours, Gesture

if replace_nestures:
    # NOTE: we HAVE to remove Mutation here! otherwise the Gestures merged by replacing Nestures will still be SEPARATE
    group_by = ('Subject','Experiment','Trainset','Parcours','Gesture')
else:
    group_by = ('Subject','Experiment','Trainset','Parcours','Mutation','Gesture')

group_df = data.groupby(group_by)
group_df.mean().head(100)  # mean is not meaningful here as aggregation - just to print the structure of the data

In [None]:
print len(group_df), "individual gesture blocks"

In [None]:
# B) GET PARCOURS - AVOIDS CUTS IN THE DATA - BUT MERGES MULTIPLE GESTURES (!!)

# EXPERIMENTAL-  DO NOT USE
def unused():
    '''EXPERIMENTAL - DO NOT USE: group data per parcours instead of per gesture.

    Groups the notebook-level data by Subject, Experiment, Trainset and
    Parcours. Nothing is returned; the aggregation result is discarded.
    '''
    parcours_keys = ('Subject','Experiment','Trainset','Parcours')
    per_parcours = data.groupby(parcours_keys)
    # mean is not meaningful here as aggregation - just to print the structure of the data
    per_parcours.mean().head(100)

In [None]:

# store for each group (parcours) the prevalent gesture

def unused():
    '''TEMPORARY SOLUTION, not called: find the prevalent gesture of each group.

    For every group of the notebook-level group_df the most frequent
    "positive" gesture (an ID listed in gestures_pos, i.e. not a nesture) is
    determined, collected in a local list and printed next to all gestures of
    the group. Nothing is returned.

    Raises IndexError if a group contains no positive gesture at all.
    '''
    prevalent_gestures = []

    for name_tuple, group_data in group_df:

        # TODO this is a TEMPORARY SOLUTION - some PARCOURS contain multiple gestures!!!
        # get all gestures in Parcours, descending by count (TODO: use size())
        gest_sub_df = group_data.groupby("Gesture").count().sort_values('Experiment', ascending=False)
        # retain only "positive" gestures, remove nestures; a boolean mask keeps
        # the descending-count order of the rows
        # (replaces the .ix indexer, which newer pandas versions no longer provide)
        positive_df = gest_sub_df[gest_sub_df.index.isin(gestures_pos)]
        # get the most frequent gesture
        gest_most_frequent = positive_df.index[0]

        prevalent_gestures.append(gest_most_frequent)

        # single pre-formatted string: prints the same text under Python 2 and 3
        print("%s : %s - most frequent: %s" % (name_tuple, group_data['Gesture'].unique().tolist(), gest_most_frequent))

        # only for debugging
        if name_tuple == ('Alfred', 8, 231): # gestures:  [15, 16, 3, 4]
            test_group_data = group_data

        if name_tuple == ('Alfred', 2, 101): # gestures:  [15, 1, 17] - most frequent: 17
            test_group_data2 = group_data

            # REPLACE original gestures by PREVALENT GESTURE
            # DOES NOT WORK ON ORIGINAL DATA!!!!!
            #group_data['Gesture'] = gest_most_frequent


## Get Gesture Data: 1 Block per each individual Gesture

We put each time series that belongs to one particular gesture in one particular parcours into a dictionary.
For each gesture, the dictionary holds a list of such time series blocks.

In [None]:
# now we ITERATE nicely through group_df and get each Gesture block individually
# -> group_data will be a dataframe just for a single gesture

# counter of processed gesture blocks
i=0
# dictionary containing a list of sub-datasets for each gesture, to train ML
gesture_exp_dict = {}

for name_tuple, group_data in group_df:
    i += 1
    #print str(name_tuple)
    # NOTE: this overwrites the notebook-level variable `gesture`
    gesture = name_tuple[-1]  # gesture is last element of tuple, as defined in group_by above
    
    # initialize empty list for this gesture
    if gesture not in gesture_exp_dict.keys():
        gesture_exp_dict[gesture] = [] 
        
    # add data to gesture dict
    gesture_exp_dict[gesture].append(group_data)
    
    # NOTE that group_data here still contains ALL data columns. we will reduce to params later

print "DONE:", i, "gesture blocks"

In [None]:
# Older version iterating over experiments with FOR loop - MUCH SLOWER! 
# - UNUSED BUT KEPT FOR EXAMPLE HOW IT CAN BE DONE - 
# Iterate over Experiments etc. to group data by gestures

def unused():

    # dictionary containing a list of sub-datasets for each gesture, to train ML
    gesture_exp_dict = {}

    experiments = data['Experiment'].unique().tolist()

    for subject in subjects:
        for exp in experiments:

            exp_data = get_experiment(data,exp,subject,parcours=None,mutation=None,gesture=None)

            if exp_data.shape[0] > 0:
                # list of parcours in this experiment
                parc_list = exp_data['Parcours'].unique().tolist()

                print subject, exp, ":", len(parc_list), "parcours"

    #            for parc in parc_list:
                for parc in parc_list[:5]:     # DEBUG: take only first 5 parcours per experiment
                    mydata = get_experiment(data,exp,subject,parc,mutation=None,gesture=None)
                    mutations = exp_data['Mutation'].unique().tolist()
                    gestures = exp_data['Gesture'].unique().tolist() 

                    print "- P", parc, mydata.shape, len(mutations), "mutations", len(gestures), "gestures"

                    for mut in mutations:
                        mydata = get_experiment(data,exp,subject,parc,mut,gesture=None)
                        gestures = mydata['Gesture'].unique().tolist() 

                        for gest in gestures:
                            mydata = get_experiment(data,exp,subject,parc,mut,gest)
                            print mydata.shape
                            # add data to gesture dict
                            if mydata.shape[0] > 0:
                                if gest not in gesture_exp_dict.keys():
                                    gesture_exp_dict[gest] = [] # initalize empty list for this gesture
                                # add data to gesture dict
                                gesture_exp_dict[gest].append(mydata)

    print "FINISHED."

In [None]:
# How many data blocks = training examples do we have for each gesture
for gest in sorted(gesture_exp_dict.keys()):
    print "G", gest, '\t', len(gesture_exp_dict[gest]), "training data blocks", '\t', gesture_name(gest) 

In [None]:
# how many data points (= samples or timesteps) does each data block have?

data_sizes = {} # collect per gesture in dict
data_sizes_total = [] # collect all in list

print "average data length (number of samples) per gesture:"

for gest in sorted(gesture_exp_dict.keys()):
    print "G", gest, ':\t', 
    data_sizes[gest] = []
    for datablock in gesture_exp_dict[gest]:
        size = datablock.shape[0]
        #print size,
        # TODO data_sizes ...
        data_sizes[gest].append(size)
        data_sizes_total.append(size)
    print int(np.mean(data_sizes[gest]))

In [None]:
print min(data_sizes_total), max(data_sizes_total)

In [None]:
# average data length (number of samples)
print "Average data length (number of samples) of all gestures"
avg_data_len = int(np.mean(data_sizes_total))
avg_data_len

### Verification: What does the individual Gesture Data look like?

In [None]:
# get 0th entry fo gesture 1
g = gesture = 1
i=0
mydata = gesture_exp_dict[g][i]
mydata.shape

In [None]:
mydata.head()

In [None]:
one_plot(mydata,'TimeStamp')

In [None]:
title = "G%d #%d - %s" % (g,i,gesture_name(g))
grid_plot(mydata, title)

In [None]:
# get 3rd entry fo gesture 3
g = gesture = 3
i=3
mydata = gesture_exp_dict[g][i]

In [None]:
one_plot(mydata,'TimeStamp')

In [None]:
title = "G%d #%d - %s" % (g,i,gesture_name(g))
grid_plot(mydata, title)

In [None]:
g = 4
i=2
mydata = gesture_exp_dict[g][i]

In [None]:
title = "G%d #%d - %s" % (g,i,gesture_name(g))
grid_plot(mydata, title)

In [None]:
one_plot(mydata,'TimeStamp')

## TODO why do we still have step data here??

## Data Pre-Processing Part II

### Reduce Data to desired parameter columns

In [None]:
# in the group_df iteration before, we kept all data columns 
# now we ITERATE over the gesture_exp_dict again, retaining only the parameter columns

gesture_dict_params = {}
n_datablocks = 0

for g in sorted(gesture_exp_dict.keys()):
    print "G" + str(g) +'\t',
        
    #initalize empty list for this gesture
    gesture_dict_params[g] = [] 
            
    for datablock in gesture_exp_dict[g]:

        # reduce to params columns
        datablock_params = datablock[params] # .T # prevously: # transpose: 9 data rows with params, cols is time series
        
        # add data to new gesture dict
        gesture_dict_params[g].append(datablock_params)
        
        n_datablocks += 1
    
    print len(gesture_dict_params[g]), "data blocks"
print

### Normalization: Min-Max Scaling - Testing

#### Test for 1 signal

In [None]:
# get 0th entry fo gesture 1
g = 1
i=0
mydata = gesture_dict_params[g][i]
var = 'EZ'
signal_orig = mydata[var]

In [None]:
# plot with indices on x
y = signal_orig
plt.plot(range(len(y)), y)

In [None]:
# normalize to -1, 1
signal_normalized = preprocessing.minmax_scale(signal_orig, feature_range=(-1, 1))

In [None]:
# plot with indices on x
y = signal_normalized
plt.plot(range(len(y)), y)

#### Test for all signals

In [None]:
# try on a block of data
mydata.head()

In [None]:
# normalize all rows
mydata_norm = preprocessing.minmax_scale(mydata, feature_range=(-1, 1),axis=0)
mydata_norm.shape

In [None]:
mydata_norm = pd.DataFrame(mydata_norm, index=mydata.index, columns=mydata.columns)
mydata_norm.head()

In [None]:
# before
var = 'EZ'
y = mydata[var]
plt.plot(range(len(y)), y)

In [None]:
# plot with indices on x
y = mydata_norm[var]
plt.plot(range(len(y)), y)

### Normalization: Min-Max Scaling

In [None]:
make_copy = True  # MAKE A COPY OF THE DATA for normalized version -> takes more RAM

In [None]:
# Loop over ALL gesture data
# build new gesture dictionary for normalized data

gesture_dict_norm = {}

for g in sorted(gesture_dict_params.keys()):
    print "G" + str(g) +'\t',

    #initalize empty list for this gesture
    gesture_dict_norm[g] = [] 

    for datablock in gesture_dict_params[g]:

        # normalize all param columns
        datablock_norm = preprocessing.minmax_scale(datablock, feature_range=(-1, 1), axis=0, copy=make_copy)
        
        # make DataFrame again (to retriev column parameters later)
        datablock_norm = pd.DataFrame(datablock_norm, index=datablock.index, columns=datablock.columns)

        # add data to new gesture dict
        gesture_dict_norm[g].append(datablock_norm)

    print len(gesture_dict_norm[g]), "data blocks"

In [None]:
# Verifying normalization

In [None]:
# get 0th entry fo gesture 1
g = 1
i=0
mydata = gesture_dict_norm[g][i]
mydata.head()

In [None]:
var = 'EZ'
y = mydata[var]
plt.plot(range(len(y)), y)

### Testing Time Normalization: Resampling

In [None]:
# Time Normalize / Interpolate time stamps
signal_orig = gesture_exp_dict[g][i]['EZ']
signal_orig_timestamps = gesture_exp_dict[g][i]['TimeStamp'].values

In [None]:
# plot with indices on x
y = signal_orig
plt.plot(range(len(y)), y)

In [None]:
# plot with timestamps on x
plt.plot(signal_orig_timestamps, signal_orig)

In [None]:
signal_orig_timestamps

In [None]:
# testing
#samples = 500 

# we set number of samples in resample signal to average number of samples across all gestures
samples = avg_data_len

# TODO: try min or max of sample length of all gestures

# If t is not given, it is assumed to be the sample positions associated with the signal data in x.
signal_resampled = resample(signal_orig, num=samples, window='hann') # , t=None,

In [None]:
signal_resampled2, timestamps2 = resample(signal_orig, num=samples, t=signal_orig_timestamps, window='hann')

In [None]:
np.all(signal_resampled == signal_resampled2)

In [None]:
# plot with indices on x
y = signal_resampled
plt.plot(range(len(y)), y)

In [None]:
# plot with indices on x
y = signal_resampled2
plt.plot(range(len(y)), y)

In [None]:
# plot with timestamps on x
y = signal_resampled2
plt.plot(timestamps2, y)

In [None]:
max(signal_orig_timestamps)

In [None]:
max(timestamps2)

In [None]:
len(signal_resampled2)

In [None]:
# timestamp delta
max(timestamps2) / len(signal_resampled2) 

In [None]:
# TODO compute sampling rate across all input, not just this one
# Estimate of the sampling rate in Hz: number of resampled samples divided by the signal
# duration in seconds, where the duration is taken as the largest resampled timestamp.
# NOTE(review): dividing by 1000000.0 converts microseconds to seconds, while the previous
# inline comment said "/ 1000 = ms to sec" - confirm the unit of the TimeStamp column.
# NOTE(review): using max() as the duration presumes the timestamps start at 0 - confirm.
sampling_rate = 1.0 / (max(timestamps2) / 1000000.0/ len(signal_resampled2)) # timestamp units -> seconds
sampling_rate

### Resampling / Time Normalization - Batch

In [None]:
use_normalized_for_resampling = True

In [None]:
if use_normalized_for_resampling and not normalize_global:
    # normalized
    input_dict = gesture_dict_norm
else:
    # original
    input_dict = gesture_dict_params

In [None]:
# Resample Loop over ALL data

# build new gesture dictionary for resampled data

gesture_dict_resampled = {}

for g in sorted(input_dict.keys()):
    print "G", g#, ':\t'#, 
    gesture_dict_resampled[g] = [] # initalize empty list for this gesture
    
    for datablock in input_dict[g]:
        #print datablock.shape, 

        # resample the whole block to target number of samples
        # a) without original timestamps
        datablock_resampled = resample(datablock, num=samples, axis=0, window='hann')
        
        # b) TODO re-interpolation according to original timestamps
        #datablock['TimeStamp'].values # TODO keep them before
        #datablock_resampled = resample(datablock, num=samples, axis=0, t=signal_orig_timestamps)
        
        # make DataFrame again (to retrieve column parameters later)
        datablock_resampled = pd.DataFrame(datablock_resampled, columns=datablock.columns)

        # add data to new gesture dict
        gesture_dict_resampled[g].append(datablock_resampled)
    #print

In [None]:
datablock

### Time Resampling - Verification

In [None]:
datablock.shape

In [None]:
# get 0th entry fo gesture 1
g = 1
i=0
mydata = input_dict[g][i]
mydata.head()

In [None]:
# before
var = 'EX'
y = mydata[var]
plt.plot(range(len(y)), y)

In [None]:
# after
mydata_res = gesture_dict_resampled[g][i]
mydata_res.head()

In [None]:
y = mydata_res[var]
plt.plot(range(len(y)), y)

### Testing: Calc Derivative Signals

In [None]:
# Calc derivatives 

# select the signal explicitly; previously 'v' was assigned here but 'var' was read,
# so the cell silently depended on whatever 'var' an earlier cell had left behind
var = 'EX'
y = mydata[var]

# first derivative of the signal (central differences), plotted over the sample index
y_d = np.gradient(y)
plt.plot(range(len(y_d)), y_d)

In [None]:
# can we do it on the matrix of input signals?
mydata_deriv = np.gradient(mydata, axis=0)
mydata_deriv.shape

In [None]:
# deriv from full matrix
i = mydata.columns.get_loc(var)
i

In [None]:
y = mydata_deriv[:,i]
plt.plot(range(len(y)), y)

#### -> Problem with Artefacts!

### Low-Pass Filter - Testing

removing high frequencies (little fluctuations which are probably not relevant)

In [None]:
# source code from https://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units

from scipy.signal import butter, lfilter, freqz

def butter_lowpass(cutoff, fs, order=5):
    '''Design a digital Butterworth low-pass filter.

    :param cutoff: cutoff frequency in Hz
    :param fs: sampling rate in Hz
    :param order: order of the filter
    :return: tuple (b, a) with the numerator and denominator filter coefficients
    '''
    nyq = 0.5 * fs # Nyquist frequency is half the sampling rate.
    normal_cutoff = cutoff / nyq # butter() takes the cutoff relative to the Nyquist frequency
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a

def butter_lowpass_filter(data, cutoff, fs, order=5, axis=0):
    '''Apply a Butterworth low-pass filter to a signal or a block of signals.

    :param data: 1-D signal or 2-D block with the time steps along `axis`
    :param cutoff: cutoff frequency in Hz
    :param fs: sampling rate in Hz
    :param order: order of the filter
    :param axis: axis along which to filter. Defaults to 0 (rows = time steps), which is
        the layout of the data blocks in this notebook; lfilter's own default (-1) would
        filter a (time, channel) block across the channels instead of along time.
        For 1-D input both are identical.
    :return: numpy array with the filtered data, same shape as the input
    '''
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = lfilter(b, a, data, axis=axis)
    return y

In [None]:
# before
var = 'EX'
y = mydata[var]
plt.plot(range(len(y)), y)

In [None]:
# Filter settings
#fs = 30.0       # sample rate, Hz
fs = sampling_rate   # determined before by average time delta # TODO improve its computation

# CHOOSE HERE desired cutoff frequency of the filter Hz
order = 1 #3 #5 #6

cutoff = 4 #Hz
#cutoff = 3.667 
#cutoff = 1.3
#cutoff = 0.667 
#cutoff = 0.5
#cutoff = 0.33

In [None]:
# after
# Filter the data, and plot both the original and filtered signals.
y_filt = butter_lowpass_filter(y, cutoff, fs, order)

In [None]:
plt.plot(range(len(y_filt)), y_filt)

### Low-Pass Filter - Batch

In [None]:
# Resample Loop over ALL data

# build new gesture dictionary: in: resampled data out: filtered data

input_dict = gesture_dict_resampled

gesture_dict_filtered = {} # out

for g in sorted(input_dict.keys()):
    print "G", g#, ':\t'#, 
    gesture_dict_filtered[g] = [] # initalize empty list for this gesture
    
    for datablock in input_dict[g]:
        #print datablock.shape, 

        # filter the signal block with low-pass filter
        datablock_filtered = butter_lowpass_filter(datablock, cutoff, fs, order)

        # make DataFrame again (to retrieve column parameters later)
        datablock_filtered = pd.DataFrame(datablock_filtered, columns=datablock.columns)

        # add data to new gesture dict
        gesture_dict_filtered[g].append(datablock_filtered)
    #print

### Zero Crossing Rate - Testing

In [None]:
# Zero-Crossing rate measures how often the signal changes its sign from positive to negative or vice-versa
# Matlab code: ZCR=mean(abs(diff(sign(Signal)))

In [None]:
def signal_one_plot(mydata,var):
    '''Plot the signal `mydata[var]` over its sample index, using `var` as the plot title.'''
    signal = mydata[var]
    # x axis: running sample index (not the 'TimeStamp' column)
    sample_positions = range(len(signal))
    plt.plot(sample_positions, signal)
    plt.title(var)
    # plt.show() is intentionally not called here

In [None]:
# TESTING: 
# reusing mydata from above
mydata.head()

In [None]:
var = 'AX'

In [None]:
signal_one_plot(mydata,var)

In [None]:
# TEST with 1 signal
signal = mydata[var]

In [None]:
# NOTE: np.sign vs. np.signbit - we do not use the sign function because it can be -1, 0 or 1, so the diff at a sign change is 2
# instead we use np.signbit, which is False or True and therefore changes its value whenever the sign of the signal changes
# this is what we want to count for ZCR
# testing the difference:
#np.sign(signal)
#np.signbit(signal)

In [None]:
# ZCR calculation
# ZCR=mean(abs(diff(sign(Signal))) # Matlab code

# for 1 signal row:
zcr = np.signbit(signal).diff().abs().mean()
zcr

In [None]:
# for multiple signal rows:
zcr = np.signbit(mydata).astype(int).diff(axis=0).abs().mean(axis=0)
zcr

### Zero Crossing Rate - Implementation

In [None]:
def calc_zero_crossings(datablock):
    '''Compute the zero-crossing rate of every column of a signal block.

    :param datablock: pandas DataFrame, one signal per column, time steps in the rows
    :return: pandas Series (indexed by column) with the mean number of sign changes
        between consecutive rows
    '''
    # 1 where the value carries a negative sign, 0 otherwise
    is_negative = np.signbit(datablock).astype(int)
    # a sign change between consecutive rows shows up as +1 / -1 (first row: NaN)
    sign_changes = is_negative.diff(axis=0).abs()
    # averaging per column gives the rate
    return sign_changes.mean(axis=0)

###  Add more features?

## Pre-Processing of the Signals

In [None]:
def preprocess_signal(testdata, n_samples=None, normalize=False, resampling=False, timestamps=None, filtering=False):
    '''Run the optional pre-processing steps on one signal block, in fixed order:
    min-max normalization, time resampling, low-pass filtering.

    :param testdata: signal block with the time steps in the rows
    :param n_samples: target number of samples for resampling; None keeps the input length
    :param normalize: if True, min-max scale every column to [-1, 1]
    :param resampling: if True, resample the block (with a 'hann' window)
    :param timestamps: optional original timestamps handed to resample() as time axis
    :param filtering: if True, apply the low-pass filter using the notebook-level
        settings cutoff, fs and order
    :return: the processed block (the untouched input if no step is enabled)
    '''
    # Note: to do it fully right, the min-max scaling should be fitted on all training data
    # coherently (currently it is done per block) and the same min/max reused here, see
    # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    if normalize:
        testdata = preprocessing.minmax_scale(testdata, feature_range=(-1, 1), axis=0)

    if resampling:
        # without a FIXED target the number of samples stays as in the input signal
        if n_samples is None:
            n_samples = testdata.shape[0]

        if timestamps is not None:
            # the original timestamps are used to re-align the signal
            # TODO check: n_samples must match len(timestamps)
            testdata, timestamps2 = resample(testdata, num=n_samples, t=timestamps, window='hann')
        else:
            testdata = resample(testdata, num=n_samples, window='hann')

    if filtering:
        testdata = butter_lowpass_filter(testdata, cutoff, fs, order)

    return testdata

## Feature Calculation

#### Statistical Features, Derivative, Zero Crossings

#### Options: Set Options here

In [None]:
# OPTIONS:

# either/or:
use_lowpassfilter = False
use_normalized = True 
use_resampled = True 
# if both are False, unresampled unnormalized input is used

# other options: # True is better for all
exclude_non_gestures = True
calc_derivative = True
calc_zerocrossings = True

In [None]:
# SELECT THE RIGHT INPUT DEPENDING ON OPTIONS

if use_lowpassfilter:
    input_dict = gesture_dict_filtered
elif use_resampled:
    input_dict = gesture_dict_resampled 
elif use_normalized and not normalize_global:
    input_dict = gesture_dict_norm
else:
    input_dict = gesture_dict_params # non resampled

if exclude_non_gestures:
    gestures_to_process = gestures_pos
else:
    gestures_to_process = input_dict.keys()

In [None]:
# Calc statistical features

def calc_statistical_features(matrix, axis=0):
    '''Compute 7 summary statistics for every signal of a 2-D block.

    :param matrix: 2-D block of signals
    :param axis: axis along which the statistics are computed (the time axis)
    :return: numpy array of shape (n_signals, 7) with the columns mean, variance,
        skewness, median, minimum, maximum and Pearson kurtosis; NaN entries are set to 0
    '''
    # the output has one row per signal, i.e. the size of the axis we do NOT reduce over
    n_signals = matrix.shape[int(not axis)]

    statistics = (
        np.mean(matrix, axis=axis),
        np.var(matrix, axis=axis, dtype=np.float64),
        stats.skew(matrix, axis=axis),
        np.median(matrix, axis=axis),
        np.min(matrix, axis=axis),
        np.max(matrix, axis=axis),
        stats.kurtosis(matrix, axis=axis, fisher=False), # Matlab calculates Pearson's Kurtosis
    )

    result = np.zeros((n_signals, 7))
    for column, values in enumerate(statistics):
        result[:, column] = values

    # undefined statistics are reported as 0
    result[np.isnan(result)] = 0
    return result

In [None]:
def calc_all_features(in_data, calc_derivative=False, calc_zerocrossings=False):
    '''Build the feature vector of one signal block.

    :param in_data: signal block with the time steps in the rows (a pandas DataFrame is
        needed when calc_zerocrossings is set)
    :param calc_derivative: also append the statistics of the first derivative
    :param calc_zerocrossings: also append the zero-crossing rate per signal
    :return: 1-D array: flattened statistical features, followed by the optional
        derivative statistics and the optional zero-crossing rates (in that order)
    '''
    # feature groups, each a 1-D vector; joined in this order at the end
    feature_groups = [calc_statistical_features(in_data, axis=0).flatten()]

    if calc_derivative:
        # statistics of the derivative of all signals along the time axis
        derivative = np.gradient(in_data, axis=0)
        feature_groups.append(calc_statistical_features(derivative, axis=0).flatten())

    if calc_zerocrossings:
        feature_groups.append(calc_zero_crossings(in_data))

    return np.concatenate(feature_groups)

In [None]:
# NEW!!!!!
# we added preprocess signal below, thats why we need to use the original gesture_dict as input

input_dict = gesture_dict_params # non resampled


In [None]:
# COMPUTE FEATURES
# LOOP over all gesture data to create features

# initialize feature output for training data as a list
train_list = []
train_classes_num = []

for gest in sorted(gestures_to_process):
    print "G", gest, ':\t', len(input_dict[gest]), "examples"
    
    for in_data in input_dict[gest]:
        #print datablock.shape, 
        
        #if use_resampled:
        #    # resampled data has already extracted the param columns
        #    in_data = datablock
        #else:
        #    # for non-resampled we have to get the relevant data columns and transpose
        #    in_data = datablock[params].T
        
        # preprocessing
        in_data = preprocess_signal(in_data, samples, use_normalized, use_resampled, None, use_lowpassfilter)
    
        # convert to dataframe cause we use pandas .diff() in ZCR computation
        in_data = pd.DataFrame(in_data, columns=params)

        # calculate features
        features = calc_all_features(in_data, calc_derivative, calc_zerocrossings)

        # append to output list
        train_list.append(features)
        
        # store class (gesture number) for these features
        train_classes_num.append(gest)

In [None]:
features.shape

## Prepare Training Data

In [None]:
print "Training data:", len(train_list), "examples"

In [None]:
# make feature array from feature list (ALL training data)

train_data = np.array(train_list)
#del train_list
train_data.shape

In [None]:
# verify if the training categories (gesture numbers) have the same length
len(train_classes_num)

### Standardize

Zero-mean unit-variance Standardization

In [None]:
# ad-hoc scaling
#train_data = preprocessing.scale(train_data,axis=0)
# axis=0 means independently standardize each feature, otherwise (if 1) standardize each sample

In [None]:
# we now use the StandardScaler class to keep the mean and variance for later
# (the fitted scaler is reused to transform the features in the continuous prediction)
# NOTE(review): the scaler is fitted on ALL examples here, i.e. before the train/test
# split below, so the test examples contribute to the mean/variance - consider fitting
# on the train split only.
standardizer = preprocessing.StandardScaler()
train_data = standardizer.fit_transform(train_data)

## Machine Learning

### Train/Test Set Split

In [None]:
# split the data into train/test set

testset_size = 0.25

# sklearn >= 0.18
# use random_state to avoid that the results fluctuate randomly
splitter = StratifiedShuffleSplit(n_splits=1, test_size=testset_size, random_state=0) 
splits = splitter.split(train_data, train_classes_num)

# Note: this for loop is only executed once, if n_splits==1
for train_index, test_index in splits:
    #print "TRAIN INDEX:", train_index
    #print "TEST INDEX:", test_index
    
    # split the data
    train_set = train_data[train_index]
    test_set = train_data[test_index]
    
    # and the numeric classes (groundtruth)
    train_classes = np.array(train_classes_num)[train_index]
    test_classes = np.array(train_classes_num)[test_index]
    
    print "TRAIN SIZE:", train_set.shape
    print "TEST SIZE:", test_set.shape
    

## 1) Gesture Recognition - isolated (+ independent of host)

### ML Algorithm: SVM

Support Vector Machines

In [None]:
# try 3 different SVM kernels
kernels = ['linear','poly','rbf']

In [None]:
models = {}

for kernel in kernels:
    print "SVM", kernel,
    
    # TRAIN 
    start_time = time.time() # measure time

    model = OneVsRestClassifier(SVC(kernel=kernel)) #, degree=degree)) #, n_jobs=-1)  # n_jobs = n cpus, -1 = all
    # full set
    #model.fit(train_data, train_classes_num)
    # train set
    model.fit(train_set, train_classes)
    
    # store in dict
    models[kernel] = model

    end_time = time.time()
    print "Training time:", timestr(end_time - start_time)

#### Verification on Train Set (just for plausibility)

In [None]:
# predict on train set
pred_train = model.predict(train_set)
pred_train

In [None]:
train_classes

In [None]:
# Accuracy on train set (manual computation)
np.sum(pred_train == train_classes) * 1.0 / len(train_classes)

In [None]:
# Accuracy on train set (using scikit-learn)
accuracy_score(train_classes, pred_train)

## Evaluation

### Evaluation - Overall

In [None]:
result_ov = pd.DataFrame(index=kernels, columns=['Accuracy','Precision','Recall','F-Measure'])

In [None]:
# macro-averaged metrics and the result_ov column each one is written to
macro_metrics = (('Precision', precision_score), ('Recall', recall_score), ('F-Measure', f1_score))

for k in kernels:
    # predict on TEST set with the model trained for this kernel
    pred_test = models[k].predict(test_set)

    # Accuracy, Precision, Recall and F-Measure on the TEST set
    result_ov.loc[k,'Accuracy'] = accuracy_score(test_classes, pred_test)
    for column, metric in macro_metrics:
        result_ov.loc[k,column] = metric(test_classes, pred_test, average='macro')

In [None]:
pd.options.display.float_format = '{:,.2f}'.format
result_ov*100

### Evaluation - Per Gesture

In [None]:
# manual selection which one was the best one
best_model = models['poly']
pred_test = best_model.predict(test_set) 

In [None]:
# TODO check if the sorting of precision_score etc. is really in this order!!
labels = sorted(np.unique(test_classes))
gesture_names = [gesture_name(l) for l in labels]

In [None]:
# nice result dataframe
columns = ['Gesture','N_train','N_test','Precision','Recall','F1']
result_df = pd.DataFrame(index=labels,columns=columns)
result_df['Gesture'] = gesture_names

In [None]:
# number of train / test instances
values, counts = np.unique(train_classes, return_counts=True)
result_df['N_train'] = pd.Series(counts, index=values)
values, counts = np.unique(test_classes, return_counts=True)
result_df['N_test'] = pd.Series(counts, index=values)

In [None]:
# per class evaluation
# labels=labels pins both the order and the number of the returned per-class scores to
# the index of result_df; without it the scores cover the sorted union of the classes
# found in groundtruth and predictions, which is not guaranteed to match that index
result_df['Precision'] = precision_score(test_classes, pred_test, labels=labels, average=None) * 100
result_df['Recall'] = recall_score(test_classes, pred_test, labels=labels, average=None) * 100
result_df['F1'] = f1_score(test_classes, pred_test, labels=labels, average=None) * 100

In [None]:
result_df

In [None]:
# compare average P, R and F to overall P, R and F above (same)
result_df.mean(axis=0)

In [None]:
# Confusion Matrix
conf = confusion_matrix(test_classes, pred_test, labels=labels) # labels defines the order
labels_long = gestures_df.loc[labels,'name']
conf_df = pd.DataFrame(conf, index=labels_long, columns=labels)
conf_df

## 2) Continuous Time Series Prediction

What is our input stream?

The data of 1 trainset, because after each trainset, the TimeStamp is reset.

In [None]:
# a) loop over each Trainset
#group_by = ('Subject','Experiment','Trainset')

# b) use Experiment as the block where we do predictions (means it includes timestamp resets!!)
group_by = ('Subject','Experiment')

group_df = data.groupby(group_by)
group_df.max().head(50) 

In [None]:
print len(group_df), "Experiments / Trainsets"

In [None]:
# iterate over each Trainset
i =0
for name_tuple, group_data in group_df:
    i += 1
    #print str(name_tuple)
    
    if len(name_tuple) == 3:
        subject, exp, trainset = name_tuple
    elif len(name_tuple) == 2:
        subject, exp = name_tuple
        trainset = None
    
    break # for testing we just do 1 loop
    

In [None]:
name_tuple

In [None]:
group_data['TimeStamp'].min()

In [None]:
group_data['TimeStamp'].max()

In [None]:
if len(name_tuple) == 3:
    # check if TimeStamps are monotonously increasing
    if not np.all(group_data['TimeStamp'].diff()[1:] > 0):
        raise ValueError("Time Stamps are not monotonously increasing!")

In [None]:
# set these to None so that plot title is not shown wrongly
parcours = None
mutation = None
gesture = None

In [None]:
# which gestures appear in this Experiment or Trainset
group_data['Gesture'].unique()

In [None]:
one_plot(group_data,'TimeStamp')

In [None]:
grid_plot(group_data)

### Pre-Process the Data - Testing

the same way as it was done for training set

In [None]:
pd.options.display.float_format = '{:,.5f}'.format

In [None]:
# get the relevant columns out of group_data

In [None]:
timestamps = group_data['TimeStamp'].tolist()

In [None]:
test_gestures = group_data['Gesture'].tolist()

In [None]:
# 9 parameters columns
testdata = group_data[params]
testdata.shape

In [None]:
# Min/max normalization
# Note: to do it the fully right way, the minmax scaling should be done on all training data coherently
# (currently its done per training block) and the same scaling values (min and max) should be reused here
# see http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
# TODO store minmax_scale from training data and reapply same scaling here

if use_normalized:
    testdata = preprocessing.minmax_scale(testdata, feature_range=(-1, 1), axis=0, copy=make_copy)

In [None]:
# convert to dataframe cause plot needs column names
testdata = pd.DataFrame(testdata, columns=params)

grid_plot(testdata)

In [None]:
testdata.head(15)

In [None]:
# time resample

n_samples = len(timestamps)  

if use_resampled:
    # the number of samples stays the same
    # but we use the original timestamps to re-align the signal
    testdata_res, timestamps2 = resample(testdata, num=n_samples, t=timestamps)
    
    # convert to dataframe cause plot needs column names
    testdata_res = pd.DataFrame(testdata_res, columns=params)

In [None]:
timestamps[:15]

In [None]:
timestamps2[:15]

In [None]:
# timestamps are now equidistant
timestamps2[1:15] - timestamps2[:14]

In [None]:
testdata_res.head(15)

In [None]:
# debug check whether the values have been altered -> OK
#testdata == testdata_res

In [None]:
# overwrite testdata with testdata_res for subsequent coherent usage
#testdata = testdata_res

In [None]:
grid_plot(testdata)

### Continuous Prediction

In [None]:
# for our window_size (= signal length of input to Machine Learning)
# we take the average signal length of the trained gestures
window_size = avg_data_len 
window_size

In [None]:
# PREDICTION RESOLUTION
# how quickly do we step forward

# for now we choose half the window_size
step_size = window_size / 2

# can be set smaller for higher resolution

# TODO: set in milliseconds - convert back to sample length

step_size

In [None]:
# TODO: align with preprocess_signal function used in training data above

def preprocess_signal_continuous(testdata, normalize=False, resampling=False, timestamps=None, filtering=False):
    '''Pre-process a continuous signal block: optional min-max normalization,
    time resampling and low-pass filtering, applied in that order.

    :param testdata: signal block with the time steps in the rows
    :param normalize: if True, min-max scale every column to [-1, 1]
    :param resampling: if True, resample the block; the number of samples stays the same
    :param timestamps: optional original timestamps, used to re-align the signal
    :param filtering: if True, apply the low-pass filter using the notebook-level
        settings cutoff, fs and order
    :return: the processed block (the untouched input if no step is enabled)
    '''
    # Min/max normalization
    # Note: to do it the fully right way, the minmax scaling should be done on all training data coherently
    # (currently its done per training block) and the same scaling values (min and max) should be reused here
    # see http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    if normalize:
        testdata = preprocessing.minmax_scale(testdata, feature_range=(-1, 1), axis=0)

    # Time Resampling
    if resampling:
        # the number of samples stays the same
        n_samples = testdata.shape[0]

        if timestamps is None:
            # without a time axis resample() returns the signal only, so it must not
            # be unpacked into two values
            testdata = resample(testdata, num=n_samples, window='hann')
        else:
            # with t given, resample() returns (signal, new timestamps)
            testdata, _ = resample(testdata, num=n_samples, t=timestamps, window='hann')

    if filtering:
        # filter the signal block with low-pass filter
        testdata = butter_lowpass_filter(testdata, cutoff, fs, order)

    return testdata

In [None]:
# PREDICTION LOOP OVER 1 TRAINING INPUT BLOCK

def continuous_prediction(testdata, window_size, step_size, gestures=None):
    '''slides a window over one continuous signal block and predicts a gesture for every window position

    For each window the features are computed with calc_all_features, standardized with the
    notebook-level standardizer and classified with the notebook-level best_model. The ground truth
    of a window is the gesture label that occurs most often among the samples of that window.

    :param testdata: signal block with the samples along axis 0; must support row slicing
        (testdata[a:b]) and have a .shape attribute
    :param window_size: number of samples per window (converted to int)
    :param step_size: number of samples the window moves forward per prediction (converted to int)
    :param gestures: per-sample ground truth gesture labels aligned with testdata;
        if None, the notebook-level list test_gestures is used
    :return: tuple (test_groundtruth, predictions), two lists with one entry per window
    :raises ValueError: if window_size or step_size is smaller than 1
    '''
    # both values are used for slicing / stepping, so they have to be integers
    window_size = int(window_size)
    step_size = int(step_size)
    if window_size < 1 or step_size < 1:
        # a step of 0 would never advance and loop forever
        raise ValueError("window_size and step_size must be >= 1 (got %r and %r)" % (window_size, step_size))

    if gestures is None:
        # backward compatible default: labels of the block currently processed by the notebook
        gestures = test_gestures

    n_samples = testdata.shape[0]

    # output
    test_groundtruth = []  # we create the groundtruth to compare with here
    predictions = []  # predictions are collected here

    # "+ 1": the last window that still fits completely (pos == n_samples - window_size) is included;
    # if the block is shorter than one window the range is empty and two empty lists are returned
    for pos in range(0, n_samples - window_size + 1, step_size):
        # cut a window out of the incoming signal
        signal = testdata[pos:pos + window_size]

        # to get the "correct" gesture for that window, we cut the same part of the gesture information
        test_window_groundtruth = gestures[pos:pos + window_size]

        # we do a majority vote to say which gesture is pre-dominant in this window
        gt_gesture = Counter(test_window_groundtruth).most_common()[0][0]

        # calc features
        features = calc_all_features(signal, calc_derivative, calc_zerocrossings)

        # reshape to row vector for standardize and predict below (= single input sample)
        features = features.reshape(1, -1)

        # STANDARDIZE features, the same way as done in training (reusing those mean and var)
        features = standardizer.transform(features)

        # ML prediction of gesture
        pred_gesture = best_model.predict(features)[0]

        # add to groundtruth and prediction list
        test_groundtruth.append(gt_gesture)
        predictions.append(pred_gesture)

    return test_groundtruth, predictions

In [None]:
# LOOP over ALL Experiments or Trainsets

i = 0
n_groups = len(group_df)

test_groundtruth_all = [] # we create the groundtruth to compare with here
predictions_all = []  # predictions are collected here

for name_tuple, group_data in group_df:
    
    i += 1
    print "Experiment", i, "/", n_groups, ":", str(name_tuple), group_data.shape,
    
    # just metadata
    if len(name_tuple) == 3:
        subject, exp, trainset = name_tuple
    elif len(name_tuple) == 2:
        subject, exp = name_tuple
        trainset = None
    
    # get signals, timestamps and gesture groundtruth
    timestamps = group_data['TimeStamp'].tolist()
    test_gestures = group_data['Gesture'].tolist()
    testdata = group_data[params]
    
    # preprocess testdata
    print "Preprocessing ...",
    testdata = preprocess_signal_continuous(testdata, use_normalized, use_resampled, timestamps, use_lowpassfilter)
    #print testdata.shape
    
    # convert to dataframe cause we use pandas .diff() in ZCR computation
    testdata = pd.DataFrame(testdata, columns=params)
    
    print "Prediction:", 
    test_groundtruth, predictions = continuous_prediction(testdata, window_size, step_size)
    print len(predictions), "predictions"
    
    test_groundtruth_all.extend(test_groundtruth)
    predictions_all.extend(predictions)
    

In [None]:
# debug cell: within this section n_samples is only assigned as a local variable inside functions,
# so this shows whatever global n_samples an earlier cell defined - NOTE(review): confirm or remove
n_samples

In [None]:
# sanity check: call the gesture_name helper (defined outside this section) for gesture ID 11
gesture_name(11)

In [None]:
# debug cell: within this section "signal" is only a local variable of continuous_prediction,
# so this relies on a global "signal" from an earlier cell - NOTE(review): confirm or remove
signal.shape

In [None]:
# debug cell: within this section "features" is only a local variable of continuous_prediction,
# so this relies on a global "features" from an earlier cell - NOTE(review): confirm or remove
features.shape

In [None]:
# debug cell (repeated): displays the global n_samples, if an earlier cell defined one
# NOTE(review): not set at notebook level by the cells of this section - confirm or remove
n_samples

In [None]:
# total number of window predictions collected over all groups
print len(predictions_all), "predictions"

In [None]:
# list the distinct gesture labels that occur in the collected groundtruth
print "collected true gestures include:"
np.unique(test_groundtruth_all).tolist()

In [None]:
# list the distinct gesture labels that occur in the predictions
print "predicted gestures include:"
np.unique(predictions_all).tolist()

In [None]:
# table with one row per predicted window: groundtruth label next to predicted label
pd.DataFrame({'groundt':test_groundtruth_all, 'pred':predictions_all})

In [None]:
# Overview table with one row per metric, computed on the TEST set over all collected windows:
# Accuracy plus macro-averaged Precision, Recall and F-Measure
result_ov = pd.DataFrame(columns=['result']) #columns=['Accuracy','Precision','Recall','F-Measure'])

result_ov.loc['Accuracy'] = accuracy_score(test_groundtruth_all, predictions_all)

# the remaining three metrics share the same call signature, rows are added in this order
for _metric_name, _metric_fn in (('Precision', precision_score),
                                 ('Recall', recall_score),
                                 ('F-Measure', f1_score)):
    result_ov.loc[_metric_name] = _metric_fn(test_groundtruth_all, predictions_all, average='macro')

result_ov

#### Confusion Matrix

In [None]:
# confusion matrix over all collected windows (groundtruth passed as y_true, predictions as y_pred)
conf = confusion_matrix(test_groundtruth_all, predictions_all, labels=labels) # labels defines the order

In [None]:
# look up the 'name' column of gestures_df for every label (presumably the long gesture names - confirm)
labels_long = gestures_df.loc[labels,'name']
# wrap the confusion matrix in a DataFrame: names from labels_long as row index, labels as column headers
conf_df = pd.DataFrame(conf, index=labels_long, columns=labels)
conf_df

In [None]:
# display the label -> name lookup used as row index of the confusion matrix table
labels_long