# Make-o-Matic Gesture Recognition

## Part 1: Data Preprocessing

2017 by Thomas Lidy, TU Wien

### Requirements

Python 2.7

pip install -r requirements.txt

Tested on OS: Ubuntu 16.04.3 LTS

In [1]:
import numpy as np
import pandas as pd

## Read Data

In [2]:
# main data

#input
csv_file = 'data/EXPORT_09042017173622.csv'

# output
csv_file_out = 'data/EXPORT_09042017173622_preprocessed.csv'

In [3]:
# Experiment Data
data = pd.read_csv(csv_file)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
data.shape

(1435576, 26)

In [5]:
data.columns

Index([u'Trainset-ID', u'Experiment-ID', u'Subject-ID', u'Observer-ID',
       u'collectedByHand', u'Collector-ID', u'TimeStamp', u'RFID-ID',
       u'GRASP-A', u'GRASP-B', u'GRASP-C', u'AX', u'AY', u'AZ', u'EX', u'EY',
       u'EZ', u'USERINPUT', u'HANDINGLOVE', u' Parcours-ID', u'Parcours-Step',
       u'Mutation-ID', u' Mutation/HandIsActive', u'Host-ID', u'Host/Spot-ID',
       u'Gesture-ID'],
      dtype='object')

In [6]:
data.dtypes

Trainset-ID                object
Experiment-ID              object
Subject-ID                 object
Observer-ID                object
collectedByHand            object
Collector-ID               object
TimeStamp                   int64
RFID-ID                    object
GRASP-A                     int64
GRASP-B                     int64
GRASP-C                     int64
AX                        float64
AY                        float64
AZ                        float64
EX                        float64
EY                        float64
EZ                        float64
USERINPUT                    bool
HANDINGLOVE                  bool
 Parcours-ID               object
Parcours-Step               int64
Mutation-ID                object
 Mutation/HandIsActive       bool
Host-ID                    object
Host/Spot-ID               object
Gesture-ID                 object
dtype: object

In [7]:
data.head(10)

Unnamed: 0,Trainset-ID,Experiment-ID,Subject-ID,Observer-ID,collectedByHand,Collector-ID,TimeStamp,RFID-ID,GRASP-A,GRASP-B,...,EZ,USERINPUT,HANDINGLOVE,Parcours-ID,Parcours-Step,Mutation-ID,Mutation/HandIsActive,Host-ID,Host/Spot-ID,Gesture-ID
0,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,0,0,781,8,...,-81.9375,False,True,P101,1,M151,True,H008,,G15
1,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,19896,0,212,213,...,-84.5625,False,False,P101,1,M151,False,,,
2,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,29001,0,782,0,...,-81.9375,False,True,P101,1,M151,True,H008,,G15
3,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,29823,0,179,185,...,-84.5625,False,False,P101,1,M151,False,,,
4,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,46136,0,782,6,...,-81.875,False,True,P101,1,M151,True,H008,,G15
5,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,54766,0,130,155,...,-84.625,False,False,P101,1,M151,False,,,
6,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,74902,0,784,7,...,-81.8125,False,True,P101,1,M151,True,H008,,G15
7,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,79764,0,158,172,...,-84.625,False,False,P101,1,M151,False,,,
8,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,97663,0,781,0,...,-81.75,False,True,P101,1,M151,True,H008,,G15
9,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,103845,0,203,204,...,-84.6875,False,False,P101,1,M151,False,,,


## Some Data Stats

In [8]:
# how many subjects
subjects = data['Subject-ID'].unique().tolist()
subjects 

['Andreas', 'Alfred', 'Claudia', 'Dominik']

In [9]:
# how many observers
data['Observer-ID'].unique().tolist()

['Clemens']

In [10]:
# how many hands
data['Collector-ID'].unique().tolist()

['R01', 'L01']

In [11]:
# how many RFID ids
data['RFID-ID'].unique().tolist()

['000000000000',
 '09006734114B',
 0,
 '05003DD5CD20',
 '760057911BAB',
 '13005E4BB7B1',
 '1400135B8AD6',
 '14001486BE38',
 '7600577D0D51',
 '140014CD1AD7',
 '280015E55981',
 '05008C2F993F']

In [12]:
# how many experiments
experiments = data['Experiment-ID'].unique().tolist()
experiments

['E001', 'E002', 'E003', 'E004', 'E005', 'E006', 'E007', 'E008']

In [13]:
# how many training sets
len(data['Trainset-ID'].unique().tolist())

539

In [14]:
# how many parcours
len(data[' Parcours-ID'].unique())

116

In [15]:
data[' Parcours-ID'].unique()

array(['P101', 'P102', 'P103', 'P104', 'P105', 'P106', 'P107', 'P108',
       'P109', 'P110', 'P111', 'P112', 'P113', 'P114', 'P115', 'P116',
       'P117', 'P118', 'P401', 'P402', 'P403', 'P201', 'P202', 'P203',
       'P204', 'P205', 'P206', 'P207', 'P208', 'P211', 'P212', 'P213',
       'P216', 'P217', 'P218', 'P221', 'P222', 'P223', 'P226', 'P227',
       'P228', 'P701', 'P702', 'P705', 'P706', 'P709', 'P710', 'P713',
       'P714', 'P801', 'P802', 'P601', 'P602', 'P603', 'P606', 'P607',
       'P608', 'P611', 'P612', 'P613', 'P616', 'P617', 'P618', 'P621',
       'P622', 'P623', 'P501', 'P502', 'P504', 'P503', 'P301', 'P302',
       'P306', 'P307', 'P308', 'P311', 'P312', 'P316', 'P317', 'P318',
       'P321', 'P322', 'P326', 'P327', 'P328', 'P119', 'P120', 'P121',
       'P231', 'P232', 'P234', 'P233', 'P236', 'P235', 'P717', 'P718',
       'P719', 'P720', 'P721', 'P722', 'P723', 'P724', 'P626', 'P627',
       'P628', 'P629', 'P630', 'P331', 'P332', 'P333', 'P334', 'P335',
      

In [16]:
# how many parcours steps
data['Parcours-Step'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [17]:
# how many mutations
len(data['Mutation-ID'].unique())

177

In [18]:
# how many gestures
len(data['Gesture-ID'].unique())

19

In [19]:
# which gestures exist
sorted(data['Gesture-ID'].unique())

[nan,
 'G01',
 'G02',
 'G03',
 'G04',
 'G05',
 'G06',
 'G07',
 'G08',
 'G09',
 'G10',
 'G11',
 'G12',
 'G13',
 'G14',
 'G15',
 'G16',
 'G17',
 'G18']

In [20]:
data.head()

Unnamed: 0,Trainset-ID,Experiment-ID,Subject-ID,Observer-ID,collectedByHand,Collector-ID,TimeStamp,RFID-ID,GRASP-A,GRASP-B,...,EZ,USERINPUT,HANDINGLOVE,Parcours-ID,Parcours-Step,Mutation-ID,Mutation/HandIsActive,Host-ID,Host/Spot-ID,Gesture-ID
0,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,0,0,781,8,...,-81.9375,False,True,P101,1,M151,True,H008,,G15
1,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,19896,0,212,213,...,-84.5625,False,False,P101,1,M151,False,,,
2,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,29001,0,782,0,...,-81.9375,False,True,P101,1,M151,True,H008,,G15
3,_TRAINSET14022017094616,E001,Andreas,Clemens,left,L01,29823,0,179,185,...,-84.5625,False,False,P101,1,M151,False,,,
4,_TRAINSET14022017094616,E001,Andreas,Clemens,right,R01,46136,0,782,6,...,-81.875,False,True,P101,1,M151,True,H008,,G15


In [21]:
# how many hosts
len(data['Host-ID'].unique())

10

In [22]:
# which Host Spots
data['Host/Spot-ID'].unique().tolist()

[nan, 'A', 'B', 'F']

## Data Cleanup

In [23]:
# rename some columns as the - in the column name does not allow for data.query()) to be used
# cut off the "-ID" suffix, remove a leading space and replace - by _

for col in data.columns:
    if col.endswith('-ID'):
        data.rename(columns={col: col[:-3]}, inplace=True)
        print "Renamed", col, "->", col[:-3]
        col = col[:-3]
    if col.startswith(' '):
        data.rename(columns={col: col[1:]}, inplace=True)
        print "Renamed", col, "->", col[1:]
        col = col[1:]
    if '-' in col:
        new_col = col.replace('-','_')
        data.rename(columns={col: new_col}, inplace=True)
        print "Renamed", col, "->", new_col

Renamed Trainset-ID -> Trainset
Renamed Experiment-ID -> Experiment
Renamed Subject-ID -> Subject
Renamed Observer-ID -> Observer
Renamed Collector-ID -> Collector
Renamed RFID-ID -> RFID
Renamed GRASP-A -> GRASP_A
Renamed GRASP-B -> GRASP_B
Renamed GRASP-C -> GRASP_C
Renamed  Parcours-ID ->  Parcours
Renamed  Parcours -> Parcours
Renamed Parcours-Step -> Parcours_Step
Renamed Mutation-ID -> Mutation
Renamed  Mutation/HandIsActive -> Mutation/HandIsActive
Renamed Host-ID -> Host
Renamed Host/Spot-ID -> Host/Spot
Renamed Gesture-ID -> Gesture


In [24]:
# remove rows that are not needed:
# all where Gesture-ID is NaN  (implies 'Mutation/HandIsActive' == False and Host is NaN)

data.dropna(axis=0, subset=['Gesture'], inplace=True)

In [25]:
# remove columns that we don't need
# to be ignored as communicated by MOM
cols_to_remove = ['USERINPUT', 'HANDINGLOVE', 'Observer','Collector',
                  'Mutation/HandIsActive']
# originally included: 'Trainset', 'RFID', 'Parcours_Step'

data.drop(cols_to_remove,inplace=True,axis=1)

In [26]:
data.shape

(780299, 21)

In [27]:
data.head()

Unnamed: 0,Trainset,Experiment,Subject,collectedByHand,TimeStamp,RFID,GRASP_A,GRASP_B,GRASP_C,AX,...,AZ,EX,EY,EZ,Parcours,Parcours_Step,Mutation,Host,Host/Spot,Gesture
0,_TRAINSET14022017094616,E001,Andreas,right,0,0,781,8,797,0.06,...,-0.1,216.8125,9.0625,-81.9375,P101,1,M151,H008,,G15
2,_TRAINSET14022017094616,E001,Andreas,right,29001,0,782,0,799,0.09,...,-0.11,217.0625,9.0625,-81.9375,P101,1,M151,H008,,G15
4,_TRAINSET14022017094616,E001,Andreas,right,46136,0,782,6,798,0.12,...,0.09,217.4375,9.125,-81.875,P101,1,M151,H008,,G15
6,_TRAINSET14022017094616,E001,Andreas,right,74902,0,784,7,798,0.08,...,0.03,217.625,9.125,-81.8125,P101,1,M151,H008,,G15
8,_TRAINSET14022017094616,E001,Andreas,right,97663,0,781,0,798,0.07,...,0.04,217.9375,9.1875,-81.75,P101,1,M151,H008,,G15


In [28]:
# also in this evaluation, we use only one-hand devices and only people who are right-handers
data['collectedByHand'].unique().tolist()

['right', 'left']

In [29]:
# right hand data rows
sum(data['collectedByHand']=='right')

711360

In [30]:
# left hand data rows
sum(data['collectedByHand']=='left')

68939

In [31]:
# REMOVE left hand data, keep only right hand
data = data[data['collectedByHand']=='right']
data.shape

(711360, 21)

In [32]:
# REMOVE column collectedByHand 
del data['collectedByHand']

### Convert Columns with String IDs to numbers, for later improved efficiency

In [34]:
def str_to_int(string):
    '''cut away first character and convert to int - used to convert Gesture IDs like "G01" to 1'''
    return int(string[1:])

In [35]:
cols_to_convert = ['Experiment','Parcours','Mutation','Host','Gesture']

for c in cols_to_convert:
    data[c] = data[c].apply(str_to_int)

In [36]:
data.head()

Unnamed: 0,Trainset,Experiment,Subject,TimeStamp,RFID,GRASP_A,GRASP_B,GRASP_C,AX,AY,AZ,EX,EY,EZ,Parcours,Parcours_Step,Mutation,Host,Host/Spot,Gesture
0,_TRAINSET14022017094616,1,Andreas,0,0,781,8,797,0.06,-0.02,-0.1,216.8125,9.0625,-81.9375,101,1,151,8,,15
2,_TRAINSET14022017094616,1,Andreas,29001,0,782,0,799,0.09,-0.04,-0.11,217.0625,9.0625,-81.9375,101,1,151,8,,15
4,_TRAINSET14022017094616,1,Andreas,46136,0,782,6,798,0.12,-0.09,0.09,217.4375,9.125,-81.875,101,1,151,8,,15
6,_TRAINSET14022017094616,1,Andreas,74902,0,784,7,798,0.08,-0.08,0.03,217.625,9.125,-81.8125,101,1,151,8,,15
8,_TRAINSET14022017094616,1,Andreas,97663,0,781,0,798,0.07,-0.09,0.04,217.9375,9.1875,-81.75,101,1,151,8,,15


In [37]:
data.columns

Index([u'Trainset', u'Experiment', u'Subject', u'TimeStamp', u'RFID',
       u'GRASP_A', u'GRASP_B', u'GRASP_C', u'AX', u'AY', u'AZ', u'EX', u'EY',
       u'EZ', u'Parcours', u'Parcours_Step', u'Mutation', u'Host',
       u'Host/Spot', u'Gesture'],
      dtype='object')

## Export preprocessed Data

In [38]:
data.to_csv(csv_file_out, index=False) # index=False means to omit the numeric index column