In [334]:
import sys

import numpy as np
import collections
from parse_rest.connection import register
from parse_rest.datatypes import Object
import key
from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

# Hand the Parse application id and REST API key (both kept in the `key` module)
# to parse_rest before any query is issued.
register(key.APP_ID, key.REST_API_KEY)

class DataChunk(Object):
    """parse_rest ``Object`` subclass used to query the stored data chunks.

    The class adds no behaviour of its own. Elsewhere in this notebook its
    records are read for the attributes ``emotion``, ``userId``,
    ``accelerationMagnitudes``, ``totalNumberOfDeletions``, ``gyroMagnitudes``,
    ``interTapDistances`` and ``tapDurations``.

    NOTE(review): presumably bound by parse_rest to the remote class with the
    same name -- confirm against the parse_rest documentation.
    """
    pass

In [347]:
# Base query: chunks that have both `appVersion` and `interTapDistances` set,
# limited to 1000 results.
allData = (
    DataChunk.Query.all()
    .filter(appVersion__exists=True, interTapDistances__exists=True)
    .limit(1000)
)
# Per-user views derived from the base query, in the order acsalu, co273, jean.
hcData, coData, jeanData = (
    allData.filter(userId=user_id) for user_id in ('acsalu', 'co273', 'jean')
)

In [336]:
# Record attributes read from every chunk; the order matters because the
# extraction cell unpacks the resulting lists positionally.
featureName = [
    'accelerationMagnitudes',
    'totalNumberOfDeletions',
    'gyroMagnitudes',
    'interTapDistances',
    'tapDurations',
    'userId',
]
# Emotion label -> integer class id ('Neutral' and 'Calm' share class id 1).
emotions = dict(Happy=0, Neutral=1, Calm=1, Sad=2, Angry=3, Anxious=4)
# User id string -> integer code.
uids = dict(acsalu=0, co273=1, jean=2)

In [337]:
def label_stats(data):
    """Count how often each emotion label occurs in ``data``.

    Args:
        data: Iterable of records exposing an ``emotion`` attribute.

    Returns:
        A ``collections.Counter`` mapping each ``emotion`` value to the number
        of records carrying it.
    """
    return collections.Counter(record.emotion for record in data)

In [377]:
# Label distribution and record count for the full set and for each user.
for dataset_name, dataset in (
    ('allData', allData),
    ('hcData', hcData),
    ('coData', coData),
    ('jeanData', jeanData),
):
    print(dataset_name + ': ', label_stats(dataset), '  Total: ', len(dataset))

allData:  Counter({'Happy': 127, 'Calm': 90, 'Sad': 53, 'Angry': 5})   Total:  275
hcData:  Counter({'Calm': 10, 'Happy': 8, 'Sad': 8})   Total:  26
coData:  Counter({'Happy': 94, 'Calm': 68, 'Sad': 32, 'Angry': 5})   Total:  199
jeanData:  Counter({'Happy': 25, 'Sad': 13, 'Calm': 12})   Total:  50


In [378]:
# Records to model; swap in allData / hcData / jeanData to change the subset.
data = coData
# Integer class id of every record's emotion label.
labels = [emotions[record.emotion] for record in data]

# One list per entry of featureName (same order), each holding that attribute
# for every record in `data`.
accMag, ttlNODel, gyro, intTapDist, tapDur, uid = (
    [getattr(record, field) for record in data] for field in featureName
)

# Reduce each record's values to a mean and a standard deviation
# (presumably every entry is a per-record sequence of samples -- confirm).
aveAccMag = [np.mean(samples) for samples in accMag]
stdAccMag = [np.std(samples) for samples in accMag]
aveGyro = [np.mean(samples) for samples in gyro]
stdGyro = [np.std(samples) for samples in gyro]
aveIntTapDist = [np.mean(samples) for samples in intTapDist]
stdIntTapDist = [np.std(samples) for samples in intTapDist]
aveTapDur = [np.mean(samples) for samples in tapDur]
stdTapDur = [np.std(samples) for samples in tapDur]
# Integer code of the user each record belongs to.
uid_fea = [uids[user] for user in uid]

# Normalization

In [370]:
def normalize(feature):
    """Standardize a feature to zero mean and unit standard deviation.

    Args:
        feature: Array-like of numbers (list or ndarray). It is not modified.

    Returns:
        A float ``numpy.ndarray`` with the same shape as ``feature``:
        ``(feature - mean) / std`` using the population standard deviation.
        A feature whose entries are all identical maps to all zeros, and an
        empty feature maps to an empty array.
    """
    values = np.asarray(feature, dtype=float)
    if values.size == 0:
        # Nothing to scale; skips the NaN mean and RuntimeWarning numpy
        # produces for empty input.
        return values.copy()
    if values.min() == values.max():
        # Constant feature. Checking min == max instead of std == 0 matters
        # because the mean of identical floats can round (e.g. 0.1 repeated
        # three times), leaving a tiny non-zero std that would blow the
        # column up to +/-1 instead of zero.
        return np.zeros_like(values)
    centered = values - values.mean()
    std = values.std()
    if std == 0:
        # Spread underflowed to zero: only centre, never divide by zero.
        return centered
    return centered / std

# Feature matrix: every column below is normalized independently, then the
# stack is transposed so rows are records and columns are features.
# Remove `uid_fea` from the tuple to train without the user-id feature.
features = np.array([
    normalize(column)
    for column in (
        aveAccMag, stdAccMag, ttlNODel, aveGyro, stdGyro,
        aveIntTapDist, stdIntTapDist, aveTapDur, stdTapDur, uid_fea,
    )
]).T

# Logistic Regression

In [371]:
# Logistic regression with default settings; report the mean of the
# cross-validation fold scores (estimator's default scorer).
model = LogisticRegression()
score = np.mean(cross_val_score(model, features, labels))
print(score)

0.520536106234


# SVM

In [376]:
# Support vector classifier with default settings; report the mean of the
# cross-validation fold scores (estimator's default scorer).
clf = svm.SVC()
score = np.mean(cross_val_score(clf, features, labels))
print(score)

0.592519937079


# Naive Bayes

In [373]:
# Gaussian naive Bayes with default settings; report the mean of the
# cross-validation fold scores (estimator's default scorer).
gnb = GaussianNB()
score = np.mean(cross_val_score(gnb, features, labels))
print(score)

0.32659770998


# Random Forest

In [374]:
# Random forests are built from random bootstrap samples and feature subsets,
# so an unseeded forest prints a different score on every run. Pinning
# random_state makes the reported number reproducible; re-run the cell to
# refresh the score recorded with it.
rf = RandomForestClassifier(random_state=0)
# Mean of the cross-validation fold scores (estimator's default scorer).
score = cross_val_score(rf, features, labels).mean()
print(score)

0.562509145449
