In [165]:
import numpy as np
import collections
from parse_rest.connection import register
from parse_rest.datatypes import Object
import key
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation

register(key.APP_ID, key.REST_API_KEY)

class DataChunk(Object):
    pass

In [224]:
allData = DataChunk.Query.all().filter(appVersion='0.3.1', interTapDistances__exists=True).limit(1000)
hcData = allData.filter(userId='acsalu')
coData = allData.filter(userId='co273')
jeanData = allData.filter(userId='jean')

In [225]:
featureName = ['accelerationMagnitudes', 'totalNumberOfDeletions', 'gyroMagnitudes', \
                'interTapDistances', 'tapDurations', 'userId']
emotions = {'Happy': 0, 'Neutral': 1, 'Calm': 1, 'Sad': 2, 'Angry': 3, 'Anxious': 4}
uids = {'acsalu': 0, 'co273': 1, 'jean': 2}

In [226]:
def label_stats(data):
    labels = map(lambda d: d.emotion, data)
    counter = collections.Counter(labels)
    return counter

In [227]:
print('allData: ', label_stats(allData), '  Total: ', len(allData))
print('hcData: ', label_stats(hcData), '  Total: ', len(hcData))
print('coData: ', label_stats(coData), '  Total: ', len(coData))
print('jeanData: ', label_stats(jeanData), '  Total: ', len(jeanData))

allData:  Counter({'Happy': 111, 'Calm': 82, 'Sad': 52, 'Angry': 5})   Total:  250
hcData:  Counter({'Calm': 9, 'Sad': 7, 'Happy': 6})   Total:  22
coData:  Counter({'Happy': 94, 'Calm': 61, 'Sad': 32, 'Angry': 5})   Total:  192
jeanData:  Counter({'Sad': 13, 'Calm': 12, 'Happy': 11})   Total:  36


In [228]:
data = coData
labels = list(map(lambda x: emotions[x.emotion], data))
[accMag, ttlNODel, gyro, intTapDist, tapDur, uid] = \
    [[getattr(d, feature) for d in data] for feature in featureName]
    
aveAccMag, stdAccMag = [np.mean(a) for a in accMag], [np.std(a) for a in accMag]
aveGyro, stdGyro = [np.mean(g) for g in gyro], [np.std(g) for g in gyro]
aveIntTapDist, stdIntTapDist = [np.mean(i) for i in intTapDist], [np.std(i) for i in intTapDist]
aveTapDur, stdTapDur = [np.mean(t) for t in tapDur], [np.std(t) for t in tapDur]
uid_fea = list(map(lambda x: uids[x], uid))

# Normalization

In [229]:
def normalize(feature):
    std = np.std(feature)
    if std == 0:
        return feature-np.mean(feature)
    return (feature-np.mean(feature))/np.std(feature)

features = [aveAccMag, stdAccMag, ttlNODel, aveGyro, stdGyro, aveIntTapDist, stdIntTapDist, \
            aveTapDur, stdTapDur]
# features = [aveAccMag, stdAccMag, ttlNODel, aveGyro, stdGyro, aveIntTapDist, stdIntTapDist, \
#             aveTapDur, stdTapDur, uid_fea]
features = list(map(normalize, features))
features = np.array(features).T
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    features, labels, test_size=0.3, random_state=0)

# Logistic Regression

In [230]:
model = LogisticRegression()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))

0.655172413793


# SVM

In [231]:
clf = svm.SVC()
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

0.724137931034


# Naive Bayes

In [232]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
print(gnb.score(X_train, y_train))

0.425373134328


# Random Forest

In [233]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
print(rf.score(X_test, y_test))

0.706896551724
