In [49]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler 
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np

In [53]:
# Load comma delimited sensor data into Pandas data frame
sensor_file = "../data/sensor_11_12_2017_10_52_24.csv"
df = pd.read_csv(sensor_file)
print(df.shape)

# Extract features and target (y)
# Judging by df.head() and the 17-column shape, column 0 is the DateTime
# stamp (excluded from the features) and column 16 is the State label.
# NOTE(review): the slice 1:15 stops at GyroY and appears to leave out
# GyroZ (column 15) -- confirm whether that omission is intentional.
X = df.iloc[:,1:15] # exclude DateTime
y = df.iloc[:,16]
print(X.shape)
print(y.shape)

# Use 80% of data for training, 20% for testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20, random_state = 1)
print(X_train.shape)
print(X_test.shape)

# Scaling features, mean = 0, stddev = 1
# The scaler is created in this cell so the notebook runs on a fresh kernel
# (it was previously only defined in a later demo cell). It is fitted on the
# training split only; X_test is left unscaled here and is transformed with
# this same scaler at prediction time.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

# Head
df.head()

(5457, 17)
(5457, 14)
(5457L,)
(4365, 14)
(1092, 14)


Unnamed: 0,DateTime,Temp,Humidity,Pressure,Yaw,Pitch,Roll,MagX,MagY,MagZ,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,State
0,11/12/2017 10:52:35.251176,29.361,40.702,1031.956,215.801975,356.730118,2.171165,-5.351357,5.945352,26.308798,0.056247,0.034719,0.960166,-0.060679,0.072064,0.049957,standing
1,11/12/2017 10:52:35.523388,29.308,39.749,1031.952,216.099403,356.626506,2.103062,-10.72013,12.090914,53.852757,0.055035,0.016267,0.974544,0.011399,0.019521,-0.016927,standing
2,11/12/2017 10:52:35.688238,29.468,40.231,1031.941,216.340904,356.301112,2.310486,-13.004756,14.781075,65.216164,0.046307,0.01481,0.939451,0.040319,0.005237,-0.007642,standing
3,11/12/2017 10:52:35.849093,29.397,40.121,1031.95,216.613607,356.519507,2.298305,-13.880388,16.216396,69.69474,0.060854,0.016267,0.945544,-0.005668,0.020673,0.007911,standing
4,11/12/2017 10:52:36.010403,29.343,40.088,1031.943,216.940253,356.745464,2.196436,-13.895806,16.390114,71.753525,0.055763,0.023308,0.937014,-0.008612,0.015847,0.017006,standing


In [44]:
# Supervised neural network model (multi-layer perceptron)
# http://scikit-learn.org/stable/modules/neural_networks_supervised.html
# Pipeline: standardize the features, then hand them to the classifier
pipeline_steps = [
    ('scl', StandardScaler()),
    ('clf', MLPClassifier(solver='lbfgs',
                          alpha=0.00001,
                          hidden_layer_sizes=(15,15,8),
                          random_state=1)),
]
pipe = Pipeline(pipeline_steps)

# How well does the model converge? Overfitting? Underfitting?
# 10-fold cross-validation on the training split, one score per fold
scores = cross_val_score(estimator=pipe, X=X_train, y=y_train, cv=10, n_jobs=1)
for fold_score in scores:
    print(fold_score)

# For comparison: the final CV acc for training data was 0.64 using Logistic Regression
print('CV Acc: %.3f +/- %.3f' % (scores.mean(), scores.std()))


0.909090909091
0.892938496583
0.89497716895
0.915525114155
0.906392694064
0.896551724138
0.912643678161
0.921658986175
0.903225806452
0.910138248848
CV Acc: 0.906 +/- 0.009


In [46]:
# Fit the pipeline on the (already standardized) training data
pipe.fit(X_train, y_train)

# X_train was transformed with `scaler` before fitting, so the test
# features go through the same transform before being passed to the pipeline.
X_test_scaled = scaler.transform(X_test)
preds = pipe.predict(X_test_scaled) # Get predictions

In [55]:
# Confusion table as a bare array (no class labels, so a bit vague):
# rows are the actual states, columns the predicted states
confusion_matrix(y_true=y_test, y_pred=preds)

array([[ 38,   8,   0,   0,   2,   0,   1,   0],
       [ 11,  36,   0,   2,   0,   0,   0,   4],
       [  1,   0,  94,   1,   2,   5,   1,   0],
       [  1,   0,   0,  65,   1,   0,   0,   0],
       [  0,   1,   2,   0, 126,   1,   0,   0],
       [  3,   0,   3,   0,   0, 156,   9,   6],
       [  4,   0,   0,   0,   3,   2, 134,  14],
       [  4,   4,   0,   0,   5,   4,  16, 322]])

In [54]:
# Better table: same counts as the confusion matrix, but labelled
# (rows = actual State, columns = predicted state).
# Ideally, there would be zeros everywhere except on the main diagonal.
# Last row, for example: the actual state is 'walking', but the model
# sometimes predicts 'turn_right' instead.
pd.crosstab(index=y_test, columns=preds)

col_0,go_down,go_up,spin_left,spin_right,standing,turn_left,turn_right,walking
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
go_down,38,8,0,0,2,0,1,0
go_up,11,36,0,2,0,0,0,4
spin_left,1,0,94,1,2,5,1,0
spin_right,1,0,0,65,1,0,0,0
standing,0,1,2,0,126,1,0,0
turn_left,3,0,3,0,0,156,9,6
turn_right,4,0,0,0,3,2,134,14
walking,4,4,0,0,5,4,16,322


In [50]:
# Metrics: precision, recall and F-score arrays for the test predictions
# (support is computed as well but not displayed)
precision, recall, fscore, support = score(y_test, preds)
for label, values in (("Precision: ", precision),
                      ("Recall: ", recall),
                      ("Fscore: ", fscore)):
    # "%s %s" keeps the single separating space between label and values
    print("%s %s" % (label, values))

Precision:  [ 0.61290323  0.73469388  0.94949495  0.95588235  0.90647482  0.92857143
  0.83229814  0.93063584]
Recall:  [ 0.7755102   0.67924528  0.90384615  0.97014925  0.96923077  0.88135593
  0.85350318  0.90704225]
Fscore:  [ 0.68468468  0.70588235  0.92610837  0.96296296  0.93680297  0.90434783
  0.8427673   0.91868759]


# Small Sample 

In [1]:
# Small demo: two training points, two classes
# http://scikit-learn.org/stable/modules/neural_networks_supervised.html
X = [[0., 0.],
     [1., 1.]]
y = [0, 1]

# fit() returns the estimator itself, so construction and training can be chained
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1
).fit(X, y)

# Classify two unseen points
clf.predict([[2., 2.], [-1., -2.]])

array([1, 0])

In [4]:
# Small demo with scaling: same toy data as the plain demo, but the inputs
# are standardized before training and before prediction
X = [[0., 0.], [1., 1.]]
y = [0, 1]

# Learn mean/stddev from the training points and standardize them in one step
scaler = StandardScaler()
X = scaler.fit_transform(X)

clf = MLPClassifier(solver='lbfgs',
                    alpha=1e-5,
                    hidden_layer_sizes=(5, 2),
                    random_state=1).fit(X, y)

# Unseen points must go through the same fitted scaler as the training data
testX = scaler.transform([[2., 2.], [-1., -2.]])
clf.predict(testX)

array([1, 0])