In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler 
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np
import pickle

##



# Load and Divide Data into Training and Testing  
  
In addition, the data is scaled. 


In [2]:
# Load comma delimited sensor data into Pandas data frame
sensor_file = "../data/sensor_11_12_2017_10_52_24.csv"
df = pd.read_csv(sensor_file)
print (df.shape)

# Extract features and target (y)
X = df.iloc[:,1:15] # exclude data
y = df.iloc[:,16]
print (X.shape)
print (y.shape)

# Use 80% of data for training, 20% for testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20, random_state = 1)
print (X_train.shape)
print (X_test.shape)

# Scaling features, mean = 0, stddev = 1
scaler = StandardScaler()
scaler.fit(X_train)  
X_train = scaler.transform(X_train)

# Head
df.head()

##

(5457, 17)
(5457, 14)
(5457L,)
(4365, 14)
(1092, 14)


Unnamed: 0,DateTime,Temp,Humidity,Pressure,Yaw,Pitch,Roll,MagX,MagY,MagZ,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,State
0,11/12/2017 10:52:35.251176,29.361,40.702,1031.956,215.801975,356.730118,2.171165,-5.351357,5.945352,26.308798,0.056247,0.034719,0.960166,-0.060679,0.072064,0.049957,standing
1,11/12/2017 10:52:35.523388,29.308,39.749,1031.952,216.099403,356.626506,2.103062,-10.72013,12.090914,53.852757,0.055035,0.016267,0.974544,0.011399,0.019521,-0.016927,standing
2,11/12/2017 10:52:35.688238,29.468,40.231,1031.941,216.340904,356.301112,2.310486,-13.004756,14.781075,65.216164,0.046307,0.01481,0.939451,0.040319,0.005237,-0.007642,standing
3,11/12/2017 10:52:35.849093,29.397,40.121,1031.95,216.613607,356.519507,2.298305,-13.880388,16.216396,69.69474,0.060854,0.016267,0.945544,-0.005668,0.020673,0.007911,standing
4,11/12/2017 10:52:36.010403,29.343,40.088,1031.943,216.940253,356.745464,2.196436,-13.895806,16.390114,71.753525,0.055763,0.023308,0.937014,-0.008612,0.015847,0.017006,standing


# Construct Neural Network Model and Tweak

In [3]:
# Supervised neural network model
# http://scikit-learn.org/stable/modules/neural_networks_supervised.html

# NN has 3 hidden layers. 1st layer has 15 neurons which equals 15 features
#     2nd layer has 100 (just because), and 3rd layer has 8 which equals 8 classes

# Pipeline, push scaler and classifer
pipe = Pipeline([('scl', StandardScaler()),
                 ('clf', MLPClassifier(solver='lbfgs', alpha=0.00001,
                                           hidden_layer_sizes=(15,100, 8), random_state=1))])

# How well does the model converge? Overfitting? Underfitting?
scores = cross_val_score(estimator=pipe, X=X_train, y=y_train,cv=10,n_jobs=1)
for s in scores:
    print (s)
    
# The final CV acc for training data was 0.64 using Logistic Regression
print ('CV Acc: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))

##

0.904545454545
0.906605922551
0.908675799087
0.915525114155
0.920091324201
0.92183908046
0.905747126437
0.935483870968
0.923963133641
0.907834101382
CV Acc: 0.915 +/- 0.010


# Use Models to Make Predictions

In [4]:
# Fit training data
pipe.fit(X_train, y_train)
predictions = pipe.predict(scaler.transform(X_test)) # Get predictions

##

# Evaluate Model Performance on Testing Data

In [5]:
# Better table
pd.crosstab(y_test, predictions)

# Ideally, there would zeros everywhere except for the main diagonal
# Last row, for example, the actual state is 'walking', but the model
# is predicting 'turn-right'

##

col_0,go_down,go_up,spin_left,spin_right,standing,turn_left,turn_right,walking
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
go_down,29,9,1,0,5,0,5,0
go_up,4,44,2,1,0,0,1,1
spin_left,0,1,100,0,0,3,0,0
spin_right,0,0,0,66,1,0,0,0
standing,0,2,0,0,125,0,0,3
turn_left,1,1,0,0,5,155,6,9
turn_right,0,0,0,1,2,2,138,14
walking,0,1,0,0,4,4,9,337


In [6]:
# Metrics
precision, recall, fscore, support = score(y_test, predictions)

print ('Prec ','Rec','  Fscore')
for i in range(len(precision)):
    print (('%.3f' % precision[i]), ('%.3f' % recall[i]), ('%.3f' % fscore[i]))
    
## Note that accuracy is not appropriate for unbalanced classes.
## F-score is the superior metric
## The lowest Fscores are associated with 'go_down' and 'go_up'

##

('Prec ', 'Rec', '  Fscore')
('0.853', '0.592', '0.699')
('0.759', '0.830', '0.793')
('0.971', '0.962', '0.966')
('0.971', '0.985', '0.978')
('0.880', '0.962', '0.919')
('0.945', '0.876', '0.909')
('0.868', '0.879', '0.873')
('0.926', '0.949', '0.937')


# Deploying Demonstration  
  
The following code demonstrate how the Neural Network model is saved and deployed to automatically classify sensor readings into one of eight classes.  

I intend on loading this model to the Raspberry Pi and feeding it with the sensor data to indicate automatically what motion the user is making.


In [7]:
# Use pickle library to save model
filename = '../model/sensor_model_2017.11.19a.sav'
pickle.dump(pipe, open(filename, 'wb'))


In [8]:
# NOTE: For reference only
#Add this to data logger for automatically describing the state of motion

# Load our saved model
loaded_model = pickle.load(open(filename, 'rb'))

# Simulate 10 consecutive sensor readings and make a prediction
for i in range(100,110):
    prediction = loaded_model.predict(scaler.transform(X_test.iloc[i,:].reshape(1,-1))) 
    print (prediction)
    
    
## It may be necessary to calculate a running average on sensors to minimize
## noise.

##

['walking']
['walking']
['turn_left']
['spin_left']
['turn_right']
['spin_right']
['spin_right']
['go_up']
['standing']
['walking']




# Compares Before and After Reshaping

In [43]:
# Reference only. In the next cell the data must be reshaped. Here are the differences.
print (X_test.iloc[1,:])
print (X_test.iloc[1,:].reshape(1,-1))

##

 Temp          25.527000
 Humidity      36.760000
 Pressure    1025.950000
 Yaw          158.921194
 Pitch        356.681997
 Roll           2.092825
 MagX         -42.919319
 MagY         -14.954748
 MagZ          76.645782
 AccX           0.042428
 AccY           0.022094
 AccZ           0.943594
 GyroX          0.000503
 GyroY          0.017684
Name: 3196, dtype: float64
[[  2.55270000e+01   3.67600000e+01   1.02595000e+03   1.58921194e+02
    3.56681997e+02   2.09282500e+00  -4.29193190e+01  -1.49547480e+01
    7.66457820e+01   4.24280000e-02   2.20940000e-02   9.43594000e-01
    5.03000000e-04   1.76840000e-02]]


  
