# Human Activity Recognition Using Smartphones

## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline

## Reading train and test datasets

In [2]:
from sklearn.utils import shuffle

# Fixed seed: without it shuffle() produces a different row order on every run,
# so the previews below (and any order-dependent step later on) would not be
# reproducible after Restart & Run All.
SEED = 42

# Read each CSV and shuffle its rows in one step; the index keeps the original
# row numbers, which is why the previews show non-consecutive index values.
train = shuffle(pd.read_csv('../train.csv'), random_state=SEED)
test = shuffle(pd.read_csv('../test.csv'), random_state=SEED)

# Preview the first rows and the (rows, columns) shape of each split
display(train.head())
print (train.shape)

display(test.head())
print (test.shape)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
5399,0.294399,-0.045618,-0.116598,-0.659474,-0.313792,-0.568337,-0.687095,-0.322025,-0.585085,-0.591991,...,-0.462188,-0.206322,0.690312,0.878521,0.942648,-0.858532,0.182468,-0.033997,25,WALKING
6934,0.407492,-0.062399,-0.080733,0.152028,0.07634,-0.297461,0.118493,-0.025675,-0.317016,0.30372,...,-0.578954,-0.561848,0.300183,0.249794,-0.467793,-0.916815,0.135807,-0.028299,29,WALKING_DOWNSTAIRS
1518,0.277121,-0.014629,-0.109159,-0.994144,-0.983159,-0.956552,-0.994593,-0.983985,-0.950203,-0.941365,...,-0.92083,0.016442,0.482238,-0.277964,-0.47524,-0.891368,0.045626,-0.042373,7,SITTING
1789,0.12999,0.104839,0.068729,-0.895119,-0.348877,-0.256996,-0.893647,-0.318569,-0.221582,-0.936298,...,-0.524179,-0.028562,0.152644,0.081445,-0.175157,-0.578842,-0.031039,-0.252988,8,SITTING
1871,0.295096,-0.039766,-0.121931,0.13999,0.28391,0.129018,0.077091,0.252174,0.110129,0.42178,...,-0.533679,-0.307743,0.584115,-0.971707,0.279568,-0.678024,0.18385,0.231318,8,WALKING_DOWNSTAIRS


(7352, 563)


Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
305,0.257569,0.020305,-0.050276,-0.979516,-0.944338,-0.851698,-0.979796,-0.942719,-0.836453,-0.930955,...,-0.467542,0.054797,0.033184,0.940406,0.718797,-0.807354,0.147707,-0.11171,4,STANDING
1054,0.256139,0.017335,-0.077378,-0.926185,-0.913438,-0.676905,-0.937094,-0.911947,-0.700733,-0.866971,...,-0.771993,0.072317,-0.200607,-0.388833,0.905882,-0.894243,0.034308,-0.026026,10,STANDING
19,0.271166,-0.025973,-0.094923,-0.970124,-0.901878,-0.9653,-0.977387,-0.908967,-0.968392,-0.911152,...,0.737524,-0.065276,0.156377,-0.154816,0.131187,-0.601416,0.371978,-0.010461,2,STANDING
2668,0.281049,-0.016769,-0.105192,-0.981697,-0.991047,-0.98214,-0.981284,-0.99043,-0.981537,-0.931659,...,-0.839571,-0.426844,0.272259,-0.117676,-0.194839,0.594365,-0.531884,-0.474014,24,LAYING
2491,0.274084,-0.017381,-0.110882,-0.997789,-0.992738,-0.987627,-0.998133,-0.992374,-0.986704,-0.94373,...,-0.736604,0.174853,0.955483,0.471503,0.447878,0.621981,-0.84735,-0.144672,20,LAYING


(2947, 563)


## Separating target variables from the dataset

In [3]:
# Set aside the 'subject' column so it is not fed to the models as a feature
subject_training_data = train['subject']
subject_testing_data = test['subject']

# Set aside the 'Activity' column, which is the label the classifiers predict
training_labels = train['Activity']
testing_labels = test['Activity']

# Remove both non-feature columns so only the feature columns remain.
# NOTE: this rebinds `train`/`test`; re-running this cell without re-running
# the loading cell first raises KeyError because the columns are already gone.
train = train.drop(['subject', 'Activity'], axis=1)
test = test.drop(['subject', 'Activity'], axis=1)

# Summarise the data. Every message is built as ONE formatted string: passing
# several arguments to print() yields a tuple repr with a literal "\n" when the
# kernel treats print as a statement (as in this cell's recorded output), while
# a single pre-formatted string prints the same way under either kernel.
print("Training data consists of {} instances of data with {} total features".format(train.shape[0], train.shape[1]))
print("Training data includes value counts of\n {}".format(training_labels.value_counts()))
print("Testing data consists of {} instances of data".format(test.shape[0]))
print("Testing data includes value counts of\n {}".format(testing_labels.value_counts()))

Training data consists of 7352 instances of data with 561 total features
('Training data includes value counts of\n', LAYING                1407
STANDING              1374
SITTING               1286
WALKING               1226
WALKING_UPSTAIRS      1073
WALKING_DOWNSTAIRS     986
Name: Activity, dtype: int64)
Testing data consists of 2947 instances of data
('Testing data includes value counts of\n', LAYING                537
STANDING              532
WALKING               496
SITTING               491
WALKING_UPSTAIRS      471
WALKING_DOWNSTAIRS    420
Name: Activity, dtype: int64)


In [4]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the six known activity names, then convert the string
# labels of both splits into integer class ids.
le = LabelEncoder().fit(
    ["WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING", "STANDING", "LAYING"]
)
enc_training_labels = le.transform(training_labels)
enc_testing_labels = le.transform(testing_labels)

# Show the first raw labels followed by the encoded array: training split
# first, then the testing split.
for raw_labels, encoded_labels in (
    (training_labels, enc_training_labels),
    (testing_labels, enc_testing_labels),
):
    print(raw_labels.head())
    print(encoded_labels)

5399               WALKING
6934    WALKING_DOWNSTAIRS
1518               SITTING
1789               SITTING
1871    WALKING_DOWNSTAIRS
Name: Activity, dtype: object
[3 4 1 ... 3 2 5]
305     STANDING
1054    STANDING
19      STANDING
2668      LAYING
2491      LAYING
Name: Activity, dtype: object
[2 2 2 ... 3 1 3]


## Importing Classifiers

In [5]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# One instance of each classifier, all with library-default hyperparameters:
# k-nearest neighbours, support vector machine, logistic regression.
kn, sv, lr = KNeighborsClassifier(), SVC(), LogisticRegression()

## Training KNN Classifier

In [6]:
# Fit KNN on the training features, predict the test set, and report the
# fraction of test labels predicted correctly.
kn_pred = kn.fit(train, enc_training_labels).predict(test)
kn_accuracy = accuracy_score(enc_testing_labels, kn_pred)
print("K Nearest Neighbors Test Accuracy : {}".format(kn_accuracy))

K Nearest Neighbors Test Accuracy : 0.900237529691


## Training SVM Classifier

In [7]:
# Fit the SVM on the training features, predict the test set, and report the
# fraction of test labels predicted correctly.
sv_pred = sv.fit(train, enc_training_labels).predict(test)
sv_accuracy = accuracy_score(enc_testing_labels, sv_pred)
print("Support Vector Machine Test Accuracy : {}".format(sv_accuracy))

Support Vector Machine Test Accuracy : 0.940278249067


## Training Logistic Regression Classifier

In [8]:
# Fit logistic regression on the training features, predict the test set, and
# report the fraction of test labels predicted correctly.
lr_pred = lr.fit(train, enc_training_labels).predict(test)
lr_accuracy = accuracy_score(enc_testing_labels, lr_pred)
print("Logistic Regression Test Accuracy : {}".format(lr_accuracy))

Logistic Regression Test Accuracy : 0.961995249406


## Evaluating using cross_val_score

In [9]:
def evalClfTrain(clf, cv=None):
    """Cross-validate ``clf`` on the training data and summarise the scores.

    The estimator is scored with ``cross_val_score`` against the notebook-level
    ``train`` features and ``enc_training_labels`` targets.

    Args:
        clf: Estimator to evaluate.
        cv: Forwarded unchanged to ``cross_val_score``'s ``cv`` argument.
            ``None`` (the default) keeps the library's default splitting,
            which matches calling this function without ``cv``. Pass an
            explicit value to pin the number of folds, since the library
            default is not fixed by this notebook.

    Returns:
        str: A message containing the per-fold scores and their mean.
    """
    fold_scores = cross_val_score(clf, train, enc_training_labels, cv=cv)
    return "Training performances: {}, \nAverage: {}".format(fold_scores, fold_scores.mean())

# Report the cross-validation summary of each classifier, in the same order
# the classifiers were trained above.
for report_template, classifier in (
    ("K Neighbors {}", kn),
    ("Support Vector Machine {}", sv),
    ("Logistic Regression {}", lr),
):
    print(report_template.format(evalClfTrain(classifier)))

K Neighbors Training performances: [0.96166395 0.96125612 0.96568627], 
Average: 0.962868779921
Support Vector Machine Training performances: [0.93270799 0.94535073 0.93831699], 
Average: 0.938791907011
Logistic Regression Training performances: [0.98287113 0.98450245 0.98570261], 
Average: 0.984358728991
