# Human Activity Recognition Using Smartphones

## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline

## Reading train and test datasets

In [2]:
from sklearn.utils import shuffle

# Fixed seed so that a fresh "Restart & Run All" yields the same row order.
# Row order matters: the previews below and any unshuffled cross-validation
# splits computed on `train` depend on it.
RANDOM_SEED = 42

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Shuffle the rows of each frame reproducibly
train = shuffle(train, random_state=RANDOM_SEED)
test = shuffle(test, random_state=RANDOM_SEED)

# Preview the first rows and the (rows, columns) shape of each split
display(train.head())
print (train.shape)

display(test.head())
print (test.shape)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
2948,0.280577,-0.019075,-0.108925,-0.990098,-0.940288,-0.970025,-0.991382,-0.93409,-0.966288,-0.921962,...,-0.7457,0.252295,0.141005,-0.108473,0.776666,0.30281,-0.227041,-0.722913,16,LAYING
1985,0.24747,-0.047274,-0.145256,-0.416532,-0.052237,-0.521678,-0.442892,-0.055177,-0.516036,-0.362674,...,-0.778921,0.164711,0.212857,-0.756518,-0.158159,-0.738673,0.230249,-0.109309,11,WALKING
2865,0.362601,-0.004455,-0.036326,-0.020696,-0.029823,-0.161643,-0.097043,0.010732,-0.115906,0.208748,...,-0.838066,-0.362449,0.508568,-0.975211,-0.493451,-0.683488,0.309936,-0.037207,15,WALKING_UPSTAIRS
6992,0.278892,-0.018043,-0.098066,-0.997725,-0.96524,-0.995022,-0.997764,-0.96263,-0.995205,-0.942735,...,-0.903964,-0.013647,-0.023932,0.886785,0.541021,-0.820148,0.21219,0.069219,30,STANDING
7101,0.378966,-0.000527,-0.203853,-0.184955,-0.142758,-0.344134,-0.213739,-0.255421,-0.310713,-0.007459,...,-0.638901,-0.338016,0.865583,0.853514,0.839211,-0.895912,0.170174,0.017783,30,WALKING_DOWNSTAIRS


(7352, 563)


Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
1819,0.216593,-0.041674,-0.130147,0.167305,0.302163,-0.304896,0.111833,0.224909,-0.415972,0.329113,...,-0.818427,0.357026,-0.343402,0.94929,0.06458,-0.80753,0.230014,-0.000379,13,WALKING_DOWNSTAIRS
723,0.195838,-0.010125,-0.109424,-0.459268,-0.263111,-0.402595,-0.557896,-0.188874,-0.376762,-0.021879,...,-0.513359,0.893742,-0.286999,0.959302,-0.511014,-0.878063,0.146438,0.09059,9,WALKING_UPSTAIRS
868,0.174624,-0.023687,-0.127852,0.133868,-0.216917,0.268587,0.007173,-0.275324,0.190467,0.525293,...,-0.456609,0.707162,-0.694039,-0.757886,-0.450169,-0.83468,0.084675,0.137952,9,WALKING_DOWNSTAIRS
591,0.269379,-0.037453,-0.102167,0.136295,-0.134996,-0.445638,0.068033,-0.190519,-0.475488,0.359865,...,-0.984193,0.079764,0.550523,0.919073,0.595812,-0.873063,0.107086,-0.074685,4,WALKING_DOWNSTAIRS
2267,0.276054,-0.015739,-0.108058,-0.997684,-0.994767,-0.991306,-0.998274,-0.993481,-0.991073,-0.94174,...,-0.898185,0.104284,0.06612,-0.069277,0.277116,-0.600329,-0.037652,-0.232059,20,SITTING


(2947, 563)


## Separating target variables from the dataset

In [3]:
# Keep the subject identifiers aside for both splits
subject_training_data, subject_testing_data = train['subject'], test['subject']

# Keep the activity labels aside for both splits
training_labels, testing_labels = train['Activity'], test['Activity']

# What remains after removing the subject and label columns are the features
train = train.drop(['subject', 'Activity'], axis='columns')
test = test.drop(['subject', 'Activity'], axis='columns')

# Summarise the size of each split and its class distribution
n_train_rows, n_train_features = train.shape
n_test_rows = test.shape[0]
training_counts = training_labels.value_counts()
testing_counts = testing_labels.value_counts()

print("Training data consists of {} instances of data with {} total features".format(n_train_rows, n_train_features))
print("Training data includes value counts of\n", training_counts)
print("Testing data consists of {} instances of data".format(n_test_rows))
print("Testing data includes value counts of\n", testing_counts)

Training data consists of 7352 instances of data with 561 total features
Training data includes value counts of
 LAYING                1407
STANDING              1374
SITTING               1286
WALKING               1226
WALKING_UPSTAIRS      1073
WALKING_DOWNSTAIRS     986
Name: Activity, dtype: int64
Testing data consists of 2947 instances of data
Testing data includes value counts of
 LAYING                537
STANDING              532
WALKING               496
SITTING               491
WALKING_UPSTAIRS      471
WALKING_DOWNSTAIRS    420
Name: Activity, dtype: int64


In [8]:
from sklearn.preprocessing import LabelEncoder

# Map each activity name to an integer class id.
# The encoder is fitted on the fixed list of the six activity names rather
# than on the data, so both splits share the same mapping.
le = LabelEncoder().fit(
    ["WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING", "STANDING", "LAYING"]
)

enc_training_labels = le.transform(training_labels)
enc_testing_labels = le.transform(testing_labels)

# Show a few raw labels next to the encoded array, train first then test
for raw_labels, encoded_labels in (
    (training_labels, enc_training_labels),
    (testing_labels, enc_testing_labels),
):
    print(raw_labels.head())
    print(encoded_labels)

2948                LAYING
1985               WALKING
2865      WALKING_UPSTAIRS
6992              STANDING
7101    WALKING_DOWNSTAIRS
Name: Activity, dtype: object
[0 3 5 ..., 0 2 1]
1819    WALKING_DOWNSTAIRS
723       WALKING_UPSTAIRS
868     WALKING_DOWNSTAIRS
591     WALKING_DOWNSTAIRS
2267               SITTING
Name: Activity, dtype: object
[4 5 4 ..., 3 3 5]


## Importing Classifiers

In [21]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# One estimator per model family, each built with its library defaults
kn, sv, lr = KNeighborsClassifier(), SVC(), LogisticRegression()

## Training KNN Classifier

In [22]:
# Fit on the training features, predict the test split, report test accuracy
kn_pred = kn.fit(train, enc_training_labels).predict(test)
kn_test_accuracy = accuracy_score(enc_testing_labels, kn_pred)
print("K Nearest Neighbors Test Accuracy : {}".format(kn_test_accuracy))

K Nearest Neighbors Test Accuracy : 0.9002375296912114


## Training SVM Classifier

In [23]:
# Fit on the training features, predict the test split, report test accuracy
sv_pred = sv.fit(train, enc_training_labels).predict(test)
sv_test_accuracy = accuracy_score(enc_testing_labels, sv_pred)
print("Support Vector Machine Test Accuracy : {}".format(sv_test_accuracy))

Support Vector Machine Test Accuracy : 0.9402782490668476


## Training Logistic Regression Classifier

In [24]:
# Fit on the training features, predict the test split, report test accuracy
lr_pred = lr.fit(train, enc_training_labels).predict(test)
lr_test_accuracy = accuracy_score(enc_testing_labels, lr_pred)
print("Logistic Regression Test Accuracy : {}".format(lr_test_accuracy))

Logistic Regression Test Accuracy : 0.9619952494061758


## Evaluating using cross_val_score

In [25]:
def evalClfTrain(clf, cv=None):
    """Cross-validate ``clf`` on the training split and describe the scores.

    Reads the notebook-level ``train`` (features) and ``enc_training_labels``
    (encoded targets); the test split is not touched.

    Parameters
    ----------
    clf : estimator
        Classifier handed to ``cross_val_score``.
    cv : optional
        Forwarded unchanged to ``cross_val_score``. The default ``None`` keeps
        the library's own default splitting, which is not the same in every
        scikit-learn release; pass an explicit value (e.g. ``cv=3``) to get
        numbers that are comparable across environments.

    Returns
    -------
    str
        Text containing the per-fold scores and their mean.
    """
    scores = cross_val_score(clf, train, enc_training_labels, cv=cv)
    avg = scores.mean()
    return "Training performances: {}, \nAverage: {}".format(scores, avg)

# Report the cross-validation summary of each classifier, in the same order
cv_reports = (
    ("K Neighbors {}", kn),
    ("Support Vector Machine {}", sv),
    ("Logistic Regression {}", lr),
)
for report_template, classifier in cv_reports:
    print(report_template.format(evalClfTrain(classifier)))

K Neighbors Training performances: [ 0.95880914  0.96574225  0.96323529], 
Average: 0.9625955602469373
Support Vector Machine Training performances: [ 0.93107667  0.94535073  0.94362745], 
Average: 0.9400182857264712
Logistic Regression Training performances: [ 0.98164763  0.98858075  0.98120915], 
Average: 0.9838125117728803


## Profiling

In [27]:
import pandas_profiling as prof

In [28]:
# Build a profiling report of the training features and save it as HTML
profile_output_path = 'train_profile.html'
data_profile = prof.ProfileReport(train)
data_profile.to_file(profile_output_path)