# Dataset: [Human Activity Recognition Using Smartphones Data Set](https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones)

### The data records the accelerometer and gyroscope readings of a smartphone while the person carrying it performs one of the following activities: {walking, walking upstairs, walking downstairs, sitting, standing, laying}. The data was collected from 30 different people. Each record is computed over a 2.56 s time window of the activity. There are a total of 561 attributes, of which 17 are raw signals and the others are derived from these signals by taking their mean, standard deviation, max, etc.

\
 \
### Mining this data can bring a number of benefits; the two we want to explore are:  
### &nbsp; &nbsp;  (a) **Identifying the activity the user is performing from live data**: useful for activity tracking and fitness monitoring throughout the day.
### &nbsp; &nbsp; (b) **Uniquely identifying the user from the data while the phone is on their person**: may be used in an advanced biometric test of identity.

\
### &nbsp; &nbsp; The data comes from a continuous signal and there are no missing values. Outliers have been removed by low-pass and high-pass filtering. The test subjects were recorded so that their activities could be classified, ensuring data quality.
###  &nbsp; &nbsp; &nbsp; &nbsp;


------------------------------------------------------------------


In [None]:
import numpy
import pandas
import sklearn.tree
import sklearn.metrics
import sklearn.ensemble
import sklearn.naive_bayes
import sklearn.neighbors
import sklearn.svm
import sklearn.neural_network
import sklearn.model_selection

# Google Drive share links for each file of the dataset
X_train_url = 'https://drive.google.com/file/d/1cInxpee5tC30hACHPNafvrfXFQ9i5Ikw/view?usp=share_link'
y_train_url = 'https://drive.google.com/file/d/1DNC7A072QdN_Id_VRBE55aBWvj9FaDe6/view?usp=share_link'
subject_train_url = 'https://drive.google.com/file/d/1FdvqWrCBtcABTT5a3vELUjP1Ye1hUzCW/view?usp=share_link'
X_test_url = 'https://drive.google.com/file/d/1v37EEGnBuNCm2n2h8ygOlHidWekMcDvO/view?usp=share_link'
y_test_url = 'https://drive.google.com/file/d/1bQMV9xq-DMVvmkmU5h-KJ018pqwgBIEM/view?usp=share_link'
subject_test_url = 'https://drive.google.com/file/d/1AON2XQ4MXxHfVyhO-SVP0keM8mKvT1pJ/view?usp=share_link'
drive_url='https://drive.google.com/uc?id='


def _read_drive_table(share_url):
    """Read one whitespace-delimited, header-less dataset file into a DataFrame.

    The file id is taken as the second-to-last '/'-separated segment of the
    share link (``.../file/d/<id>/view?...``) and appended to ``drive_url``
    to form the URL handed to ``pandas.read_csv``.
    """
    file_id = share_url.split('/')[-2]
    # sep=r'\s+' is the supported replacement for the deprecated
    # delim_whitespace=True keyword and splits on any run of whitespace.
    return pandas.read_csv(drive_url + file_id, sep=r'\s+', header=None)


# Downloading the dataset: feature tables stay DataFrames, while the
# single-column label / subject files are flattened to 1-D arrays.
train_X = _read_drive_table(X_train_url)
train_y = _read_drive_table(y_train_url).values.ravel()
train_subject = _read_drive_table(subject_train_url).values.ravel()
test_X = _read_drive_table(X_test_url)
test_y = _read_drive_table(y_test_url).values.ravel()
test_subject = _read_drive_table(subject_test_url).values.ravel()

cv=5        # k for cross validation
verbose=1  # verbosity of grid search output

### Decision Tree

In [None]:
# Baseline: decision tree with default hyper-parameters.
print('Classifying Movement\n')
dectree = sklearn.tree.DecisionTreeClassifier()
dectree.fit(train_X,train_y)
prediction = dectree.predict(test_X)
# score() returns mean accuracy (not an error rate), so label it as such.
print('Training Accuracy:')
print(dectree.score(train_X,train_y))
print('Accuracy Score:')
print(sklearn.metrics.accuracy_score(test_y,prediction))

print('\n\n K Fold Cross Validation and Parameter Tuning:\n\n')

# Hyper-parameter grid explored by the cross-validated search below.
parameters = {
    'criterion' : ['gini','entropy'],
    'max_depth' : [3,5,7,9],
    'min_samples_split' : [6,7,9,10,11],
    }

print('Classifying Movement\n')
# Grid search with `cv`-fold cross-validation on the training set; the
# fitted search object is then used to predict on the test set.
dectree = sklearn.model_selection.GridSearchCV(sklearn.tree.DecisionTreeClassifier(),parameters,cv=cv,verbose=verbose)
dectree = dectree.fit(train_X,train_y)
print('Best parameters:\n')
print(dectree.best_params_)
print('Accuracy Score:')
prediction = dectree.predict(test_X)
print(sklearn.metrics.accuracy_score(test_y,prediction))

Classifying Movement

Training Error:
1.0
Accuracy Score:
0.8595181540549711


 K Fold Cross Validation and Parameter Tuning:


Classifying Movement

Fitting 5 folds for each of 40 candidates, totalling 200 fits
Best parameters:

{'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 11}
Accuracy Score:
0.8547675602307431


### Random Forest

In [None]:
# --- Baseline: random forest with default hyper-parameters ---
print('Classifying Movement\n')
randforest = sklearn.ensemble.RandomForestClassifier().fit(train_X, train_y)
prediction = randforest.predict(test_X)
print('Accuracy Score:', sklearn.metrics.accuracy_score(test_y, prediction), sep='\n')

print('\n\n K Fold Cross Validation and Parameter Tuning:\n\n')

# Candidate hyper-parameter values for the cross-validated grid search.
parameters = dict(
    n_estimators=[50, 100, 200],
    max_features=['sqrt', 'log2'],
    max_depth=[5, 10, 15, 20],
    criterion=['gini', 'entropy'],
)

print('Classifying Movement\n')
# Search the grid with `cv`-fold cross-validation on the training data.
randforest = sklearn.model_selection.GridSearchCV(
    sklearn.ensemble.RandomForestClassifier(),
    parameters,
    cv=cv,
    verbose=verbose,
).fit(train_X, train_y)
print('Best parameters:\n', randforest.best_params_, sep='\n')
prediction = randforest.predict(test_X)
print('Accuracy Score:', sklearn.metrics.accuracy_score(test_y, prediction), sep='\n')

Classifying Movement

Accuracy Score:
0.9222938581608415


 K Fold Cross Validation and Parameter Tuning:


Classifying Movement

Fitting 5 folds for each of 48 candidates, totalling 240 fits


### Naive Bayes Classifier

In [None]:
# --- Baseline: Gaussian naive Bayes with default settings ---
print('Classifying Movement\n')
naivebayes = sklearn.naive_bayes.GaussianNB().fit(train_X, train_y)
prediction = naivebayes.predict(test_X)
print('Accuracy Score:', sklearn.metrics.accuracy_score(test_y, prediction), sep='\n')

print('\n\n K Fold Cross Validation and Parameter Tuning:\n\n')

# Candidate var_smoothing values for the grid search.
parameters = dict(
    var_smoothing=[1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8],
)

print('Classifying Movement\n')
# Search the grid with `cv`-fold cross-validation on the training data.
naivebayes = sklearn.model_selection.GridSearchCV(
    sklearn.naive_bayes.GaussianNB(),
    parameters,
    cv=cv,
    verbose=verbose,
).fit(train_X, train_y)
prediction = naivebayes.predict(test_X)
print('Best parameters:\n', naivebayes.best_params_, sep='\n')
print('Accuracy Score:', sklearn.metrics.accuracy_score(test_y, prediction), sep='\n')

Classifying Movement

Accuracy Score:
0.7702748557855447


 K Fold Cross Validation and Parameter Tuning:


Classifying Movement

Fitting 5 folds for each of 9 candidates, totalling 45 fits
Best parameters:

{'var_smoothing': 0.1}
Accuracy Score:
0.825246012894469


### KNN Classifier

In [None]:
# --- Baseline: k-nearest-neighbours with default settings ---
print('Classifying Movement\n')
knn = sklearn.neighbors.KNeighborsClassifier().fit(train_X, train_y)
prediction = knn.predict(test_X)
print('Accuracy Score:', sklearn.metrics.accuracy_score(test_y, prediction), sep='\n')

print('\n\n K Fold Cross Validation and Parameter Tuning:\n\n')

# Candidate neighbour counts (1 through 19) and vote-weighting schemes.
parameters = dict(
    n_neighbors=range(1, 20),
    weights=['distance', 'uniform'],
)

print('Classifying movement\n')
# Search the grid with `cv`-fold cross-validation on the training data.
knn = sklearn.model_selection.GridSearchCV(
    sklearn.neighbors.KNeighborsClassifier(),
    parameters,
    cv=cv,
    verbose=verbose,
).fit(train_X, train_y)
print('Best parameters:\n', knn.best_params_, sep='\n')
prediction = knn.predict(test_X)
print('Accuracy Score:', sklearn.metrics.accuracy_score(test_y, prediction), sep='\n')

Classifying Movement

Accuracy Score:
0.9015948422124194


 K Fold Cross Validation and Parameter Tuning:


Classifying movement

Fitting 5 folds for each of 38 candidates, totalling 190 fits
Best parameters:

{'n_neighbors': 16, 'weights': 'distance'}
Accuracy Score:
0.9073634204275535


### SVM

In [None]:
# Baseline: support vector classifier with default hyper-parameters.
print('Classifying Movement\n')
svm = sklearn.svm.SVC()
svm.fit(train_X,train_y)
prediction = svm.predict(test_X)
print('Accuracy Score:')
print(sklearn.metrics.accuracy_score(test_y,prediction))

print('\n\n K Fold Cross Validation and Parameter Tuning:\n\n')

# Hyper-parameter grid for the search below.
# 'degree' is intentionally not searched: SVC() is used with its default
# (RBF) kernel, and `degree` only affects the 'poly' kernel, so varying it
# would just repeat every (C, gamma) fit several times with identical results.
parameters = {
    'C' : [1,10,100],
    'gamma' : [0.001,0.01,0.1],
}

print('Classifying movement\n')
# Grid search with `cv`-fold cross-validation on the training set; the
# fitted search object is then used to predict on the test set.
svm = sklearn.model_selection.GridSearchCV(sklearn.svm.SVC(),parameters,cv=cv,verbose=verbose)
svm = svm.fit(train_X,train_y)
print('Best parameters:\n')
print(svm.best_params_)
prediction = svm.predict(test_X)
print('Accuracy Score:')
print(sklearn.metrics.accuracy_score(test_y,prediction))

Classifying Movement

Accuracy Score:
0.9504580929759077


 K Fold Cross Validation and Parameter Tuning:


Classifying movement

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters:

{'C': 10, 'degree': 0, 'gamma': 0.01}
Accuracy Score:
0.9619952494061758


### ANN

In [None]:
# Baseline: multi-layer perceptron with default hyper-parameters.
print('Classifying Movement\n')
ann = sklearn.neural_network.MLPClassifier()
ann.fit(train_X,train_y)
prediction = ann.predict(test_X)
print('Accuracy Score:')
print(sklearn.metrics.accuracy_score(test_y,prediction))

print('\n\n K Fold Cross Validation and Parameter Tuning:\n\n')

# Hyper-parameter grid for the search below.
parameters = {
    # MLPClassifier names the sigmoid activation 'logistic'; 'sigmoid' is
    # not an accepted value.
    'activation' : ['relu','logistic','tanh'],
    'max_iter' : [100,200,300],
    'batch_size' : [8,16,32,64,128],
    # hidden_layer_sizes must be tuples of ints, not a comma-separated string.
    # NOTE(review): interpreted as three single-hidden-layer candidates of
    # 75, 100 and 125 units; use [(75,100,125)] instead if one three-layer
    # network was intended.
    'hidden_layer_sizes' : [(75,),(100,),(125,)]
}

print('Classifying movement\n')
# Grid search with `cv`-fold cross-validation on the training set; the
# fitted search object is then used to predict on the test set.
ann = sklearn.model_selection.GridSearchCV(sklearn.neural_network.MLPClassifier(),parameters,cv=cv,verbose=verbose)
ann = ann.fit(train_X,train_y)
print('Best parameters:\n')
print(ann.best_params_)
prediction = ann.predict(test_X)
print('Accuracy Score:')
print(sklearn.metrics.accuracy_score(test_y,prediction))