# Life of a Data Scientist: Human Activity

Data Set: Human Activity Recognition With Smartphones
The Human Activity Recognition database was built from the recordings of 30 study participants performing activities of daily living (ADL) while carrying a waist-mounted smartphone with embedded inertial sensors.

In [1]:
#IMPORTS
import pandas as pd
import numpy as np
import sklearn as sk




In [2]:
# Load the training and test CSV files from the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

In [3]:
# Shuffle the rows of both sets.
# A fixed random_state makes the permutation reproducible, so a fresh
# Restart & Run All yields the same row order (and the same printed
# prediction order) every time.
from sklearn.utils import shuffle
train = shuffle(train, random_state=42)
test = shuffle(test, random_state=42)

In [4]:
#Splitting up features and labels
# `.ix` was removed in pandas 1.0; with string column headers it resolved
# integers by position, which is exactly what `.iloc` does.
# NOTE(review): assumes column position 562 holds the activity label and
# positions 0-560 hold the features -- confirm against the CSV header.
labels_train = train.iloc[:, 562]
features_train = train.iloc[:, :561]
labels_test = test.iloc[:, 562]
features_test = test.iloc[:, :561]

# Every column of the test set except "Activity", as a NumPy array.
testData = test.drop("Activity", axis = 1).values

In [5]:
# Makes categorical labels into numeric codes 0..k-1 (stored as floats)
def encode(df):
    """Map each categorical value to the index of its one-hot column.

    The input is one-hot encoded with ``pd.get_dummies`` and each row is
    replaced by the position of its largest entry, so a row's code is the
    index of the dummy column that is set for it.

    Parameters
    ----------
    df : Series, DataFrame or array-like
        Categorical data accepted by ``pd.get_dummies``.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with one code per input row. Empty input gives an
        empty array.
    """
    dummies = pd.get_dummies(df, drop_first=False)
    if len(dummies) == 0:
        # Nothing to encode; keep the empty-array result of the loop version.
        return np.array([])
    # One vectorized argmax over the columns replaces a per-row np.append
    # loop, which re-allocated the whole array on every iteration.
    return dummies.values.argmax(axis=1).astype(float)

In [6]:
#Encode labels
# Encode both splits against one shared, sorted class list. encode() numbers
# classes by get_dummies column position, so encoding each split on its own
# would give the same activity different codes whenever a class is missing
# from one of the splits.
activity_classes = sorted(set(labels_train) | set(labels_test))
labels_train = encode(pd.Series(pd.Categorical(labels_train, categories=activity_classes)))
labels_test = encode(pd.Series(pd.Categorical(labels_test, categories=activity_classes)))

In [14]:
#KNN
from sklearn.neighbors import KNeighborsClassifier
KNNmodel = KNeighborsClassifier(algorithm = 'kd_tree', n_neighbors = 6)
KNNmodel.fit(features_train, labels_train)
labels_predict = KNNmodel.predict(features_test)
# Neighbour graph of the training points, kept in its sparse form.
# The former bare `A.toarray()` built a dense n_train x n_train matrix whose
# result was neither stored nor displayed, so it only cost time and memory.
A = KNNmodel.kneighbors_graph(features_train)
print(labels_predict)
# Mean accuracy of the classifier on the test set.
print(KNNmodel.score(features_test, labels_test))

[ 1.  5.  5. ...,  5.  3.  0.]
0.901934170343


In [20]:
#Confusion Matrix
import itertools
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib axes.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are true labels, columns are predicted labels.
    classes : sequence
        Tick labels, in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        If True, each row is divided by its sum before printing and plotting.
        Rows whose sum is zero are left as all zeros.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the heat-map.

    Returns
    -------
    None
        The matrix is printed to stdout and drawn with pyplot.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so the heat-map colours, the colour bar and the
    # per-cell text all describe the same numbers.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # `where=` skips empty rows instead of producing NaN from 0/0.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are shown as-is; fractions are rounded to two decimals so
    # the text fits inside a cell.
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than the midpoint get white text to stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Compute confusion matrix
cnf_matrix = confusion_matrix(labels_test, labels_predict)
np.set_printoptions(precision=2)

# Tick labels must follow the order of the encoded labels. encode() numbers
# classes by get_dummies column position, i.e. the sorted raw label values,
# so the names are derived from the sorted raw "Activity" values rather than
# hard-coded in a different order.
# NOTE(review): the raw label spellings below are assumed from the public
# dataset -- confirm against the CSV. Unknown labels are shown unchanged.
friendly_names = {
    "LAYING": "Laying Down",
    "SITTING": "Sitting",
    "STANDING": "Standing",
    "WALKING": "Walking",
    "WALKING_DOWNSTAIRS": "Walking Down",
    "WALKING_UPSTAIRS": "Walking Up",
}
class_names = [friendly_names.get(raw, raw)
               for raw in sorted(test["Activity"].unique())]

# Plot non-normalized confusion matrix (on its own figure)
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix (on its own figure, so it does not draw
# over the previous plot)
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()



Confusion matrix, without normalization
[[535   1   1   0   0   0]
 [  1 420  68   0   0   2]
 [  0  57 475   0   0   0]
 [  0   0   0 486   9   1]
 [  0   0   0  55 326  39]
 [  0   0   0  45  10 416]]
Normalized confusion matrix
[[ 1.    0.    0.    0.    0.    0.  ]
 [ 0.    0.86  0.14  0.    0.    0.  ]
 [ 0.    0.11  0.89  0.    0.    0.  ]
 [ 0.    0.    0.    0.98  0.02  0.  ]
 [ 0.    0.    0.    0.13  0.78  0.09]
 [ 0.    0.    0.    0.1   0.02  0.88]]
