# Human Activity Recognition with Smartphones

## SVM

In [4]:
import pandas as pd
import numpy as np

In [5]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import preprocessing

In [6]:
from sklearn.svm import SVC

In [7]:
from sklearn.metrics import classification_report, confusion_matrix

### Exploratory Data Analysis

In [8]:
training_set = pd.read_csv("train.csv")

In [9]:
training_set.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


In [10]:
training_set.shape

(7352, 563)

## Data Preprocessing

In [13]:
#prendo le etichette
y = training_set.Activity.values.astype(object)
#rimuovo le colonne: activity e subject
X_train = pd.DataFrame(training_set.drop(['Activity','subject'],axis=1))

In [14]:
encoder = preprocessing.LabelEncoder()
encoder.fit(y)
y_train = encoder.transform(y)
y_train.shape

(7352,)

In [15]:
encoder.classes_

array(['LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS',
       'WALKING_UPSTAIRS'], dtype=object)

## Training the Algorithm

In [11]:
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [12]:
poly_svclassifier = SVC(kernel='poly', degree=8)
poly_svclassifier.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=8, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [13]:
rbf_svclassifier = SVC(kernel='rbf', C=100.0)
rbf_svclassifier.fit(X_train, y_train)

SVC(C=100.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [14]:
sigmoid_svclassifier = SVC(kernel='sigmoid')
sigmoid_svclassifier.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

## Making Predictions

In [29]:
testing_set = pd.read_csv("test.csv")

In [30]:
testing_set.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.257178,-0.023285,-0.014654,-0.938404,-0.920091,-0.667683,-0.952501,-0.925249,-0.674302,-0.894088,...,-0.705974,0.006462,0.16292,-0.825886,0.271151,-0.720009,0.276801,-0.057978,2,STANDING
1,0.286027,-0.013163,-0.119083,-0.975415,-0.967458,-0.944958,-0.986799,-0.968401,-0.945823,-0.894088,...,-0.594944,-0.083495,0.0175,-0.434375,0.920593,-0.698091,0.281343,-0.083898,2,STANDING
2,0.275485,-0.02605,-0.118152,-0.993819,-0.969926,-0.962748,-0.994403,-0.970735,-0.963483,-0.93926,...,-0.640736,-0.034956,0.202302,0.064103,0.145068,-0.702771,0.280083,-0.079346,2,STANDING
3,0.270298,-0.032614,-0.11752,-0.994743,-0.973268,-0.967091,-0.995274,-0.974471,-0.968897,-0.93861,...,-0.736124,-0.017067,0.154438,0.340134,0.296407,-0.698954,0.284114,-0.077108,2,STANDING
4,0.274833,-0.027848,-0.129527,-0.993852,-0.967445,-0.978295,-0.994111,-0.965953,-0.977346,-0.93861,...,-0.846595,-0.002223,-0.040046,0.736715,-0.118545,-0.692245,0.290722,-0.073857,2,STANDING


In [31]:
#prendo le etichette
y_test = testing_set.Activity.values.astype(object)
#rimuovo le colonne: activity e subject
X_test_set = pd.DataFrame(testing_set.drop(['Activity','subject'],axis=1))

In [32]:
encoder = preprocessing.LabelEncoder()
encoder.fit(y_test)
y_test = encoder.transform(y_test)
y_test.shape

(2947,)

In [33]:
y_pred = svclassifier.predict(X_test_set)

NameError: name 'svclassifier' is not defined

In [20]:
y_pred

array([2, 2, 2, ..., 5, 5, 5])

In [89]:
poly_y_pred = poly_svclassifier.predict(X_test_set)

In [97]:
rbf_y_pred = rbf_svclassifier.predict(X_test_set)

In [91]:
sigmoid_y_pred = sigmoid_svclassifier.predict(X_test_set)

## Evaluating the Algorithm

### Linear Kernel

In [85]:
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[537   0   0   0   0   0]
 [  0 435  54   0   0   2]
 [  0  16 516   0   0   0]
 [  0   0   0 492   3   1]
 [  0   0   0   4 410   6]
 [  0   0   0  18   2 451]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.96      0.89      0.92       491
           2       0.91      0.97      0.94       532
           3       0.96      0.99      0.97       496
           4       0.99      0.98      0.98       420
           5       0.98      0.96      0.97       471

    accuracy                           0.96      2947
   macro avg       0.97      0.96      0.96      2947
weighted avg       0.96      0.96      0.96      2947



### Polynomial Kernel

In [94]:
print(confusion_matrix(y_test,poly_y_pred))
print(classification_report(y_test,poly_y_pred))

[[537   0   0   0   0   0]
 [  2 450  39   0   0   0]
 [  0   8 524   0   0   0]
 [  0   0   0 479  12   5]
 [  0   0   0   7 396  17]
 [  0   0   0  26   4 441]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.98      0.92      0.95       491
           2       0.93      0.98      0.96       532
           3       0.94      0.97      0.95       496
           4       0.96      0.94      0.95       420
           5       0.95      0.94      0.94       471

    accuracy                           0.96      2947
   macro avg       0.96      0.96      0.96      2947
weighted avg       0.96      0.96      0.96      2947



### RBF Kernel

In [98]:
print(confusion_matrix(y_test,rbf_y_pred))
print(classification_report(y_test,rbf_y_pred))

[[537   0   0   0   0   0]
 [  0 443  46   0   0   2]
 [  0   8 524   0   0   0]
 [  0   0   0 489   3   4]
 [  0   0   0   4 398  18]
 [  0   0   0  16   1 454]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.98      0.90      0.94       491
           2       0.92      0.98      0.95       532
           3       0.96      0.99      0.97       496
           4       0.99      0.95      0.97       420
           5       0.95      0.96      0.96       471

    accuracy                           0.97      2947
   macro avg       0.97      0.96      0.96      2947
weighted avg       0.97      0.97      0.97      2947



### Sigmoid Kernel

In [93]:
print(confusion_matrix(y_test,sigmoid_y_pred))
print(classification_report(y_test,sigmoid_y_pred))

[[537   0   0   0   0   0]
 [  0 263 223   4   0   1]
 [  0  25 503   4   0   0]
 [  0   0   1 475  20   0]
 [  0   0   0  81 304  35]
 [  0   0   4   9   3 455]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.91      0.54      0.68       491
           2       0.69      0.95      0.80       532
           3       0.83      0.96      0.89       496
           4       0.93      0.72      0.81       420
           5       0.93      0.97      0.95       471

    accuracy                           0.86      2947
   macro avg       0.88      0.85      0.85      2947
weighted avg       0.88      0.86      0.86      2947



In [99]:
params={'kernel':['linear','rbf'],'C':[1,10,100],'gamma':[1e-2,1e-3,1e-4]}

In [105]:
classifier=GridSearchCV(estimator=SVC(),param_grid=params,scoring='accuracy',return_train_score=True)

In [106]:
classifier.fit(X_train,y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1, 10, 100], 'gamma': [0.01, 0.001, 0.0001],
                         'kernel': ['linear', 'rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='accuracy', verbose=0)

In [107]:
classifier.best_params_

{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}

In [109]:
classifier.best_estimator_

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [110]:
f_SVM = SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [112]:
f_SVM.fit(X_train,y_train)

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [113]:
print(confusion_matrix(y_test,sigmoid_y_pred))
print(classification_report(y_test,sigmoid_y_pred))

[[537   0   0   0   0   0]
 [  0 263 223   4   0   1]
 [  0  25 503   4   0   0]
 [  0   0   1 475  20   0]
 [  0   0   0  81 304  35]
 [  0   0   4   9   3 455]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.91      0.54      0.68       491
           2       0.69      0.95      0.80       532
           3       0.83      0.96      0.89       496
           4       0.93      0.72      0.81       420
           5       0.93      0.97      0.95       471

    accuracy                           0.86      2947
   macro avg       0.88      0.85      0.85      2947
weighted avg       0.88      0.86      0.86      2947



# GDA

In [52]:
def compute_phi(y):
    return np.mean(y)

#def compute_sigma(x, y, mu, y_classes):
#    my_sum = 0
#    x_mu = x.copy()
#    for i in range(x.shape[0]):
#        if y[i]==y_classes[i]:
#            x_mu[i] = x[i] - mu[i]
#        my_sum+=(x_mu[i]).reshape(x.shape[1], 1) @ (x_mu[i]).reshape(x.shape[1], 1).T
#    return (1/x.shape[0]) * (my_sum)
def compute_sigma(x, y, mu, y_classes):
    x_u = x.copy()
    for i in range(len(mu)):
        x_u[y==y_classes[i]] -= mu[i]
    return x_u.T.dot(x_u) / len(y)


# computing p(x|y) for the Bayes rule
def p_x_given_y(data, mu, sigma, p_y): 
    #score = np.apply_along_axis(lambda x: (np.exp(-0.5 * np.sum((x - mu).dot(pseudo_inv_sigma) * (x - mu), axis =1))) * p_y, 1, data)
    #x.shape: (7352, 561)
    #mu.shape: (6, 561)
    # we have to work along axis 1
    pseudo_inv_sigma = np.linalg.pinv(sigma)
    p=[]
    for i in range(0,6):
        score = np.apply_along_axis(lambda x: np.argmax((np.exp(-0.5*(x - mu[i]) @ pseudo_inv_sigma @ (x - mu[i])) * p_y)), 1, data)
        print(score)
        p.append(score)
    return p
    
def predict(data, mu, sigma, p_y):
    pseudo_inv_sigma = np.linalg.pinv(sigma)
    return np.apply_along_axis(lambda x: (np.argmax(np.exp(-0.5 * np.sum((x - mu).dot(pseudo_inv_sigma) * (x - mu), axis =1)) * p_y)), 1, data)

def score(x, y, mu, sigma, p_y):
    return (predict(x, mu, sigma, p_y) == y).mean()

def get_prob(mu, sigma, p_y):
    pseudo_inv_sigma = np.linalg.pinv(sigma)
    p = np.exp(-0.5 * np.sum((x - mu).dot(pseudo_inv_sigma) * (x - mu), axis =1)) * p_y
    print(p)
    return np.argmax(p)

In [35]:
X_train.shape

(7352, 561)

In [36]:
y_train.shape

(7352,)

In [37]:
y_classes, y_counts = np.unique(y_train, return_counts=True)
y_classes, y_counts

(array([0, 1, 2, 3, 4, 5]),
 array([1407, 1286, 1374, 1226,  986, 1073], dtype=int64))

In [38]:
p_y = y_counts/len(y_train)
p_y

array([0.1913765 , 0.17491839, 0.18688792, 0.16675734, 0.13411317,
       0.14594668])

In [39]:
mu = np.array([ X_train[y_train==k].mean(axis=0) for k in y_classes])
mu.shape, mu[1].shape, len(mu)

((6, 561), (561,), 6)

In [40]:
sigma = compute_sigma(X_train, y_train, mu, y_classes)
sigma.shape

(561, 561)

In [41]:
from numpy.linalg import inv
import random

In [42]:
#dirty_sigma = sigma.copy()
#dirty_sigma.shape
#print(dirty_sigma)0.004877           0.000405
#for cell in np.nditer(dirty_sigma, op_flags=['readwrite']):
#    cell=cell+0.00001*np.random.rand(2, 5)
    
#inv(np.matrix(dirty_sigma))


In [43]:
inv_sigma = np.linalg.pinv(sigma) #pseudo-inverse of a matrix
inv_sigma.shape

(561, 561)

In [45]:
dirty_sigma = sigma.copy()
dirty_sigma += np.ones_like(sigma) * 1e-10
np.linalg.det(dirty_sigma), np.linalg.inv(dirty_sigma).shape

(-0.0, (561, 561))

In [46]:
p_x_given_y = p_x_given_y(X_train, mu, sigma, p_y)

[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]


In [171]:
p_x_given_y

[7038, 7229, 5511, 5199, 5221, 3040]

In [49]:
score(X_test_set,y_test, mu, sigma, p_y) # * p_y

0.9253478113335596

In [51]:
score(X_test_set,y_test, mu, sigma, p_y) # * p_y

0.9260264675941635

In [152]:
p = []
p.append(2)
p.append(1)
p.append(3)
p.append(1)
p.append(1)
p.append(5)
np.argmax(p)

5