In [2]:
#import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn import preprocessing

In [3]:
#load the main dataset used divide in training set and testing set 
train_set = pd.read_csv("train.csv", sep=",")
test_set = pd.read_csv("test.csv", sep=",")

In [4]:
train_set.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


We divide the train_set and test_set data to make our GDA model multi-classification, as follow:

In [83]:
x_train_set = train_set.drop(['Activity','subject'], axis=1).values
y_train_set = train_set[['Activity']].values

The same for the test_set.

In [85]:
x_test_set = test_set.drop(['Activity','subject'], axis=1).values
y_test_set = test_set[['Activity']].values

### Encoding Labels

Encodes the activity labels to numerical labels.

In [86]:
le = preprocessing.LabelEncoder()
y_train_set = le.fit_transform(y_train_set)

le = preprocessing.LabelEncoder()
y_test_set = le.fit_transform(y_test_set)

See the actual corresponding classes.

In [87]:
le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(le_name_mapping)

{'LAYING': 0, 'SITTING': 1, 'STANDING': 2, 'WALKING': 3, 'WALKING_DOWNSTAIRS': 4, 'WALKING_UPSTAIRS': 5}


Each activity corresponds now to the numerical label

In [88]:
y_train_set

array([2, 2, 2, ..., 5, 5, 5])

In [89]:
y_test_set

array([2, 2, 2, ..., 5, 5, 5])

## GDA Multi-Classification OVA

Compute the phi y parameter, now we are inserting the methods that estimate the parameters for the GDA analysis.

In [90]:
def compute_phi(y):
    return np.mean(y)

Compute the mean for the first vector and second vector

In [91]:
def compute_mu0(x, y):
    indicator_f = 1-y.reshape(y.shape[0], 1)

    sum_x = 0
    for i in range(0, x.shape[0]):
        sum_x +=indicator_f[i] * x[i]
    
    return sum_x / np.sum(indicator_f)


def compute_mu1(x, y):
    indicator_f = y.reshape(y.shape[0], 1)
    
    sum_x = 0
    for i in range(0, x.shape[0]):
        sum_x += indicator_f[i] * x[i]
    
    return sum_x / np.sum(indicator_f)

Compute sigma

In [92]:
def compute_sigma(x, y, mu0, mu1):
    my_sum = 0
    for i in range(0, x.shape[0]):
        if y[i]==0:
            my_sum+=(x[i] - mu0).reshape(x.shape[1], 1) @ (x[i] - mu0).reshape(x.shape[1], 1).T
        else:
            my_sum+=(x[i] - mu1).reshape(x.shape[1], 1) @ (x[i] - mu1).reshape(x.shape[1], 1).T
            
    return (1 / x.shape[0]) * (my_sum)

Write the probability of x give y.

In [93]:
# computing p(x|y) for the Bayes rule
def p_x_given_y(x,mu,sigma):  
    d = x.shape[0]
    return (1 / (((2*np.pi)**(d/2)) * (np.linalg.det(sigma)**0.5))) * np.exp(-0.5*(x - mu) @ np.linalg.inv(sigma) @ (x - mu))

# Function p(y) for applying the Bayes rule
def p_y(y,phi):
    if abs(y)==1: return phi
    else: return 1 - phi

In [95]:
# Now estimate the GDA parameters and start for one vs all
selected_example=1
print('Selected example =', x_test_set[selected_example,:], "\n")

for label in [0, 1, 2, 3, 4, 5]:
    y = y_train_set.copy()
    y = np.where(y != label, -1, y) # the rest is set to -1
    y = np.where(y == label, 0, y) # the value point that we want to compare with the rest is set to zero
    y = abs(y)
    
    phi = compute_phi(y)
    mu0 = compute_mu0(x_train_set, y)
    mu1 = compute_mu1(x_train_set, y)
    sigma = compute_sigma(x_train_set, y, mu0, mu1)

    # compute p(y=l|x) ~ p(x|y=0)*p(y=0)  &  p(y=1|x) ~ p(x|y=1)*p(y=1) where l is one class

    # y=0
    print('p(y=' + str(label) + '|x) ~', p_x_given_y(x_test_set[selected_example,:],mu0,sigma)*p_y(0, phi))

    # y=1
    print('p(y=1|x) ~', p_x_given_y(x_test_set[selected_example,:],mu1,sigma)*p_y(1, phi))

Selected example = [ 0.28602671 -0.01316336 -0.11908252 -0.97541469 -0.9674579  -0.94495817
 -0.9867988  -0.96840133 -0.9458234  -0.89408755 -0.55457721 -0.80601325
  0.76803131  0.68369799  0.79670578 -0.96909654 -0.99957952 -0.99964558
 -0.99772028 -0.9940063  -0.97363671 -0.95071985 -0.30243696 -0.34824317
 -0.40478535  0.5074924  -0.15649451  0.04067368  0.27299122  0.19756581
 -0.19455563  0.41141089 -0.3404662   0.07755506 -0.08402448  0.03530499
 -0.01008253 -0.1049829  -0.42913351  0.39917698  0.92740359 -0.28921515
  0.15256831 -0.98905711 -0.9838872  -0.96478113 -0.98899958 -0.98315936
 -0.96520631  0.85617578 -0.30487004  0.15289519  0.9444614  -0.26215956
  0.14901318  0.05767649  0.80557472 -0.85816306 -0.95723959 -0.98840345
 -0.98168751 -0.97121523 -0.72888396 -1.         -0.46504709 -0.50988188
  0.52542419 -0.54024472  0.55444699 -0.74568691  0.73317067 -0.73739047
  0.74869829 -0.8451508   0.86929012 -0.89306457  0.91311888  0.94523268
 -0.9114153  -0.73853452  0.0701

  return (1 / (((2*np.pi)**(d/2)) * (np.linalg.det(sigma)**0.5))) * np.exp(-0.5*(x - mu) @ np.linalg.inv(sigma) @ (x - mu))
  return (1 / (((2*np.pi)**(d/2)) * (np.linalg.det(sigma)**0.5))) * np.exp(-0.5*(x - mu) @ np.linalg.inv(sigma) @ (x - mu))


p(y=0|x) ~ inf
p(y=1|x) ~ inf


LinAlgError: Singular matrix