In [16]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# %matplotlib inline # plot in cell
from sklearn import metrics
import pandas as pd
from scipy import signal
from scipy import stats
from scipy.stats import kurtosis, skew

import os
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# for svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

feature_names = ['mean_x', 'mean_y', 'mean_z', 
                'rms_x', 'rms_y', 'rms_z',
                'std_x', 'std_y', 'std_z',
                'var_x', 'var_y', 'var_z',
                'med_x', 'med_y', 'med_z',
                'min_x', 'min_y', 'min_z',
                'max_x', 'max_y', 'max_z',
                'pearsonr_xy', 'pearsonr_yz', 'pearsonr_zx',
                'mad_x', 'mad_y', 'mad_z',
                'iqr_x', 'iqr_y','iqr_z']

target_names = ['Standing', 'Walking normal', 'Jumping', 
                'Sit chair', 'Stairs up', 'Stairs down']
                #'Car Step in', 'Car Step out',
                #'Back sitting chair', 'Fall front kness lying', 
                #'Fall forward lying','Sideward lying']
            
### TIME domain 
## mean, the standard deviation, the median, the correlation, the tilt angle (TA)

def featuresFromBuffer(at):
    feat = np.zeros(30)    # return array float([ 0.,  0.,  0.,  0., ....., 0.])
                            # a vector of 21 features from each window
    x = np.array(at.iloc[:,0], dtype=np.float64)   # get value all row, column = 0
    y = np.array(at.iloc[:,1], dtype=np.float64)   # 
    z = np.array(at.iloc[:,2], dtype=np.float64)   # 
   
    
    # Average value in signal buffer for all three acceleration components (1 each)    
    means = [np.mean(i) for i in [x, y, z]]
    feat[0:3] = means
    
    # RMS value in signal buffer for all three acceleration components (1 each)
    rms = [np.sqrt(np.mean(i**2)) for i in [x, y, z]]
    feat[3:6] = rms
    
    # Standard deviation
    std = [np.std(i) for i in [x, y, z]]
    feat[6:9] = std
    
    # Variance
    var = [np.var(i) for i in [x, y, z]]
    feat[9:12] = var
    
    # Median
    med = [np.median(i) for i in [x, y, z]]
    feat[12:15] = med
    
    # Range
    Range1 = [ np.amin(i) for i in [x, y, z]]    
    feat[15:18] = Range1
    Range2 = [ np.amax(i) for i in [x, y, z]]    
    feat[18:21] = Range2
    
    # Pearsonr   
    x_pd = pd.Series(x)
    y_pd = pd.Series(y)
    z_pd = pd.Series(z)
    Pearsonr_xy = x_pd.corr(y_pd)
    feat[21] = Pearsonr_xy
    Pearsonr_yz = y_pd.corr(z_pd)
    feat[22] = Pearsonr_yz
    Pearsonr_zx = z_pd.corr(x_pd)
    feat[23] = Pearsonr_zx
    
    # Median Absolute Deviation
    mad = [stats.median_absolute_deviation(i) for i in [x, y, z]]   
    feat[24:27] = mad
    
    # Interquartile range (khoảng tứ nhị phân)
    iqr = [stats.iqr(i) for i in [x, y, z]]
    feat[27:30] = iqr
    return feat
'''      
     # SMA
    sma = sma_unit(x)+ sma_unit(y) + sma_unit(z)
    feat[30] = sma
    
    #tilt angel
    #TA = [(np.arccos(i))/9.8 for i in z]
    #feat[31] = TA

    # Covariance 
    # find out covariance with respect  rows (axis = 0)
    cov_xy = np.stack((x, y), axis = 0)
    cov_yz = np.stack((y, z), axis = 0)
    cov_zx = np.stack((z, x), axis = 0)
 
    corr_xy =  (np.cov(cov_xy))/(np.std(x)*np.std(y))
    feat[] = corr_xy
    corr_yz =  (np.cov(cov_yz))/(np.std(y)*np.std(z))
    feat[] = corr_yz
    corr_zx =  (np.cov(cov_zx))/(np.std(z)*np.std(x))
    feat[] = corr_zx
    
'''    

### Frequency Domain
##  the frequency energy, the frequency entropy, and the wavelet energy,
    
def energy(value_fft):
    en = 0.0
    for i in value_fft:
        en = en + i**2
    final_energy = en/len(value_fft)
    return final_energy

def featuresFrequency(at):
    feat_fft = np.zeros(30)
    x = np.array(at[:,0], dtype=np.float64)   # get value all row, column = 0
    y = np.array(at[:,1], dtype=np.float64)   # 
    z = np.array(at[:,2], dtype=np.float64)
    
    # Ennergy
    Energy = [energy(i) for i in [x, y, z]]
    feat_fft[0:3] = Energy
    
    # skew
    skew = [stats.skew(i) for i in [x, y, z]]   
    feat_fft[3:6] = skew
    
    kurtosis = [stats.kurtosis(i) for i in [x, y, z]]   
    feat_fft[6:9] = kurtosis
    
    return feat_fft


'      \n     # SMA\n    sma = sma_unit(x)+ sma_unit(y) + sma_unit(z)\n    feat[30] = sma\n    \n    #tilt angel\n    #TA = [(np.arccos(i))/9.8 for i in z]\n    #feat[31] = TA\n\n    # Covariance \n    # find out covariance with respect  rows (axis = 0)\n    cov_xy = np.stack((x, y), axis = 0)\n    cov_yz = np.stack((y, z), axis = 0)\n    cov_zx = np.stack((z, x), axis = 0)\n \n    corr_xy =  (np.cov(cov_xy))/(np.std(x)*np.std(y))\n    feat[] = corr_xy\n    corr_yz =  (np.cov(cov_yz))/(np.std(y)*np.std(z))\n    feat[] = corr_yz\n    corr_zx =  (np.cov(cov_zx))/(np.std(z)*np.std(x))\n    feat[] = corr_zx\n    \n'

In [3]:
import numpy as np
def entropy(labels):
    prob_dict = {x:labels.count(x)/len(labels) for x in labels}
    probs = np.array(list(prob_dict.values()))

    return - probs.dot(np.log2(probs))
labels = [0, 0, 1, 1]
i = entropy(labels)
i

1.0

In [7]:
# Import data

dt = pd.read_excel (r'F:\\Program\\OneDrive\\KHOÁ LUẬN 2020\\acc_data.xlsx')
dt.columns
dt.shape

Index(['Standing', 'Unnamed: 1', 'Unnamed: 2', 'Walking normal', 'Unnamed: 4',
       'Unnamed: 5', 'Jumping', 'Unnamed: 7', 'Unnamed: 8', 'Jogging',
       'Unnamed: 10', 'Unnamed: 11', 'Sit chair', 'Unnamed: 13', 'Unnamed: 14',
       'Stairs up', 'Unnamed: 16', 'Unnamed: 17', 'Stairs down', 'Unnamed: 19',
       'Unnamed: 20', 'Car Step-in', 'Unnamed: 22', 'Unnamed: 23',
       'Car Step-out', 'Unnamed: 25', 'Unnamed: 26', 'Back sitting chair',
       'Unnamed: 28', 'Unnamed: 29', 'Fall front knees lying', 'Unnamed: 31',
       'Unnamed: 32', 'Fall forward lying', 'Unnamed: 34', 'Unnamed: 35',
       'Sideward lying', 'Unnamed: 37', 'Unnamed: 38'],
      dtype='object')

(28939, 39)

In [8]:
Standing = dt[['Standing', 'Unnamed: 1', 'Unnamed: 2']] 
# Delete columns contain missing value (NaN or not value)
Standing = Standing.dropna()
# Create index (start from 1) in first column
Standing.index = pd.RangeIndex(len(Standing.index))
# Drop the first row ((Xoá hàng x,y,z))
Standing = Standing.drop(0)

Walking_normal = dt[['Walking normal', 'Unnamed: 4', 'Unnamed: 5']] 
Walking_normal = Walking_normal.dropna()
Walking_normal.index = pd.RangeIndex(len(Walking_normal.index))
Walking_normal = Walking_normal.drop(0)

Jumping = dt[['Jumping', 'Unnamed: 7', 'Unnamed: 8']] 
Jumping = Jumping.dropna()
Jumping.index = pd.RangeIndex(len(Jumping.index))
Jumping = Jumping.drop(0)

Sit_chair = dt[['Sit chair', 'Unnamed: 13', 'Unnamed: 14']] 
Sit_chair = Sit_chair.dropna()
Sit_chair.index = pd.RangeIndex(len(Sit_chair.index)) 
Sit_chair = Sit_chair.drop(0)

Stairs_up = dt[['Stairs up', 'Unnamed: 16', 'Unnamed: 17']] 
Stairs_up = Stairs_up.dropna()
Stairs_up.index = pd.RangeIndex(len(Stairs_up.index)) 
Stairs_up = Stairs_up.drop(0)
    
Stairs_down = dt[['Stairs down', 'Unnamed: 19', 'Unnamed: 20']] 
Stairs_down = Stairs_down.dropna()
Stairs_down.index = pd.RangeIndex(len(Stairs_down.index))
Stairs_down = Stairs_down.drop(0)

print (len(Standing))
print (len (Walking_normal))
print (len (Jumping))
print (len (Sit_chair))
print (len (Stairs_up))
print (len (Stairs_down))


28938
26158
7909
2794
3137
3160


In [9]:
#### Cell for FFT
Standing_fft = np.fft.fft(Standing)
Walking_normal_fft = np.fft.fft(Walking_normal)
Jumping_fft = np.fft.fft(Jumping)
Sit_chair_fft = np.fft.fft(Sit_chair)
Stairs_up_fft = np.fft.fft(Stairs_up)
Stairs_down_fft = np.fft.fft(Stairs_down)

In [10]:
### Cell for FFT
window_size = 10
stride = 6 #step

# range (start, stop, step)
X_stand_fft_train = [Standing_fft[i:i+window_size] for i in range(0, int(len(Standing_fft)*0.6), stride)] 
X_stand_fft_test = [Standing_fft[i:i+window_size] for i in range(int(len(Standing_fft)*0.6), len(Standing_fft), stride) 
                                            if i+window_size<=len(Standing_fft)]
X_walk_fft_train = [Walking_normal_fft[i:i+window_size] for i in range(0, int(len(Walking_normal_fft)*0.6), stride)]
X_walk_fft_test = [Walking_normal_fft[i:i+window_size] for i in range(int(len(Walking_normal_fft)*0.6), len(Walking_normal_fft),                                stride) if i+window_size<=len(Walking_normal)]

X_jump_fft_train = [Jumping_fft[i:i+window_size] for i in range(0, int(len(Jumping_fft)*0.6), stride)]
X_jump_fft_test = [Jumping_fft[i:i+window_size] for i in range(int(len(Jumping_fft)*0.6), len(Jumping_fft), stride) 
                                            if i+window_size<=len(Jumping_fft)]

X_sit_fft_train = [Sit_chair_fft[i:i+window_size] for i in range(0, int(len(Sit_chair_fft)*0.6), stride)] 
X_sit_fft_test = [Sit_chair_fft[i:i+window_size] for i in range(int(len(Sit_chair_fft)*0.6), len(Sit_chair_fft), stride) 
                                            if i+window_size<=len(Sit_chair_fft)]

X_stairUp_fft_train = [Stairs_up_fft[i:i+window_size] for i in range(0, int(len(Stairs_up_fft)*0.6), stride)] 
X_stairUp_fft_test = [Stairs_up_fft[i:i+window_size] for i in range(int(len(Stairs_up_fft)*0.6), len(Stairs_up_fft), stride) 
                                                if i+window_size<=len(Stairs_up_fft)]

X_stairDown_fft_train = [Stairs_down_fft[i:i+window_size] for i in range(0, int(len(Stairs_down_fft)*0.6), stride)] 
X_stairDown_fft_test = [Stairs_down_fft[i:i+window_size] for i in range(int(len(Stairs_down_fft)*0.6), len(Stairs_down_fft), 
                                                            stride) if i+window_size<=len(Stairs_down_fft)]

print ('X_stand_fft_train: ', len(X_stand_fft_train))
print ('X_stand_fft_test: ', len(X_stand_fft_test))

print ('X_walk_fft_train: ', len(X_walk_fft_train))
print ('X_walk_fft_test: ', len(X_walk_fft_test))

print ('X_jump_fft_train: ', len (X_jump_fft_train))
print ('X_jump_fft_test: ', len (X_jump_fft_test))


print ('X_sit_fft_train: ', len (X_sit_fft_train))
print ('X_sit_fft_test: ', len (X_sit_fft_test))

print ('X_stairUp_fft_train: ', len (X_stairUp_fft_train))
print ('X_stairUp_fft_test: ', len (X_stairUp_fft_test))

print ('X_stairDown_fft_train: ', len (X_stairDown_fft_train))
print ('X_stairDown_fft_test: ', len (X_stairDown_fft_test))

X_stand_fft_train:  2894
X_stand_fft_test:  1928
X_walk_fft_train:  2616
X_walk_fft_test:  1743
X_jump_fft_train:  791
X_jump_fft_test:  526
X_sit_fft_train:  280
X_sit_fft_test:  185
X_stairUp_fft_train:  314
X_stairUp_fft_test:  208
X_stairDown_fft_train:  316
X_stairDown_fft_test:  210


In [11]:
## FFT Frequency

train_fft_data = []
train_fft_label = []

test_fft_data = []
test_fft_label = []

for acts in X_stand_fft_train:
    train_fft_data.append(acts)
    train_fft_label.append(0)
    
for acts in X_walk_fft_train:
    train_fft_data.append(acts)
    train_fft_label.append(1)

for acts in X_jump_fft_train:
    train_fft_data.append(acts)
    train_fft_label.append(2)

for acts in X_sit_fft_train:
    train_fft_data.append(acts)
    train_fft_label.append(3)

for acts in X_stairUp_fft_train:
    train_fft_data.append(acts)
    train_fft_label.append(4)

for acts in X_stairDown_fft_train:
    train_fft_data.append(acts)
    train_fft_label.append(5)

print('train_fft_data length: ', len(train_fft_data) )
print('train_fft_label length: ', len(train_fft_label))

# For TEST

for acts in X_stand_fft_test:
    test_fft_data.append(acts)
    test_fft_label.append(0)

for acts in X_walk_fft_test:
    test_fft_data.append(acts)
    test_fft_label.append(1)

for acts in X_jump_fft_test:
    test_fft_data.append(acts)
    test_fft_label.append(2)

for acts in X_sit_fft_test:
    test_fft_data.append(acts)
    test_fft_label.append(3)

for acts in X_stairUp_fft_test:
    test_fft_data.append(acts)
    test_fft_label.append(4)

for acts in X_stairDown_fft_test:
    test_fft_data.append(acts)
    test_fft_label.append(5)


print('test_fft-data length: ', len(test_fft_data))
print('test_fft-label length: ', len(test_fft_label))

train_fft_data length:  7211
train_fft_label length:  7211
test_fft-data length:  4800
test_fft-label length:  4800


In [12]:
# TIME
# Split dataset to 2 parts: Train (60%) - Test (40%)\n",

window_size = 10
stride = 6 #step

# range (start, stop, step)
X_stand_train = [Standing[i:i+window_size] for i in range(0, int(len(Standing)*0.6), stride)] 
X_stand_test = [Standing[i:i+window_size] for i in range(int(len(Standing)*0.6), len(Standing), stride) 
                                            if i+window_size<=len(Standing)]

X_walk_train = [Walking_normal[i:i+window_size] for i in range(0, int(len(Walking_normal)*0.6), stride)]
X_walk_test = [Walking_normal[i:i+window_size] for i in range(int(len(Walking_normal)*0.6), len(Walking_normal),                                stride) if i+window_size<=len(Walking_normal)]

X_jump_train = [Jumping[i:i+window_size] for i in range(0, int(len(Jumping)*0.6), stride)]
X_jump_test = [Jumping[i:i+window_size] for i in range(int(len(Jumping)*0.6), len(Jumping), stride) 
                                            if i+window_size<=len(Jumping)]

X_sit_train = [Sit_chair[i:i+window_size] for i in range(0, int(len(Sit_chair)*0.6), stride)] 
X_sit_test = [Sit_chair[i:i+window_size] for i in range(int(len(Sit_chair)*0.6), len(Sit_chair), stride) 
                                            if i+window_size<=len(Sit_chair)]

X_stairUp_train = [Stairs_up[i:i+window_size] for i in range(0, int(len(Stairs_up)*0.6), stride)] 
X_stairUp_test = [Stairs_up[i:i+window_size] for i in range(int(len(Stairs_up)*0.6), len(Stairs_up), stride) 
                                                if i+window_size<=len(Stairs_up)]

X_stairDown_train = [Stairs_down[i:i+window_size] for i in range(0, int(len(Stairs_down)*0.6), stride)] 
X_stairDown_test = [Stairs_down[i:i+window_size] for i in range(int(len(Stairs_down)*0.6), len(Stairs_down), 
                                                            stride) if i+window_size<=len(Stairs_down)]

print ('X_stand_train: ', len(X_stand_train))
print ('X_stand_test: ', len(X_stand_test))

print ('X_walk_train: ', len(X_walk_train))
print ('X_walk_test: ', len(X_walk_test))

print ('X_jump_train: ', len (X_jump_train))
print ('X_jump_test: ', len (X_jump_test))


print ('X_sit_train: ', len (X_sit_train))
print ('X_sit_test: ', len (X_sit_test))

print ('X_stairUp_train: ', len (X_stairUp_train))
print ('X_stairUp_test: ', len (X_stairUp_test))

print ('X_stairDown_train: ', len (X_stairDown_train))
print ('X_stairDown_test: ', len (X_stairDown_test))


X_stand_train:  2894
X_stand_test:  1928
X_walk_train:  2616
X_walk_test:  1743
X_jump_train:  791
X_jump_test:  526
X_sit_train:  280
X_sit_test:  185
X_stairUp_train:  314
X_stairUp_test:  208
X_stairDown_train:  316
X_stairDown_test:  210


In [13]:
### Time Domain

train_data = []
train_label = []

test_data = []
test_label = []

for acts in X_stand_train:
    train_data.append(acts)
    train_label.append(0)
    
for acts in X_walk_train:
    train_data.append(acts)
    train_label.append(1)

for acts in X_jump_train:
    train_data.append(acts)
    train_label.append(2)

for acts in X_sit_train:
    train_data.append(acts)
    train_label.append(3)

for acts in X_stairUp_train:
    train_data.append(acts)
    train_label.append(4)

for acts in X_stairDown_train:
    train_data.append(acts)
    train_label.append(5)

print('train-data length: ', len(train_data) )
print('train-label length: ', len(train_label) )
#print(train_label)
      
# For TEST

for acts in X_stand_test:
    test_data.append(acts)
    test_label.append(0)

for acts in X_walk_test:
    test_data.append(acts)
    test_label.append(1)

for acts in X_jump_test:
    test_data.append(acts)
    test_label.append(2)

for acts in X_sit_test:
    test_data.append(acts)
    test_label.append(3)

for acts in X_stairUp_test:
    test_data.append(acts)
    test_label.append(4)

for acts in X_stairDown_test:
    test_data.append(acts)
    test_label.append(5)


print('test-data length: ', len(test_data))
print('test-label length: ', len(test_label))

train-data length:  7211
train-label length:  7211
test-data length:  4800
test-label length:  4800


In [14]:
# features array for time domain
train_time_features = []
test_time_features = []
for action in train_data:
    feat_time = featuresFromBuffer(action)
    train_time_features.append(feat_time)  

for action in test_data:
    feat_time = featuresFromBuffer(action)
    #print(feat)
    test_time_features.append(feat_time)
    #print(test_features)

#print (train_features)
len(train_time_features)
len(test_time_features)

7211

4800

In [17]:
# features array for FFT

train_fft_features = []
test_fft_features = []
for action in train_fft_data:
    feat_fft = featuresFrequency(action)
    train_fft_features.append(feat_fft)  

for action in test_fft_data:
    feat_fft = featuresFrequency(action)
    #print(feat)
    test_fft_features.append(feat_fft)
    #print(test_features)

len(train_fft_features)
len(test_fft_features)




7211

4800

In [18]:
train_features = train_time_features + train_fft_features
print(len(train_features))

train_labels = train_label + train_fft_label
print(len(train_labels))

test_features = test_time_features + test_fft_features
print(len(test_features))

test_labels = test_label + test_fft_label
print(len(test_labels))

14422
14422
9600
9600


In [None]:
# beginning of classification:
#https://www.kaggle.com/beagle01/prediction-with-gradient-boosting-classifier

#clf.fit(list(X_train),Y_train)

print("Gradient Boosting Decision Tree:")
from sklearn.ensemble import GradientBoostingClassifier
clf1 = GradientBoostingClassifier(learning_rate=0.05,max_depth=3,n_estimators=100).fit(train_features, train_labels)
#format: pass score in {:.3f}
print('Accuracy of GBDT classifier on training set: {:.3f}'
     .format(clf1.score(train_features, train_labels)))
print('Accuracy of GBDT classifier on test set: {:.3f}'
     .format(clf1.score(test_features, test_labels)))


print("\n\nDecision Tree:") 
from sklearn.tree import DecisionTreeClassifier
from adspy_shared_utilities import plot_decision_tree  # Nên giữ file adspy_ cùng thư mục vs các file python to use adspy_.

clf2 = DecisionTreeClassifier(max_depth=4).fit(train_features, train_labels)

print('Accuracy of Decision Tree classifier on training set: {:.3f}'.format(clf2.score(train_features, train_labels)))
print('Accuracy of Decision Tree classifier on test set: {:.3f}'
.format(clf2.score(test_features, test_labels)))


print("\n\nSVM:")
clf3 = SVC(C=100, gamma='scale').fit(train_features, train_labels)
print("Accuracy on training set: {:.2f}".format(clf3.score(train_features, train_labels)))
print("Accuracy on test set: {:.2f}".format(clf3.score(test_features, test_labels)))


print('\n\n Random Forests: ')
from sklearn.ensemble import RandomForestClassifier
clf4 = RandomForestClassifier(n_estimators=200, random_state=0).fit(train_features, train_labels)

print('Accuracy of RF classifier on training set: {:.3f}'
     .format(clf4.score(train_features, train_labels)))
print('Accuracy of RF classifier on test set: {:.3f}'
     .format(clf4.score(test_features, test_labels)))


print('\n\n KNeighbor: ')
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors = 5, weights = 'distance').fit(train_features, train_labels)
print('Accuracy of K-NN classifier on training set: {:.2f}'
     .format(knn.score(train_features, train_labels)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
     .format(knn.score(test_features, test_labels)))

Gradient Boosting Decision Tree:
