In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# %matplotlib inline # plot in cell
from sklearn import metrics
import pandas as pd
from scipy import signal

import os
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# for svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

feature_names = ['mean_x', 'mean_y', 'mean_z', 
                'rms_x', 'rms_y', 'rms_z',
                'std_x', 'std_y', 'std_z',
                'var_x', 'var_y', 'var_z',
                'med_x', 'med_y', 'med_z',
                'min_x', 'min_y', 'min_z',
                'max_x', 'max_y', 'max_z']

target_names = ['Standing', 'Walking normal', 'Jumping', 
                'Jogging', 
                'Sit chair', 'Stairs up', 'Stairs down']
                #'Car Step in', 'Car Step out',
                #'Back sitting chair', 'Fall front kness lying', 
                #'Fall forward lying','Sideward lying']

def featuresFromBuffer(at):
    feat = np.zeros(21)
    x = np.array(at.iloc[:,0], dtype=np.float64) 
    y = np.array(at.iloc[:,1], dtype=np.float64)
    z = np.array(at.iloc[:,2], dtype=np.float64)
    
    # Average value in signal buffer 
    means = [np.mean(i) for i in [x, y, z]]
    feat[0:3] = means
    
    # RMS value in signal buffer 
    rms = [np.sqrt(np.mean(i**2)) for i in [x, y, z]]
    feat[3:6] = rms
    
    # Standard deviation
    std = [np.std(i) for i in [x, y, z]]
    feat[6:9] = std
  
    # Variance
    var = [np.var(i) for i in [x, y, z]]
    feat[9:12] = var
    
    # Median
    med = [np.median(i) for i in [x, y, z]]
    feat[12:15] = med
       
    # Range
    Range1 = [ np.amin(i) for i in [x, y, z]]   
    feat[15:18] = Range1
    Range2 = [ np.amax(i) for i in [x, y, z]]    
    feat[18:21] = Range2
    
    return feat

In [2]:
# Import data

dt = pd.read_excel (r'F:\\Program\\OneDrive\\KHOÁ LUẬN 2020\\acc_data.xlsx')
dt.columns
dt.shape

Index(['Standing', 'Unnamed: 1', 'Unnamed: 2', 'Walking normal', 'Unnamed: 4',
       'Unnamed: 5', 'Jumping', 'Unnamed: 7', 'Unnamed: 8', 'Jogging',
       'Unnamed: 10', 'Unnamed: 11', 'Sit chair', 'Unnamed: 13', 'Unnamed: 14',
       'Stairs up', 'Unnamed: 16', 'Unnamed: 17', 'Stairs down', 'Unnamed: 19',
       'Unnamed: 20', 'Car Step-in', 'Unnamed: 22', 'Unnamed: 23',
       'Car Step-out', 'Unnamed: 25', 'Unnamed: 26', 'Back sitting chair',
       'Unnamed: 28', 'Unnamed: 29', 'Fall front knees lying', 'Unnamed: 31',
       'Unnamed: 32', 'Fall forward lying', 'Unnamed: 34', 'Unnamed: 35',
       'Sideward lying', 'Unnamed: 37', 'Unnamed: 38'],
      dtype='object')

(28939, 39)

In [3]:
Standing = dt[['Standing', 'Unnamed: 1', 'Unnamed: 2']] 
# Delete columns contain missing value (NaN or not value)
Standing = Standing.dropna()
# Create index (start from 1) in first column
Standing.index = pd.RangeIndex(len(Standing.index))
# Drop the first row ((Xoá hàng x,y,z))
Standing = Standing.drop(0)

Walking_normal = dt[['Walking normal', 'Unnamed: 4', 'Unnamed: 5']] 
Walking_normal = Walking_normal.dropna()
Walking_normal.index = pd.RangeIndex(len(Walking_normal.index))
Walking_normal = Walking_normal.drop(0)

Jumping = dt[['Jumping', 'Unnamed: 7', 'Unnamed: 8']] 
Jumping = Jumping.dropna()
Jumping.index = pd.RangeIndex(len(Jumping.index))
Jumping = Jumping.drop(0)

Jogging = dt[['Jogging', 'Unnamed: 10', 'Unnamed: 11']] 
Jogging = Jogging.dropna()
Jogging.index = pd.RangeIndex(len(Jogging.index))
Jogging = Jogging.drop(0)

Sit_chair = dt[['Sit chair', 'Unnamed: 13', 'Unnamed: 14']] 
Sit_chair = Sit_chair.dropna()
Sit_chair.index = pd.RangeIndex(len(Sit_chair.index)) 
Sit_chair = Sit_chair.drop(0)

Stairs_up = dt[['Stairs up', 'Unnamed: 16', 'Unnamed: 17']] 
Stairs_up = Stairs_up.dropna()
Stairs_up.index = pd.RangeIndex(len(Stairs_up.index)) 
Stairs_up = Stairs_up.drop(0)
    
Stairs_down = dt[['Stairs down', 'Unnamed: 19', 'Unnamed: 20']] 
Stairs_down = Stairs_down.dropna()
Stairs_down.index = pd.RangeIndex(len(Stairs_down.index))
Stairs_down = Stairs_down.drop(0)

print (len(Standing))
print (len (Walking_normal))
print (len (Jumping))
print (len (Jogging))
print (len (Sit_chair))
print (len (Stairs_up))
print (len (Stairs_down))

28938
26158
7909
7903
2794
3137
3160


In [7]:
# Split dataset to 2 parts: Train (60%) - Test (40%)\n",

window_size = 16
stride = 6 #step

# range (start, stop, step)
X_stand_train = [Standing[i:i+window_size] for i in range(0, int(len(Standing)*0.6), stride)] 
X_stand_test = [Standing[i:i+window_size] for i in range(int(len(Standing)*0.6), len(Standing), stride) 
                                            if i+window_size<=len(Standing)]

X_walk_train = [Walking_normal[i:i+window_size] for i in range(0, int(len(Walking_normal)*0.6), stride)]
X_walk_test = [Walking_normal[i:i+window_size] for i in range(int(len(Walking_normal)*0.6), len(Walking_normal),                                stride) if i+window_size<=len(Walking_normal)]

X_jump_train = [Jumping[i:i+window_size] for i in range(0, int(len(Jumping)*0.6), stride)]
X_jump_test = [Jumping[i:i+window_size] for i in range(int(len(Jumping)*0.6), len(Jumping), stride) 
                                            if i+window_size<=len(Jumping)]

X_jog_train = [Jogging[i:i+window_size] for i in range(0, int(len(Jogging)*0.6), stride)] 
X_jog_test = [Jogging[i:i+window_size] for i in range(int(len(Jogging)*0.6), len(Jogging), stride) 
                                        if i+window_size<=len(Jogging)]
X_sit_train = [Sit_chair[i:i+window_size] for i in range(0, int(len(Sit_chair)*0.6), stride)] 
X_sit_test = [Sit_chair[i:i+window_size] for i in range(int(len(Sit_chair)*0.6), len(Sit_chair), stride) 
                                            if i+window_size<=len(Sit_chair)]

X_stairUp_train = [Stairs_up[i:i+window_size] for i in range(0, int(len(Stairs_up)*0.6), stride)] 
X_stairUp_test = [Stairs_up[i:i+window_size] for i in range(int(len(Stairs_up)*0.6), len(Stairs_up), stride) 
                                                if i+window_size<=len(Stairs_up)]

X_stairDown_train = [Stairs_down[i:i+window_size] for i in range(0, int(len(Stairs_down)*0.6), stride)] 
X_stairDown_test = [Stairs_down[i:i+window_size] for i in range(int(len(Stairs_down)*0.6), len(Stairs_down),                                                        stride) if i+window_size<=len(Stairs_down)]

print ('X_stand_train: ', len(X_stand_train))
print ('X_stand_test: ', len(X_stand_test))

print ('X_walk_train: ', len(X_walk_train))
print ('X_walk_test: ', len(X_walk_test))

print ('X_jump_train: ', len (X_jump_train))
print ('X_jump_test: ', len (X_jump_test))

print ('X_jog_train: ', len (X_jog_train))
print ('X_jog_test: ', len (X_jog_test))

print ('X_jump_train: ', len (X_jump_train))
print ('X_jump_test: ', len (X_jump_test))

print ('X_sit_train: ', len (X_sit_train))
print ('X_sit_test: ', len (X_sit_test))

print ('X_stairUp_train: ', len (X_stairUp_train))
print ('X_stairUp_test: ', len (X_stairUp_test))

print ('X_stairDown_train: ', len (X_stairDown_train))
print ('X_stairDown_test: ', len (X_stairDown_test))


X_stand_train:  2894
X_stand_test:  1927
X_walk_train:  2616
X_walk_test:  1742
X_jump_train:  791
X_jump_test:  525
X_jog_train:  791
X_jog_test:  525
X_jump_train:  791
X_jump_test:  525
X_sit_train:  280
X_sit_test:  184
X_stairUp_train:  314
X_stairUp_test:  207
X_stairDown_train:  316
X_stairDown_test:  209


In [8]:
train_data = []
train_label = []

test_data = []
test_label = []

for acts in X_stand_train:
    train_data.append(acts)
    train_label.append(0)
    
for acts in X_walk_train:
    train_data.append(acts)
    train_label.append(1)

for acts in X_jump_train:
    train_data.append(acts)
    train_label.append(2)

for acts in X_jog_train:
    train_data.append(acts)
    train_label.append(3)

for acts in X_sit_train:
    train_data.append(acts)
    train_label.append(3)

for acts in X_stairUp_train:
    train_data.append(acts)
    train_label.append(4)

for acts in X_stairDown_train:
    train_data.append(acts)
    train_label.append(5)

print('train-data length: ', len(train_data) )
print('train-label length: ', len(train_label) )
#print(train_label)
      
# For TEST

for acts in X_stand_test:
    test_data.append(acts)
    test_label.append(0)

for acts in X_walk_test:
    test_data.append(acts)
    test_label.append(1)

for acts in X_jump_test:
    test_data.append(acts)
    test_label.append(2)

for acts in X_jog_test:
    test_data.append(acts)
    test_label.append(3)

for acts in X_sit_test:
    test_data.append(acts)
    test_label.append(4)

for acts in X_stairUp_test:
    test_data.append(acts)
    test_label.append(5)

for acts in X_stairDown_test:
    test_data.append(acts)
    test_label.append(6)

print('test-data length: ', len(test_data))
print('test-label length: ', len(test_label))

train-data length:  8002
train-label length:  8002
test-data length:  5319
test-label length:  5319


In [9]:
# Tạo mảng features 
train_features = []
test_features = []
for action in train_data:
    feat = featuresFromBuffer(action)
    train_features.append(feat)  

for action in test_data:
    feat = featuresFromBuffer(action)
    #print(feat)
    test_features.append(feat)
    #print(test_features)

#print (train_features)
len(train_features)
len(test_features)


8002

5319

In [None]:
# RandomizedSearch

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint

param_dist = {"max_depth": [3, None],                  #distribution
              "n_estimators":[50,100,200,300,400,500],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

forest_random = RandomizedSearchCV( estimator=RandomForestClassifier( random_state=0 ),
                                    param_distributions=param_dist,
                                    cv=3,              #CV
                                    n_iter=1944,          #interation num
                                    scoring="accuracy", #metrics
                                    n_jobs=1,           #num of core
                                    verbose=0,          
                                    random_state=1)

forest_random.fit(train_features, train_label)
forest_random_best = forest_random.best_estimator_ #best estimator
print("Best Model Parameter: ",forest_random.best_params_)

In [None]:
# Exhaustive Grid Search
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

n_estimators = [50, 100,150 200,250, 300, 500, 800]
max_depth = [4,5,6,7,8, 15]
min_samples_split = [2,3, 5,8, 10, 15]
min_samples_leaf = [1, 2,3,4, 5,6, 10] 

forest = RandomForestClassifier(random_state = 1)

hyperGrid = dict(n_estimators = n_estimators, max_depth = max_depth,  
              min_samples_split = min_samples_split, 
             min_samples_leaf = min_samples_leaf)

grid = GridSearchCV(forest, hyperF, cv = 3, verbose = 1, 
                      n_jobs = -1)
bestF = grid.fit(train_features, train_label)


In [None]:
#Run with value vua tim đc
forestOpt = RandomForestClassifier(random_state = 1, max_depth = 15,     n_estimators = 500, min_samples_split = 2, min_samples_leaf = 1)
                                   
modelOpt = forestOpt.fit(train_features, train_label)
y_pred = modelOpt.predict(test_features)