In [46]:
import numpy as np 
import pandas as pd
from scipy import signal
import matplotlib.pyplot as plt 
import math
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Most recently loaded dataset; filled in by load_dataset().
df = None


def load_dataset(i):
    """Read one dataset file and return it as a DataFrame.

    The file name is ``'dataset_<i + 1>.txt'`` (so ``i`` is zero-based),
    parsed as comma-separated values without a header row.

    The loaded frame is also stored in the module-level ``df`` so that the
    placeholder defined above is actually populated; previously the result
    was bound to a local name and silently discarded.

    Parameters
    ----------
    i : int
        Zero-based dataset index.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    global df
    df = pd.read_csv('dataset_' + str(i + 1) + '.txt', sep=',', header=None)
    return df

def data_visulization(i, c, sensor):
    """Plot the three channels of one sensor for one activity of a dataset.

    Rows of ``'dataset_<i + 1>.txt'`` whose column 24 equals ``c`` are
    selected, and the three consecutive columns belonging to ``sensor`` are
    drawn with matplotlib.

    Parameters
    ----------
    i : int
        Zero-based dataset index.
    c : int
        Value of column 24 to select (the activity label).
    sensor : str
        One of 'wrist-a', 'wrist-g', 'chest-a', 'chest-g', 'hip-a', 'hip-g',
        'ankle-a', 'ankle-g'.

    Raises
    ------
    ValueError
        If ``sensor`` is not one of the names listed above.
    """
    # Each sensor occupies three consecutive columns; the value is the
    # position of that triple within a row.
    sensor_slots = {
        'wrist-a': 0,
        'wrist-g': 1,
        'chest-a': 2,
        'chest-g': 3,
        'hip-a': 4,
        'hip-g': 5,
        'ankle-a': 6,
        'ankle-g': 7,
    }
    # Validate before touching the file so a typo fails fast with a clear
    # message instead of an unbound-variable error at plotting time.
    if sensor not in sensor_slots:
        raise ValueError(
            'unknown sensor ' + repr(sensor)
            + '; expected one of ' + ', '.join(sensor_slots)
        )
    t = sensor_slots[sensor]

    df = pd.read_csv('dataset_' + str(i + 1) + '.txt', sep=',', header=None)
    activity_data = df[df[24] == c].values
    plt.plot(activity_data[:, 3 * t:3 * t + 3])
    plt.show()
        
def noise_removing(i, c):
    """Low-pass filter the sensor columns of one activity and return them.

    Rows of ``'dataset_<i + 1>.txt'`` whose column 24 equals ``c`` are
    selected and the first 24 columns are passed through a 4th-order
    Butterworth low-pass filter (normalized cutoff 0.04). Column 24 is
    left untouched.

    The filtered array is returned; previously it was computed and then
    dropped, so the function had no observable effect.

    Parameters
    ----------
    i : int
        Zero-based dataset index.
    c : int
        Value of column 24 to select (the activity label).

    Returns
    -------
    numpy.ndarray
        Float array with the filtered columns 0-23 followed by the original
        remaining columns. It has zero rows when no row matches ``c``.
    """
    df = pd.read_csv('dataset_' + str(i + 1) + '.txt', sep=',', header=None)
    b, a = signal.butter(4, 0.04, 'lowpass', analog=False)
    # Work in float so the filter output is not truncated when the file
    # happens to contain only integer values.
    activity_data = df[df[24] == c].values.astype(float)
    if len(activity_data):
        # Filter every sensor column at once along the time (row) axis.
        activity_data[:, :24] = signal.lfilter(
            b, a, activity_data[:, :24], axis=0
        )
    return activity_data

# Build the feature row for one window of data.
def sample(arr, sample_data):
    """Append per-column statistics of ``sample_data`` to ``arr``.

    For each of the first 24 columns, five values are appended in order:
    minimum, maximum, mean, average and median. The last column of the
    first row is appended afterwards, giving 121 values when ``arr`` starts
    out empty.

    ``arr`` is extended in place; the return value is a new 2-D array with
    a single row holding the contents of ``arr``.

    NOTE(review): ``np.average`` is called without weights, so it yields the
    same value as ``np.mean``; both are kept so the row width stays the same.
    """
    statistics = (np.min, np.max, np.mean, np.average, np.median)
    for column in range(24):
        channel = sample_data[:, column]
        arr.extend(stat(channel) for stat in statistics)
    # The trailing entry is taken from the last column of the first row.
    arr.append(sample_data[0, -1])
    return np.array([arr])

def _iter_windows(data, size=1000):
    """Yield consecutive row windows of at most ``size`` rows from ``data``.

    The last window holds the remainder and may be shorter. An empty window
    is never produced, including when ``len(data)`` is an exact multiple of
    ``size`` or zero.
    """
    for start in range(0, len(data), size):
        yield data[start:start + size, :]


def _stack_feature_rows(rows, width=121):
    """Stack ``(1, width)`` feature rows into one ``(n, width)`` float array."""
    # The leading empty block fixes the width and dtype and makes the call
    # valid even when ``rows`` is empty.
    return np.concatenate([np.empty(shape=(0, width))] + rows, axis=0)


def feature_engineering():
    """Build the training and testing feature tables and write them to CSV.

    For each of the 19 files ``dataset_1.txt`` .. ``dataset_19.txt`` and each
    label value 1..13 found in column 24:

    1. the first 24 columns are low-pass filtered (4th-order Butterworth,
       normalized cutoff 0.04);
    2. the rows are split in order, the first 80% for training and the rest
       for testing;
    3. each part is cut into windows of 1000 rows (the last one may be
       shorter) and ``sample`` turns every window into one feature row.

    The results are written without index or header to
    ``training_data.csv`` and ``testing_data.csv``.

    Differences from the earlier implementation, all limited to inputs that
    used to fail or lose precision:

    * a part whose length is an exact multiple of 1000 no longer produces an
      empty trailing window (which made ``sample`` raise on a zero-size
      array);
    * a label with no rows in a file is skipped;
    * data is processed as float so filter output is not truncated if a
      file contains only integer values.
    """
    # The filter design does not depend on the data, so compute it once.
    b, a = signal.butter(4, 0.04, 'low', analog=False)

    # Collect rows in lists and stack once at the end instead of growing
    # the arrays with a concatenate per window.
    training_rows = []
    testing_rows = []

    for i in range(19):
        df = pd.read_csv('dataset_' + str(i + 1) + '.txt', sep=',', header=None)
        print('deal with dataset ' + str(i + 1))
        for c in range(1, 14):
            activity_data = df[df[24] == c].values.astype(float)
            if len(activity_data) == 0:
                # Nothing recorded for this label in this file.
                continue

            # Filter all sensor columns along the time (row) axis.
            activity_data[:, :24] = signal.lfilter(
                b, a, activity_data[:, :24], axis=0
            )

            training_len = math.floor(len(activity_data) * 0.8)
            training_data = activity_data[:training_len, :]
            testing_data = activity_data[training_len:, :]

            training_rows.extend(
                sample([], window) for window in _iter_windows(training_data)
            )
            testing_rows.extend(
                sample([], window) for window in _iter_windows(testing_data)
            )

    df_training = pd.DataFrame(_stack_feature_rows(training_rows))
    df_testing = pd.DataFrame(_stack_feature_rows(testing_rows))
    df_training.to_csv('training_data.csv', index=None, header=None)
    df_testing.to_csv('testing_data.csv', index=None, header=None)
    
def model_training_and_evaluation():
    """Train a 3-nearest-neighbour classifier and print its test results.

    Reads ``training_data.csv`` and ``testing_data.csv`` (no header row).
    In each file the last column is the label and the remaining columns are
    the features. Labels are shifted down by one and cast to int, features
    are standardized with statistics fitted on the training set only, and
    the accuracy and confusion matrix on the test set are printed.
    """
    def split_features_labels(frame):
        # The last column holds the label; shift it down by one so that the
        # smallest label value 1 becomes 0.
        label_column = frame.shape[1] - 1
        labels = (frame[label_column].values - 1).astype(int)
        features = frame.drop([label_column], axis=1).values
        return features, labels

    training_frame = pd.read_csv('training_data.csv', header=None)
    testing_frame = pd.read_csv('testing_data.csv', header=None)

    X_train, y_train = split_features_labels(training_frame)
    X_test, y_test = split_features_labels(testing_frame)

    # Standardize both sets using the training-set mean and variance.
    scaler = preprocessing.StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # k-nearest-neighbours with k = 3.
    classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    print('Accuracy: ', accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))

if __name__ == '__main__':
    # Optional plotting helper, not run by default:
    #   data_visulization(i, c, 'sensor')
    # where sensor is one of:
    #   wrist-a, wrist-g, chest-a, chest-g, hip-a, hip-g, ankle-a, ankle-g

    # Run the pipeline stages in order: build the feature CSV files first,
    # then train and evaluate the classifier on them.
    for pipeline_stage in (feature_engineering, model_training_and_evaluation):
        pipeline_stage()

deal with dataset 1
deal with dataset 2
deal with dataset 3
deal with dataset 4
deal with dataset 5
deal with dataset 6
deal with dataset 7
deal with dataset 8
deal with dataset 9
deal with dataset 10
deal with dataset 11
deal with dataset 12
deal with dataset 13
deal with dataset 14
deal with dataset 15
deal with dataset 16
deal with dataset 17
deal with dataset 18
deal with dataset 19
Accuracy:  0.9241842610364683
[[ 57   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0  57   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0  56   1   0   0   0   0   0   0   0   0   0]
 [  0   0   2  97   0   0   0   0   0   0   0   0   0]
 [  0   0   1   1  47   5   1   2   0   0   0   0   0]
 [  0   0   0   2  17  65   0   1   0   0   0   0   0]
 [  0   0   2   0   2   1 212   1   0   0   0   1   0]
 [  0   0   0   0   0   0   5  33   0   0   1   0   0]
 [  0   0   0   0   0   1   1   0  36   0   0   0   0]
 [  0   0   0   0   0   0   1   0   0  95   0   0   0]
 [  0   0   0   0   0   1   1 