In [1]:
import numpy as np 
import pandas as pd 
from scipy import signal
import matplotlib.pyplot as plt 
import math
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split, TimeSeriesSplit
from dataset_tools import Dataset, lowpass_filter, extract_sensors, extract_features

# Loading & Extracting

In [2]:
# Load the recording stored in dataset/dataset_1.txt through the project's Dataset helper.
# NOTE(review): Dataset comes from dataset_tools and is not shown in this notebook —
# presumably it parses the 25-column layout tabulated below; confirm against that module.
person1 = Dataset('dataset/dataset_1.txt')

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10| 11| 12| 13| 14|15 | 16| 17|   18| 19| 20| 21| 22| 23| 24|
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|Wrist:A1(X)|A2(Y)|A3(Z)|G1(X)|G2(Y)|G3(Z)|Chest:A1(X)|A2(Y)|A3(Z)|G1(X)|G2(Y)|G3(Z)|Hip:A1(X)|A2(Y)|A3(Z)|G1(X)|G2(Y)|G3(Z)|Ankle:A1(X)|A2(Y)|A3(Z)|G1(X)|G2(Y)|G3(Z)|Type|

# Visualising

In [3]:
# Summary statistics over the first 1000 rows of the 'accel' entry under 'wrist' for the
# sitting data; the rendered table below reports the columns Ax, Ay and Az.
person1.sitting['wrist']['accel'][:1000].describe()

Unnamed: 0,Ax,Ay,Az
count,1000.0,1000.0,1000.0
mean,-0.207985,0.948774,0.251779
std,0.009851,0.008983,0.009556
min,-0.23966,0.91756,0.22306
25%,-0.21621,0.94161,0.24348
50%,-0.2084,0.94963,0.25165
75%,-0.20058,0.95364,0.25982
max,-0.18105,0.98169,0.28841


# Filtering

In [4]:
# NOTE(review): the return value of lowpass_filter is discarded, so this cell only has an
# effect if the helper modifies person1.data_frame in place — confirm in dataset_tools.
# The arguments 4 and 5 are presumably the filter order and cut-off; TODO confirm.
lowpass_filter(person1.data_frame, 4, 5)
# NOTE(review): extract_sensors is imported as a free function in the first cell but is
# called here as a method of Dataset; the meaning of 200 is not visible from this notebook.
person1.extract_sensors(200)

  b = a[a_slice]


In [8]:
# Build train/test feature tables from the sitting data using np.max and np.min.
# NOTE(review): 1000 is presumably the window length in samples — confirm in dataset_tools.
# The displayed result has 48 columns (0..47).
train, test = extract_features(person1.sitting['all'],[np.max, np.min], 1000)
# Bare last expression so the notebook renders the training table.
train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,38,39,40,41,42,43,44,45,46,47
0,-0.110459,-0.231437,0.986761,0.94018,0.270707,0.166981,1.974314,-26.030572,16.327567,-0.571387,...,-0.093302,-0.101885,-0.527598,-0.548516,-3.548338,-9.369748,10.395091,2.620909,-3.691356,-5.09418
1,-0.197875,-0.217586,0.955124,0.943189,0.261823,0.243228,1.480237,0.037643,0.417763,-0.372272,...,-0.092767,-0.10074,-0.536801,-0.54691,-7.40959,-8.286622,6.9031,5.999316,-4.405974,-5.074741
2,-0.197293,-0.211868,0.956022,0.944656,0.256197,0.24667,1.115792,0.197734,0.357744,-0.362516,...,-0.091923,-0.101445,-0.534426,-0.543729,-7.473374,-8.239958,6.970332,6.062706,-4.249977,-5.116903
3,-0.196612,-0.214056,0.957253,0.943228,0.256893,0.244819,1.555727,0.079917,0.287182,-0.592759,...,-0.09187,-0.100751,-0.538218,-0.546398,-7.396417,-8.117047,6.895531,6.099309,-4.402901,-5.080026
4,-0.197523,-0.215268,0.956424,0.945416,0.259458,0.245035,1.350912,0.029976,0.228137,-0.448245,...,-0.089283,-0.09785,-0.537608,-0.547537,-7.309709,-8.238692,6.969044,6.053595,-4.422771,-5.140349
5,-0.196547,-0.213863,0.95405,0.945469,0.258108,0.247253,1.152642,0.270822,0.368619,-0.353228,...,-0.08907,-0.098837,-0.538417,-0.54644,-7.170889,-8.092456,6.878895,6.127601,-4.179443,-5.047237
6,-0.195333,-0.210328,0.954844,0.942741,0.256751,0.248782,1.156741,0.367854,0.176442,-0.468381,...,-0.088987,-0.098996,-0.536136,-0.546053,-7.355152,-8.040776,7.008446,6.067818,-4.39009,-5.040732
7,-0.194929,-0.206579,0.953667,0.94395,0.258187,0.247043,1.092113,0.242841,0.314977,-0.371277,...,-0.088451,-0.096941,-0.538556,-0.547317,-7.445187,-8.350356,6.82649,6.017362,-4.453908,-5.163562
8,-0.196318,-0.20915,0.956089,0.943395,0.25958,0.24908,1.450528,-0.24427,0.17053,-0.612298,...,-0.088428,-0.09671,-0.538732,-0.547649,-7.433635,-8.63454,6.934794,6.075479,-4.454804,-5.178462


# Functions TODO:
6. Train the given models -> sample code given
7. Test the given models -> sample code given
8. Print out the evaluation results -> sample code given

# Feature Engineering


In [8]:
def _window_features(data, segment_len, n_channels=3):
    """Cut `data` into consecutive windows and turn each window into one feature row.

    Parameters
    ----------
    data : 2-D numpy array whose last column holds the activity label.
    segment_len : number of rows per window; the final window may be shorter.
    n_channels : number of leading columns to summarise.

    Returns
    -------
    list of lists: for every window, (min, max, mean) of each summarised column
    followed by the label taken from the window's first row.
    """
    rows = []
    # Stepping over the actual length never yields an empty window, so an empty
    # input (or a length that is an exact multiple of segment_len) is safe.
    for start in range(0, len(data), segment_len):
        window = data[start:start + segment_len, :]
        row = []
        for channel in range(n_channels):
            column = window[:, channel]
            row.extend((np.min(column), np.max(column), np.mean(column)))
        row.append(window[0, -1])
        rows.append(row)
    return rows


def feature_engineering_example(dataset_dir='dataset', n_datasets=19, segment_len=1000, train_ratio=0.8):
    """Extract simple windowed features from the raw dataset files and save them as CSV.

    For every file `<dataset_dir>/dataset_<k>.txt` (k = 1..n_datasets) and every
    activity label 1..13 found in column 24:

    1. the 24 sensor columns are low-pass filtered (4th-order Butterworth,
       normalised cut-off 0.04, applied with `lfilter`);
    2. the rows are split chronologically, the first `train_ratio` share for
       training and the rest for testing;
    3. each part is cut into non-overlapping windows of `segment_len` rows, and
       min / max / mean of the first three columns (the wrist accelerometer axes
       according to the column table in this notebook) plus the label form one
       10-value sample per window.

    Overlapping windows, more sensors or more statistics are natural extensions.

    Parameters
    ----------
    dataset_dir : directory containing the `dataset_<k>.txt` files.
    n_datasets : number of files to process, starting at index 1.
    segment_len : rows per window.
    train_ratio : share of each activity's rows assigned to the training part.

    Side effects
    ------------
    Writes `training_data.csv` and `testing_data.csv` (no header, no index) to the
    current working directory and prints one progress line per file.

    Activities missing from a file, and parts with no rows, contribute no samples.
    """
    n_sensor_columns = 24   # columns 0..23 are sensor channels, column 24 is the label
    n_output_columns = 10   # 3 channels x (min, max, mean) + label

    # The filter design does not depend on the data, so compute it once.
    b, a = signal.butter(4, 0.04, 'low', analog=False)

    training_rows = []
    testing_rows = []
    for file_idx in range(1, n_datasets + 1):
        df = pd.read_csv(dataset_dir + '/dataset_' + str(file_idx) + '.txt', sep=',', header=None)
        print('deal with dataset ' + str(file_idx))
        for c in range(1, 14):
            # Explicit writable float copy: the filtered values are assigned back in place.
            activity_data = df[df[n_sensor_columns] == c].to_numpy(dtype=float, copy=True)
            if len(activity_data) == 0:
                # This activity was not recorded in this file.
                continue
            activity_data[:, :n_sensor_columns] = signal.lfilter(
                b, a, activity_data[:, :n_sensor_columns], axis=0)

            training_len = math.floor(len(activity_data) * train_ratio)
            training_rows.extend(_window_features(activity_data[:training_len, :], segment_len))
            testing_rows.extend(_window_features(activity_data[training_len:, :], segment_len))

    # reshape keeps the 10-column layout even when no sample was produced.
    df_training = pd.DataFrame(np.asarray(training_rows, dtype=float).reshape(-1, n_output_columns))
    df_testing = pd.DataFrame(np.asarray(testing_rows, dtype=float).reshape(-1, n_output_columns))
    df_training.to_csv('training_data.csv', index=None, header=None)
    df_testing.to_csv('testing_data.csv', index=None, header=None)


