In [1]:
from itertools import product
from pandas import Series
import numpy as np

def preprocess(data, kernel_size):
    data_out = np.zeros(data.shape)
    for ch in range(data.shape[1]):
        kps_seq_ch = data[:, ch]
        kps_seq_ch = Series(kps_seq_ch).rolling(kernel_size, min_periods=1, center=True).mean().to_numpy()
        data_out[:, ch] = kps_seq_ch
    return data_out

In [2]:
def segmentation(data, win_size):
    ''' Sliding window parameters '''
    win_len = int(30*win_size) # 1 sec x 30 Hz
    win_step = int(30*0.5) # 0.5 sec x 30 Hz
    sample_windows = []
    for start_time in range(0, data.shape[0], win_step):
        end_time = start_time + win_len
        if end_time > data.shape[0]:
            end_time = data.shape[0]
            start_time = end_time - win_len
        frame = data[start_time:end_time]
        assert frame.shape[0] == win_len, (start_time, end_time, data.shape[0])
        sample_windows.append(frame)
    sample_windows = np.array(sample_windows)
    return sample_windows

In [3]:
def feature_extraction(sample_windows):
    ''' extract mean and std from each frame'''
    N, T, D = sample_windows.shape
    feats = []
    for i in range(N):
        frame = sample_windows[i]
        feat = []
        for ch in range(D):
            frame_ch = frame[:,ch]
            # mean feature
            mean_ch = np.mean(frame_ch)
            feat.append(mean_ch)
            # std feature
            std_ch = np.std(frame_ch)
            feat.append(std_ch)
            # min feature
            min_ch = np.min(frame_ch)
            feat.append(min_ch)
            # max feature
            max_ch = np.max(frame_ch)
            feat.append(max_ch)
        feats.append(feat)
    feats = np.array(feats)
    return feats

In [4]:
import os
from collections import defaultdict
file_names = os.listdir('pose')

data_dict = defaultdict(list)
for file_name in file_names:
    data3D = np.load('pose/' + file_name)
    data = data3D.reshape(data3D.shape[0], -1)
    kernel = 5
    data_prep = preprocess(data, kernel)
    win_len = 1.5
    data_seg = segmentation(data_prep, win_len)
    N = data_seg.shape[0]
    features = feature_extraction(data_seg)
    data_dict[int(file_name[5:7])].append((features, [int(file_name[1:3]) - 1] * N)) 

In [5]:
train_x = []
train_y = []

for i in range(1, 8): # number of train subjects (1-8)
    for j in range(32): # number of training samples for each subject (16 activities * 2 (sit-stand))
        train_x.append(data_dict[i][j][0])
        train_y.append(data_dict[i][j][1])
        
trainx = np.vstack(train_x)
trainy = np.hstack(train_y)

test_x = []
test_y= []
for i in range(8, 11): # number of test subjects (1-8)
    for j in range(32): # number of training samples for each subject (16 activities * 2 (sit-stand))
        test_x.append(data_dict[i][j][0])
        test_y.append(data_dict[i][j][1])
        
testx = np.vstack(test_x)
testy = np.hstack(test_y)

In [6]:
pip install -U scikit-learn scipy matplotlib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

def build_rf(train_x, train_y, test_x, test_y):
    
    params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [2, 3, 5, 8, 10], 
    'min_samples_leaf': [3, 5, 10, 15, 20]}

    rf = RandomForestClassifier()

    grid = GridSearchCV(rf, param_grid=params, scoring='accuracy', cv=5)
    grid.fit(train_x, train_y)

    best_rf = grid.best_estimator_
    best_rf.fit(train_x, train_y)

    train_preds1 = best_rf.predict(train_x)
    test_preds1 = best_rf.predict(test_x)
    
    accuracy_train = accuracy_score(train_y, train_preds1)
    accuracy_test = accuracy_score(test_y, test_preds1)

    

    results = {
             'train_accuracy' : accuracy_train,
             'test_accuracy' : accuracy_test,
             'params': grid.best_params_
            }

    return results

In [8]:
import warnings
warnings.filterwarnings("ignore")

r1 = build_rf(trainx, trainy, testx, testy)

In [9]:
from sklearn.neural_network import MLPClassifier
def tune_nn(train_x, train_y, test_x, test_y):

    param_grid = {
    'hidden_layer_sizes': [(10,), (50,), (50,20), (100,), (100, 50)] ,
    'activation': ['logistic', 'tanh', 'relu'], 
    'alpha': [0.0001, 0.001, 0.01, 0.1]
    }
    
    nn = MLPClassifier()

    grid = GridSearchCV(nn, param_grid, cv=5, scoring='accuracy')

    grid.fit(train_x, train_y)
    
    best_nn = grid.best_estimator_
    best_nn.fit(train_x, train_y)
    

    train_preds1 = best_nn.predict(train_x)
    test_preds1 = best_nn.predict(test_x)
                                 
    accuracy_train = accuracy_score(train_y, train_preds1)
    accuracy_test = accuracy_score(test_y, test_preds1)

    

    results = {
             'train_accuracy' : accuracy_train,
             'test_accuracy' : accuracy_test,
             'params': grid.best_params_}

    return results

In [10]:
r2 = tune_nn(trainx, trainy, testx, testy)

In [11]:
r1

{'train_accuracy': 0.9776785714285714,
 'test_accuracy': 0.5,
 'params': {'max_depth': 10, 'min_samples_leaf': 3, 'n_estimators': 200}}

In [12]:
r2

{'train_accuracy': 0.9311224489795918,
 'test_accuracy': 0.5311418685121108,
 'params': {'activation': 'tanh',
  'alpha': 0.0001,
  'hidden_layer_sizes': (100, 50)}}