In [70]:
import os
import numpy as np
from collections import defaultdict
from itertools import product
from pandas import Series
import warnings
warnings.filterwarnings("ignore")
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [2]:
def preprocess(data, kernel_size):
    data_out = np.zeros(data.shape)
    for ch in range(data.shape[1]):
        kps_seq_ch = data[:, ch]
        kps_seq_ch = Series(kps_seq_ch).rolling(kernel_size, min_periods=1, center=True).mean().to_numpy()
        data_out[:, ch] = kps_seq_ch
    return data_out

In [3]:
def segmentation(data, win_size):
    ''' Sliding window parameters '''
    win_len = int(30*win_size) # 1 sec x 30 Hz
    win_step = int(30*0.5) # 0.5 sec x 30 Hz
    sample_windows = []
    for start_time in range(0, data.shape[0], win_step):
        end_time = start_time + win_len
        if end_time > data.shape[0]:
            end_time = data.shape[0]
            start_time = end_time - win_len
        frame = data[start_time:end_time]
        assert frame.shape[0] == win_len, (start_time, end_time, data.shape[0])
        sample_windows.append(frame)
    sample_windows = np.array(sample_windows)
    return sample_windows

In [52]:
def feature_extraction(sample_windows):
    ''' extract mean and std from each frame'''
    N, T, D = sample_windows.shape
    feats = []
    for i in range(N):
        frame = sample_windows[i]
        feat = []
        for ch in range(D):
            frame_ch = frame[:,ch]
            # mean feature
            mean_ch = np.mean(frame_ch)
            feat.append(mean_ch)
            # std feature
            std_ch = np.std(frame_ch)
            feat.append(std_ch)
            # min feature
            min_ch = np.min(frame_ch)
            feat.append(min_ch)
            # max feature
            max_ch = np.max(frame_ch)
            feat.append(max_ch)
        feats.append(feat)
    feats = np.array(feats)
    return feats

In [66]:
# get file names
file_names = os.listdir('pose')
# create a dictionary to store features along with labels
data_dict = defaultdict(list)
for file_name in file_names:
    # extract subject number from s##
    subject_number = int(file_name[5:7])
    # full directory of the numpy array
    directory = 'pose/' + file_name
    # label of activity from 0-15
    label_act = int(file_name[1:3]) - 1
    # read file
    data3D = np.load(directory)
    # reshape to remove the 3rd dimension
    data = data3D.reshape(data3D.shape[0], -1)
    # preprocess 
    kernel = 5
    data_prep = preprocess(data, kernel)
    # segment data
    win_len = 1.5
    data_seg = segmentation(data_prep, win_len)
    # number of segments
    N = data_seg.shape[0]
    # extract features
    features = feature_extraction(data_seg)
    # store in data_dict
    data_dict[subject_number].append((features, [label_act] * N)) 

In [67]:
trainx_list = []
train_labels = []

for i in range(1, 6): # number of train subjects (1-8)
    for j in range(32): # number of training samples for each subject (16 activities * 2 (sit-stand))
        trainx_list.append(data_dict[i][j][0])
        train_labels.append(data_dict[i][j][1])
        
trainx = np.vstack(trainx_list)
trainy = np.hstack(train_labels)

valx_list = []
val_labels = []

for i in range(6, 8): # number of train subjects (1-8)
    for j in range(32): # number of training samples for each subject (16 activities * 2 (sit-stand))
        valx_list.append(data_dict[i][j][0])
        val_labels.append(data_dict[i][j][1])
        
valx = np.vstack(valx_list)
valy = np.hstack(val_labels)

testx_list = []
test_labels = []
for i in range(8, 11): # number of test subjects (1-8)
    for j in range(32): # number of training samples for each subject (16 activities * 2 (sit-stand))
        testx_list.append(data_dict[i][j][0])
        test_labels.append(data_dict[i][j][1])
        
testx = np.vstack(testx_list)
testy = np.hstack(test_labels)

In [63]:
trainx.shape

(2243, 264)

In [68]:
# Define the hyperparameter grid to search
param_grid = {
    'hidden_layer_sizes': [(100, 50), (40, 20), (20, )],   
    'alpha': [0.001, 0.01], 
    'max_iter': [100], 
    'solver': ['adam'], 
    'learning_rate': ['adaptive']
}

# best score and best params to be found
best_score = 0
best_params = {}

# iterate through all combinations of hyperparameters
for params in product(*param_grid.values()):
    param_dict = {key: value for key, value in zip(param_grid.keys(), params)}
    print(params)
    
    # create a model with specific hyperparameters
    model = MLPClassifier(**param_dict)
    
    # train the model on the training data
    model.fit(trainx, trainy)
    
    # evaluate on the validation set
    val_predictions = model.predict(valx)
    val_accuracy = accuracy_score(valy, val_predictions)
    
    # check if this set of parameters is better than the previous best
    if val_accuracy > best_score:
        best_score = val_accuracy
        best_params = param_dict

# train the best model on the combined training and validation data
best_model = MLPClassifier(**best_params)
trainx_all = np.vstack((trainx, valx))
trainy_all = np.hstack((trainy, valy))
best_model.fit(trainx_all, trainy_all)

# evaluate the best model on the test set
test_predictions = best_model.predict(testx)
test_accuracy = accuracy_score(testy, test_predictions)

print("Best Parameters:", best_params)
print("Validation Set Accuracy with Best Parameters:", best_score)
print("Test Set Accuracy with Best Parameters:", test_accuracy)


((100, 50), 0.001, 100, 'adam', 'adaptive')
((100, 50), 0.01, 100, 'adam', 'adaptive')
((40, 20), 0.001, 100, 'adam', 'adaptive')
((40, 20), 0.01, 100, 'adam', 'adaptive')
((20,), 0.001, 100, 'adam', 'adaptive')
((20,), 0.01, 100, 'adam', 'adaptive')
Best Parameters: {'hidden_layer_sizes': (100, 50), 'alpha': 0.001, 'max_iter': 100, 'solver': 'adam', 'learning_rate': 'adaptive'}
Validation Set Accuracy with Best Parameters: 0.6013437849944009
Test Set Accuracy with Best Parameters: 0.5415224913494809


In [69]:
# Define the hyperparameter grid to search
n = 5
param_grid = {
    'n_estimators': [20, 30, 50],
    'max_depth': list(np.arange(2, n+1)),  
    'min_samples_leaf': list(np.arange(2, n+1))  
}

# best score and best params to be found
best_score = 0
best_params = {}

# iterate through all combinations of hyperparameters
for params in product(*param_grid.values()):
    param_dict = {key: value for key, value in zip(param_grid.keys(), params)}
    print(params)
    
    # create a model with specific hyperparameters
    model = RandomForestClassifier(**param_dict)
    
    # train the model on the training data
    model.fit(trainx, trainy)
    
    # evaluate on the validation set
    val_predictions = model.predict(valx)
    val_accuracy = accuracy_score(valy, val_predictions)
    
    # check if this set of parameters is better than the previous best
    if val_accuracy > best_score:
        best_score = val_accuracy
        best_params = param_dict

# train the best model on the combined training and validation data
best_model = RandomForestClassifier(**best_params)
trainx_all = np.vstack((trainx, valx))
trainy_all = np.hstack((trainy, valy))
best_model.fit(trainx_all, trainy_all)

# evaluate the best model on the test set
test_predictions = best_model.predict(testx)
test_accuracy = accuracy_score(testy, test_predictions)

print("Best Parameters:", best_params)
print("Validation Set Accuracy with Best Parameters:", best_score)
print("Test Set Accuracy with Best Parameters:", test_accuracy)


(20, 2, 2)
(20, 2, 3)
(20, 2, 4)
(20, 2, 5)
(20, 3, 2)
(20, 3, 3)
(20, 3, 4)
(20, 3, 5)
(20, 4, 2)
(20, 4, 3)
(20, 4, 4)
(20, 4, 5)
(20, 5, 2)
(20, 5, 3)
(20, 5, 4)
(20, 5, 5)
(30, 2, 2)
(30, 2, 3)
(30, 2, 4)
(30, 2, 5)
(30, 3, 2)
(30, 3, 3)
(30, 3, 4)
(30, 3, 5)
(30, 4, 2)
(30, 4, 3)
(30, 4, 4)
(30, 4, 5)
(30, 5, 2)
(30, 5, 3)
(30, 5, 4)
(30, 5, 5)
(50, 2, 2)
(50, 2, 3)
(50, 2, 4)
(50, 2, 5)
(50, 3, 2)
(50, 3, 3)
(50, 3, 4)
(50, 3, 5)
(50, 4, 2)
(50, 4, 3)
(50, 4, 4)
(50, 4, 5)
(50, 5, 2)
(50, 5, 3)
(50, 5, 4)
(50, 5, 5)
Best Parameters: {'n_estimators': 50, 'max_depth': 5, 'min_samples_leaf': 4}
Validation Set Accuracy with Best Parameters: 0.49608062709966405
Test Set Accuracy with Best Parameters: 0.4662629757785467
