# har-classifiers

Unsupervised cross-subject domain adaptation for human activity recognition.

In [0]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2DTranspose, Lambda

import pandas as pd
import numpy as np
import math
from matplotlib import pyplot as plt
import collections
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from IPython.display import clear_output

import os
import time

import gc

print(tf.__version__)

2.2.0


In [None]:
# Mount Google Drive at /content/gdrive; the data and checkpoint paths used
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

## Dataset

We use the UCI HAR dataset. For more information about this dataset, see [Human Activity Recognition Using Smartphones Data Set, UCI Machine Learning Repository](https://link.springer.com/chapter/10.1007/978-3-642-35395-6_30).

In [0]:
# Fetch the dataset archive (extract=True also unpacks it) and point PATH at
# the 'UCI HAR Dataset/' folder located next to the downloaded zip file.
_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip'

path_to_zip = tf.keras.utils.get_file(
    fname='HARDataset.zip',
    origin=_URL,
    extract=True,
)
_download_dir = os.path.dirname(path_to_zip)
PATH = os.path.join(_download_dir, 'UCI HAR Dataset/')

Downloading data from https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip


### Load data

In [0]:
def load_file(filepath):
    '''
    load a single whitespace-delimited text file as a numpy array

    filepath: path of the file to read; the file is read without a header row
    returns: the parsed values as a 2D array, one row per line of the file
    '''
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True flag and splits columns the same way.
    dataframe = pd.read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values


def load_group(filenames):
    '''
    read every file in filenames and stack the resulting 2D arrays along a
    third axis, giving a 3D array of [samples, timesteps, features]
    '''
    per_file = [load_file(path) for path in filenames]
    return np.dstack(per_file)


def load_dataset(train_dir_path, test_dir_path):
    '''
    load the inertial-signal windows and labels of the train and test splits

    train_dir_path: directory holding 'Inertial Signals/' and 'y_train.txt'
    test_dir_path: directory holding 'Inertial Signals/' and 'y_test.txt'
    returns: train_X, train_y, test_X, test_y

    Paths are assembled with os.path.join, so the directory arguments work
    with or without a trailing path separator.
    '''
    files_prefix = ['total_acc_x_', 'total_acc_y_', 'total_acc_z_', 'body_acc_x_',
                    'body_acc_y_', 'body_acc_z_', 'body_gyro_x_', 'body_gyro_y_', 'body_gyro_z_']

    def signal_files(dir_path, split):
        # one file per signal prefix, e.g. 'total_acc_x_' + 'train' + '.txt'
        return [os.path.join(dir_path, 'Inertial Signals', pre + split + '.txt')
                for pre in files_prefix]

    train_X = load_group(signal_files(train_dir_path, 'train'))
    train_y = load_file(os.path.join(train_dir_path, 'y_train.txt'))

    test_X = load_group(signal_files(test_dir_path, 'test'))
    test_y = load_file(os.path.join(test_dir_path, 'y_test.txt'))

    return train_X, train_y, test_X, test_y


def scale_data(X):
    '''
    scale data to [-1, 1] by dividing by its largest absolute value

    X: numeric numpy array
    returns: X divided by max(|X|); an all-zero X comes back as float zeros
    rather than the NaNs that a 0 / 0 division would produce
    '''
    peak = abs(X).max()
    if peak == 0:
        # nothing to scale, and dividing by zero would fill the result with NaN
        return np.zeros_like(X, dtype=float)
    return X / peak


def scale_dataset(dataset):
    '''
    scale every channel (last axis) of dataset independently with scale_data
    and stack the scaled channels back together along the third axis
    '''
    n_channels = dataset.shape[-1]
    per_channel = [scale_data(dataset[..., ch]) for ch in range(n_channels)]
    return np.dstack(per_channel)


def data_for_subject(X, y, sub_map, sub_id):
    '''
    select the rows of X and y that belong to subject sub_id

    sub_map[k] holds the subject id of row k
    '''
    rows = [k for k, owner in enumerate(sub_map) if owner == sub_id]
    return X[rows], y[rows]


def to_series(windows):
    '''
    flatten a run of continuous, half-overlapping windows into one 1D list:
    the first half of the first window, then the second half of every window
    '''
    count, width = windows.shape
    half = width // 2
    flat = list(windows[0][:half])
    for idx in range(count):
        flat.extend(windows[idx][half:])
    return flat

Load train data and test data

In [0]:
# Read the windows and labels of both splits from the extracted dataset folder.
train_X, train_y, test_X, test_y = load_dataset(
    PATH + 'train/', PATH + 'test/')

# zero-offset class values (done in place on the freshly loaded label arrays)
train_y -= 1
test_y -= 1

# Merge the two splits into one pool of windows and one flat label vector.
total_X = np.vstack((train_X, test_X))
total_y = np.vstack((train_y, test_y))
total_y = total_y.reshape(total_y.shape[0])

print(total_X.shape, total_y.shape)

# Per-window subject ids, stacked in the same train-then-test order as total_X.
train_sub_map = load_file(PATH + 'train/subject_train.txt')
test_sub_map = load_file(PATH + 'test/subject_test.txt')
sub_map = np.vstack((train_sub_map, test_sub_map))

(10299, 128, 9) (10299,)


In [0]:
# Google Drive folders used for the saved arrays and the model checkpoints.
har_uda_root = '/content/gdrive/My Drive/har-uda/'
data_path = os.path.join(har_uda_root, 'data/')
ckp_path = os.path.join(har_uda_root, 'checkpoints/')

In [0]:
# Split every subject's windows into train / validation / test sets, keep the
# splits in sub_data (one dict per subject) and save each array under data_path.
sub_num = 30
sub_data = []

# np.save does not create missing folders, so make sure the target exists.
os.makedirs(data_path, exist_ok=True)

name_list = ['data_x_train', 'data_x_val', 'data_x_test', 'data_y_train', 'data_y_val', 'data_y_test']

for i in range(sub_num):
    # subjects are looked up by id i+1; each subject is scaled on its own data
    data_x, data_y = data_for_subject(total_X, total_y, sub_map, i+1)
    data_x = scale_dataset(data_x)

    # hold out 20% for testing, then 30% of the remainder for validation;
    # both splits are stratified by label and use a fixed random_state
    data_x_train, data_x_test, data_y_train, data_y_test = train_test_split(
        data_x, data_y, test_size=0.2, random_state=1, stratify=data_y)
    data_x_train, data_x_val, data_y_train, data_y_val = train_test_split(
        data_x_train, data_y_train, test_size=0.3, random_state=1, stratify=data_y_train)

    data_list = [data_x_train, data_x_val, data_x_test, data_y_train, data_y_val, data_y_test]
    data_i = dict(zip(name_list, data_list))
    for name, data_ele in data_i.items():
        save_path = data_path + name + '_{}.npy'.format(i+1)
        np.save(save_path, data_ele)
    sub_data.append(data_i)

### Analyze data

In [0]:
def plot_window(window, fig_title='window'):
    '''
    plot every channel of one window as a stacked column of subplots

    window: 2D array indexed as [timestep, channel]
    fig_title: text shown as the figure's super-title
    '''
    fig = plt.figure()
    n = window.shape[-1]

    for i in range(n):
        ax = fig.add_subplot(n, 1, i + 1)
        ax.plot(window[:, i], 'r')
        ax.xaxis.set_visible(False)
        # same fixed y-range on every subplot so channels are comparable
        ax.set_ylim([-1.0, 1.0])

    fig.suptitle(fig_title)

    plt.show()
    
def class_breakdown(data):
    '''
    summarize the balance of classes as a bar chart

    data: labels in any form pd.DataFrame accepts; the counts are taken from
    column 0 of the resulting frame
    '''
    # activities = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING', 'STANDING', 'LAYING']
    df = pd.DataFrame(data)
    df[0].value_counts().plot(kind='bar', title='Activity')
    plt.show()

def plot_subject(X, y):
    '''
    plot all signals of a single subject, one subplot per channel, followed
    by a final subplot showing the activity labels

    X: windows indexed as [window, timestep, channel]
    y: the label sequence drawn in the last subplot
    '''
    fig = plt.figure(figsize=(10, 20))
    n_channels = X.shape[2]
    n_rows = n_channels + 1  # one extra row for the activity plot

    prefixes = ('total acc ', 'body acc ', 'body gyro ')
    title = [prefix + axis_name for prefix in prefixes for axis_name in ('x', 'y', 'z')]

    for ch in range(n_channels):
        ax = fig.add_subplot(n_rows, 1, ch + 1)
        series = to_series(X[..., ch])
        ax.plot(series, 'r')
        ax.set_title(title[ch])
        ax.xaxis.set_visible(False)
        # pad the y-range by one standard deviation on each side
        spread = np.std(series)
        ax.set_ylim([min(series) - spread, max(series) + spread])
        ax.grid(True)

    label_ax = fig.add_subplot(n_rows, 1, n_rows)
    label_ax.plot(y, 'g')
    label_ax.set_title('activity')

    plt.subplots_adjust(hspace=0.2)
    plt.show()

## Classifier model

Use a multi-layer CNN followed by stacked LSTM layers as the classifier

In [0]:
# dataset parameters
TIMESTEP = 128
CHANNELS = 9
WIN_SHAPE = (TIMESTEP, CHANNELS)
NUM_CLASSES = 6

# model parameters
CLASSIFIER_FILTERS = 64

# train parameters
BATCH_SIZE = 16

In [0]:
def build_classifier():
    '''
    build the (uncompiled) activity classifier

    Four Conv1D layers (CLASSIFIER_FILTERS filters, kernel size 5, ReLU) feed
    two LSTM layers with TIMESTEP units that return full sequences, each
    preceded by 50% dropout; the result is flattened, passed through dropout
    again and classified by a softmax Dense layer with NUM_CLASSES outputs.
    '''
    layers = tf.keras.layers
    inputs = layers.Input(shape=WIN_SHAPE)

    features = inputs
    for _ in range(4):
        features = layers.Conv1D(CLASSIFIER_FILTERS, 5, activation='relu')(features)

    for _ in range(2):
        features = layers.Dropout(0.5)(features)
        features = layers.LSTM(TIMESTEP, return_sequences=True)(features)

    features = layers.Flatten()(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(NUM_CLASSES, activation='softmax')(features)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

Train one classifier per subject, using only that subject's own training data

In [0]:
# Train one classifier per subject on that subject's own train/validation
# split and keep every trained model in `classifiers`.
sub_num = 30
classifiers = []

# Make sure the checkpoint folder exists before any model is saved into it.
os.makedirs(ckp_path, exist_ok=True)

for i in range(sub_num):
    model_path = ckp_path + 'classifier_{}.hdf5'.format(i+1)

    x_train = sub_data[i]['data_x_train']
    y_train = tf.one_hot(sub_data[i]['data_y_train'], NUM_CLASSES)
    x_val = sub_data[i]['data_x_val']
    y_val = tf.one_hot(sub_data[i]['data_y_val'], NUM_CLASSES)

    # stop after 5 epochs without val_loss improvement; separately write the
    # model with the best val_accuracy to model_path
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=5)
    mc = tf.keras.callbacks.ModelCheckpoint(model_path, monitor='val_accuracy', mode='max', verbose=0, save_best_only=True)

    model = build_classifier()
    model.compile(optimizer='rmsprop', loss=tf.keras.losses.CategoricalCrossentropy(), metrics=['accuracy'])
    classifiers.append(model)

    # NOTE(review): EarlyStopping is created without restore_best_weights, so
    # the model kept in `classifiers` presumably holds the last-epoch weights,
    # while the best-val_accuracy weights are only in the checkpoint file -
    # confirm which of the two the evaluation is meant to use.
    model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=100, batch_size=4, verbose=0, callbacks=[es, mc])

In [0]:
# Score every classifier i on the test split of every subject j.
# acc[i][j] is the accuracy and conf_mat[i][j] the confusion matrix
# (rows: true class, columns: predicted class) of that pairing.
sub_num = 30
acc = np.zeros(shape=(sub_num, sub_num))
conf_mat = np.zeros(shape=(sub_num, sub_num, NUM_CLASSES, NUM_CLASSES))

# Pin the label set so the matrix is always NUM_CLASSES x NUM_CLASSES, even
# when a class is absent from both the true and the predicted labels.
class_labels = list(range(NUM_CLASSES))

for i in range(sub_num):
    for j in range(sub_num):
        x_test = sub_data[j]['data_x_test']
        y_test = tf.one_hot(sub_data[j]['data_y_test'], NUM_CLASSES)
        _, acc[i][j] = classifiers[i].evaluate(x_test, y_test, batch_size=4)
        y_test_pred = classifiers[i].predict(x_test)
        conf_mat[i][j] = confusion_matrix(
            y_test.numpy().argmax(axis=1),
            y_test_pred.argmax(axis=1),
            labels=class_labels,
        )


save_path = data_path + 'notransfer_acc.npy'
np.save(save_path, acc)
save_path = data_path + 'notransfer_conf_mat.npy'
np.save(save_path, conf_mat)



In [0]:
# Accuracy table: row i is the classifier trained on subject index i, column j
# the subject index whose test split it was evaluated on (both 0-based).
acc_df = pd.DataFrame(acc)
acc_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
0,1.0,0.721311,0.84058,0.578125,0.721311,0.815385,0.83871,0.877193,0.603448,0.559322,0.859375,0.8125,0.787879,0.753846,0.772727,0.594595,0.635135,0.945205,0.861111,0.802817,0.658537,0.784615,0.826667,0.857143,0.768293,0.848101,0.881579,0.701299,0.869565,0.831169
1,0.914286,1.0,0.797101,0.453125,0.540984,0.8,0.677419,0.77193,0.603448,0.423729,0.5625,0.78125,0.69697,0.584615,0.69697,0.581081,0.675676,0.794521,0.75,0.760563,0.634146,0.815385,0.733333,0.74026,0.426829,0.734177,0.881579,0.727273,0.507246,0.675325
2,0.885714,0.639344,1.0,0.390625,0.540984,0.861538,0.919355,0.684211,0.37931,0.457627,0.703125,0.78125,0.969697,0.815385,0.878788,0.648649,0.824324,0.849315,0.930556,0.971831,0.756098,0.876923,0.893333,0.974026,0.585366,0.810127,0.973684,0.753247,0.623188,0.753247
3,0.342857,0.327869,0.492754,0.96875,0.901639,0.507692,0.451613,0.491228,0.431034,0.254237,0.578125,0.40625,0.454545,0.446154,0.545455,0.22973,0.445946,0.465753,0.513889,0.549296,0.439024,0.553846,0.506667,0.519481,0.780488,0.670886,0.513158,0.272727,0.57971,0.415584
4,0.6,0.557377,0.652174,0.625,0.901639,0.723077,0.596774,0.54386,0.62069,0.355932,0.71875,0.75,0.575758,0.292308,0.772727,0.22973,0.5,0.60274,0.875,0.746479,0.231707,0.8,0.373333,0.623377,0.695122,0.822785,0.907895,0.337662,0.753623,0.506494
5,0.728571,0.606557,0.811594,0.65625,0.688525,1.0,0.612903,0.701754,0.517241,0.491525,0.921875,0.765625,0.742424,0.661538,0.863636,0.432432,0.540541,0.767123,0.847222,0.690141,0.585366,0.8,0.666667,0.857143,0.780488,0.873418,0.776316,0.571429,0.84058,0.727273
6,0.442857,0.47541,0.608696,0.359375,0.393443,0.538462,0.806452,0.491228,0.344828,0.220339,0.40625,0.59375,0.590909,0.507692,0.545455,0.378378,0.5,0.561644,0.541667,0.591549,0.439024,0.538462,0.586667,0.701299,0.426829,0.506329,0.618421,0.532468,0.434783,0.415584
7,0.7,0.688525,0.884058,0.515625,0.639344,0.876923,0.693548,1.0,0.655172,0.474576,0.59375,0.640625,0.863636,0.876923,0.636364,0.662162,0.743243,0.821918,0.861111,0.859155,0.756098,0.784615,0.893333,0.987013,0.609756,0.696203,0.947368,0.714286,0.681159,0.649351
8,0.7,0.557377,0.623188,0.484375,0.622951,0.4,0.725806,0.666667,0.810345,0.508475,0.328125,0.609375,0.590909,0.661538,0.606061,0.621622,0.648649,0.671233,0.722222,0.690141,0.621951,0.723077,0.68,0.805195,0.536585,0.696203,0.815789,0.636364,0.710145,0.649351
9,0.757143,0.836066,0.333333,0.6875,0.770492,0.723077,0.5,0.929825,0.706897,1.0,0.765625,0.734375,0.484848,0.215385,0.787879,0.5,0.324324,0.589041,0.777778,0.605634,0.207317,0.707692,0.52,0.558442,0.743902,0.78481,0.763158,0.636364,0.84058,0.532468


In [0]:
# Show the accuracy sub-table for a handful of subject indices.
picked = [0, 5, 10, 15]
acc_df.iloc[picked, picked]

Unnamed: 0,0,5,10,15
0,1.0,0.815385,0.859375,0.594595
5,0.728571,1.0,0.921875,0.432432
10,0.871429,0.907692,1.0,0.364865
15,0.4,0.384615,0.1875,1.0


In [0]:
# Print the confusion matrix of every (classifier, test subject) pairing among
# the selected indices, leaving out the pairs where both indices are equal.
picked = [0, 5, 10, 15]
for i in picked:
    for j in picked:
        if i != j:
            print("Confusion matrix of (%d,%d):" % (i, j))
            print(pd.DataFrame(conf_mat[i][j]))

Confusion matrix of (0,5):
      0    1     2    3     4     5
0  11.0  0.0   0.0  0.0   0.0   0.0
1   9.0  0.0   1.0  0.0   0.0   0.0
2   0.0  0.0  10.0  0.0   0.0   0.0
3   0.0  0.0   0.0  9.0   2.0   0.0
4   0.0  0.0   0.0  0.0  12.0   0.0
5   0.0  0.0   0.0  0.0   0.0  11.0
Confusion matrix of (0,10):
     0    1    2     3    4     5
0  8.0  4.0  0.0   0.0  0.0   0.0
1  1.0  7.0  3.0   0.0  0.0   0.0
2  0.0  0.0  9.0   0.0  0.0   0.0
3  0.0  0.0  0.0  11.0  0.0   0.0
4  0.0  0.0  0.0   1.0  8.0   0.0
5  0.0  0.0  0.0   0.0  0.0  12.0
Confusion matrix of (0,15):
     0    1     2    3     4     5
0  9.0  0.0   1.0  0.0   0.0   0.0
1  5.0  0.0   4.0  0.0   0.0   1.0
2  0.0  0.0  10.0  0.0   0.0   0.0
3  0.0  0.0   0.0  2.0  12.0   0.0
4  1.0  0.0   0.0  6.0   9.0   0.0
5  0.0  0.0   0.0  0.0   0.0  14.0
Confusion matrix of (5,0):
      0    1    2    3    4    5
0  12.0  7.0  0.0  0.0  0.0  0.0
1   2.0  8.0  1.0  0.0  0.0  0.0
2   0.0  3.0  7.0  0.0  0.0  0.0
3   0.0  0.0  0.0  9.0 