<h1>Activity Classification based on Accelerometer Data</h1>

In [3]:
import numpy as np
import pandas as pd
import glob
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, FunctionTransformer
from sklearn.impute import SimpleImputer
import keras_tuner as kt

<h2>Preprocessing</h2>

In [4]:
names = ['subject-id', 'activity-code', 'timestamp', 'x', 'y', 'z']

path = r'wisdm-dataset/raw/phone/accel' # use your path
all_files = glob.glob(path + "/*.txt")

li = []

for filename in all_files:
    df = pd.read_csv(filename, names=names)
    li.append(df)

frame_phone_accel = pd.concat(li, axis=0, ignore_index=True)

frame_phone_accel['z'] = frame_phone_accel['z'].str.rstrip(';').astype(float)

In [5]:
X_train_full_phone_accel, X_test_phone_accel, y_train_full_phone_accel, y_test_phone_accel = train_test_split(frame_phone_accel[['timestamp', 'x', 'y', 'z']], frame_phone_accel[['timestamp', 'activity-code']], test_size=0.1, random_state=150, stratify=frame_phone_accel['subject-id'])
X_train_phone_accel, X_valid_phone_accel, y_train_phone_accel, y_valid_phone_accel = train_test_split(X_train_full_phone_accel, y_train_full_phone_accel, test_size=X_test_phone_accel.shape[0], random_state=150)

In [6]:
def convert_timestamp_to_datetime(X):
    return pd.to_datetime(X, unit='ns')

timestamp_transformer = FunctionTransformer(convert_timestamp_to_datetime)
time_pipeline = make_pipeline(timestamp_transformer)

X_train_phone_accel['timestamp'] = time_pipeline.fit_transform(X_train_phone_accel['timestamp'])
X_valid_phone_accel['timestamp'] = time_pipeline.fit_transform(X_valid_phone_accel['timestamp'])
X_test_phone_accel['timestamp'] = time_pipeline.fit_transform(X_test_phone_accel['timestamp'])

y_train_phone_accel['timestamp'] = time_pipeline.fit_transform(y_train_phone_accel['timestamp'])
y_valid_phone_accel['timestamp'] = time_pipeline.fit_transform(y_valid_phone_accel['timestamp'])
y_test_phone_accel['timestamp'] = time_pipeline.fit_transform(y_test_phone_accel['timestamp'])

In [7]:
num_sets = [X_train_phone_accel, X_valid_phone_accel, X_test_phone_accel]
cat_sets = [y_train_phone_accel, y_valid_phone_accel, y_test_phone_accel]

<h3>Due to limited computational resources, datasets will be shortened to every 1 minute</h3>

In [8]:
def set_index(df: pd.DataFrame):
    try:
        return df.set_index('timestamp', inplace=True)
    except:
        pass

def resample_num(df: pd.DataFrame):
    return df.resample('1min').mean()

def resample_cat(df: pd.DataFrame):
    return df.resample('1min').max()

for set in num_sets:
    set_index(set)

for set in cat_sets:
    set_index(set)

X_train_phone_accel_resampled = resample_num(X_train_phone_accel)
X_valid_phone_accel_resampled = resample_num(X_train_phone_accel)
X_test_phone_accel_resampled = resample_num(X_test_phone_accel)

y_train_phone_accel_resampled = resample_cat(y_train_phone_accel)
y_valid_phone_accel_resampled = resample_cat(y_valid_phone_accel)
y_test_phone_accel_resampled = resample_cat(y_test_phone_accel)

In [9]:
num_imputer = SimpleImputer(strategy='constant', fill_value=0).set_output(transform='pandas')
X_train_phone_accel_resampled = num_imputer.fit_transform(X_train_phone_accel_resampled)
X_valid_phone_accel_resampled = num_imputer.transform(X_valid_phone_accel_resampled)
X_test_phone_accel_resampled = num_imputer.transform(X_test_phone_accel_resampled)

<h3>Feature scaling and encoding</h3>

In [10]:
phone_accel_scaler = MinMaxScaler().set_output(transform='pandas')
phone_gyro_scaler = MinMaxScaler().set_output(transform='pandas')
watch_accel_scaler = MinMaxScaler().set_output(transform='pandas')
watch_gyro_scaler = MinMaxScaler().set_output(transform='pandas')

scaled_X_train_phone_accel = phone_accel_scaler.fit_transform(X_train_phone_accel_resampled) 
scaled_X_valid_phone_accel = phone_accel_scaler.transform(X_valid_phone_accel_resampled)
scaled_X_test_phone_accel = phone_accel_scaler.transform(X_test_phone_accel_resampled)

In [11]:
encoder = OneHotEncoder(sparse_output=False).set_output(transform='pandas')
y_train_phone_accel_cat = encoder.fit_transform(y_train_phone_accel_resampled)
y_valid_phone_accel_cat = encoder.transform(y_valid_phone_accel_resampled)
y_test_phone_accel_cat = encoder.transform(y_test_phone_accel_resampled)

<h1>Hyperparameter Tuning</h1>

In [12]:
def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=1, max_value=5)
    n_neurons = hp.Int("n_neurons", min_value=10, max_value=100)
    learning_rate = hp.Float("learning_rate", min_value=1e-6, max_value=10, sampling="log")
    optimizer = hp.Choice("optimizer", values=["sgd", "rmsprop", "adam", "adamax", "nadam", "adamw"])

    if optimizer == "sgd":
        momentum = hp.Float('momentum', min_value=0, max_value=1)
        nesterov = hp.Boolean('nesterov')
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum, nesterov=nesterov)

    elif optimizer == "rmsprop":
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

    elif optimizer == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    elif optimizer == "adamax":
        optimizer = tf.keras.optimizers.Adamax(learning_rate=learning_rate)

    elif optimizer == "nadam":
        optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate)

    else:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate)


    phone_accel_inp = tf.keras.Input(shape=(3,))

    # Create dictionary to store hidden layers per input
    phone_accel_hidden_layer_dict = {}
    phone_accel_hidden_layer_dict['hidden1'] = tf.keras.layers.Dense(n_neurons, activation='relu')(phone_accel_inp)
    # Adds remaining n - 1 hidden layers
    for i in range(2, n_hidden + 1):
        phone_accel_hidden_layer_dict['hidden'+str(i)] = tf.keras.layers.Dense(n_neurons, activation='relu')(phone_accel_hidden_layer_dict['hidden'+str(i-1)])

    out = tf.keras.layers.Dense(19, activation='softmax', name='Activity')(phone_accel_hidden_layer_dict['hidden'+str(n_hidden)])

    model = tf.keras.Model(phone_accel_inp, out)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [13]:
# To be able to tune parameters in the .fit() model
class MyHyperModel(kt.HyperModel):
    def build(self, hp):
        return build_model(hp)
    
    def fit(self, hp, model, X, y, **kwargs):
        batch_size = hp.Int('batch_size', min_value=2, max_value=32)
        return model.fit(X, y, batch_size=batch_size, **kwargs)


In [15]:
bayesian_opt_tuner = kt.BayesianOptimization(MyHyperModel(), objective='val_accuracy', seed=150, max_trials=100, overwrite=False, directory='wisdm_hypermodel_trials', project_name='my_bayesian_search')
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

In [341]:
bayesian_opt_tuner.search(scaled_X_train_phone_accel, y_train_phone_accel_cat, validation_data=(scaled_X_valid_phone_accel, y_valid_phone_accel_cat), epochs=500, callbacks=[early_stopping_cb])

Epoch 1/500


[1m3370/3370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.0057 - loss: 2.9030 - val_accuracy: 0.0043 - val_loss: 2.8621
Epoch 2/500
[1m3370/3370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.0072 - loss: 2.8499 - val_accuracy: 0.9090 - val_loss: 2.8132
Epoch 3/500
[1m3370/3370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9069 - loss: 2.8011 - val_accuracy: 0.9064 - val_loss: 2.7638
Epoch 4/500
[1m3370/3370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9085 - loss: 2.7507 - val_accuracy: 0.9062 - val_loss: 2.7125
Epoch 5/500
[1m3370/3370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9051 - loss: 2.6992 - val_accuracy: 0.9062 - val_loss: 2.6583
Epoch 6/500
[1m3370/3370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9035 - loss: 2.6452 - val_accuracy: 0.9062 - val_loss: 2.6011
Epoch 7/500
[1m

KeyboardInterrupt: 

In [98]:
best_model.predict(X_test_phone_accel.head(1000))

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


array([[0.09625658, 0.09123437, 0.11962746, ..., 0.03263706, 0.03468076,
        0.03349104],
       [0.06517798, 0.09440621, 0.0996747 , ..., 0.06293933, 0.02388868,
        0.04093253],
       [0.06903262, 0.14687477, 0.07293101, ..., 0.05375191, 0.02223265,
        0.03528691],
       ...,
       [0.0994477 , 0.10225948, 0.03444537, ..., 0.01946354, 0.01421452,
        0.03750743],
       [0.07665082, 0.0833503 , 0.08437973, ..., 0.04352269, 0.03786774,
        0.0423969 ],
       [0.08526269, 0.13837524, 0.08713396, ..., 0.02302231, 0.02138955,
        0.06070355]], dtype=float32)