In [2]:
import numpy as np
import pandas as pd
from scipy.fft import fft, ifft
from scipy.interpolate import interp1d
from scipy.stats import entropy, iqr, kurtosis, mode, skew
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import (Activation, BatchNormalization, Bidirectional,
                                     Conv1D, Conv2D, Dense, Dropout,
                                     Flatten, GlobalAveragePooling1D, LSTM,
                                     MaxPool1D, MaxPooling2D, Reshape,
                                     TimeDistributed)
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import os
import pickle
from pandas import read_csv, unique
from scipy.signal import argrelextrema, butter, find_peaks, lfilter, lfilter_zi


2023-11-07 10:01:23.504574: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-07 10:01:24.115718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-11-07 10:01:24.115741: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-11-07 10:01:26.071245: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [3]:
def signal_smoothing(df, cutoff):
    # Define the filter parameters
    nyquist_freq = 0.5 * 25  # Nyquist frequency for a 25 Hz sample rate
    normalized_cutoff = cutoff / nyquist_freq

    # Filter order selection (adjust as needed)
    order = 4

    # Create a Butterworth low-pass filter
    b, a = butter(order, float(normalized_cutoff), btype='low')
    
    # Apply the filter to the data with initial conditions
    zi = lfilter_zi(b, a)
    filtered_data, _ = lfilter(b, a, df, zi=zi*df[0])
    
    return filtered_data

In [12]:
def base_feature_extraction(df, fft_cols=False):
    time_interval = 1/25 # 1/freq
    pca = PCA(n_components=1)
    
    # calculate magnitude of vector
    df["magnitude"] = np.sqrt(df['accel_x']**2 + df['accel_y']**2 + df['accel_z']**2)
    df["gyro_magnitude"] = np.sqrt(df['gyro_x']**2 + df['gyro_y']**2 + df['gyro_z']**2)
    # vector normalise 
    df["accel_x"] = df["accel_x"]/df['magnitude']
    df["accel_y"] = df["accel_y"]/df['magnitude']
    df["accel_z"] = df["accel_z"]/df['magnitude']
    df["gyro_x"] = df["gyro_x"]/df['gyro_magnitude']
    df["gyro_y"] = df["gyro_y"]/df['gyro_magnitude']
    df["gyro_z"] = df["gyro_z"]/df['gyro_magnitude']
    
    max_jerk = 10
    # calculate jerk of data
    df["jerk_x"] = np.clip(np.gradient(df["accel_x"], time_interval), -max_jerk, max_jerk)
    df["jerk_y"] = np.clip(np.gradient(df["accel_y"], time_interval), -max_jerk, max_jerk)
    df["jerk_z"] = np.clip(np.gradient(df["accel_z"], time_interval), -max_jerk, max_jerk)
    df["jerk_mag"] = np.sqrt(df['jerk_x']**2 + df['jerk_y']**2 + df['jerk_z']**2)

    # calculate jerk of data
    df["gyro_jerk_x"] = np.clip(np.gradient(df["gyro_x"], time_interval), -max_jerk, max_jerk)
    df["gyro_jerk_y"] = np.clip(np.gradient(df["gyro_y"], time_interval), -max_jerk, max_jerk)
    df["gyro_jerk_z"] = np.clip(np.gradient(df["gyro_z"], time_interval), -max_jerk, max_jerk)
    df["gyro_jerk_mag"] = np.sqrt(df['gyro_jerk_x']**2 + df['gyro_jerk_y']**2 + df['gyro_jerk_z']**2)
    
    

    # calculate most segnificant components
    df["pca_accel"] = pca.fit_transform(df[["accel_x","accel_y","accel_z"]])
    df["pca_jerk"] = pca.fit_transform(df[["jerk_x","jerk_y","jerk_z"]])
    df["pca_gyro"] = pca.fit_transform(df[["gyro_x","gyro_y","gyro_z"]])
    df["pca_gyro_jerk"] = pca.fit_transform(df[["gyro_jerk_x","gyro_jerk_y","gyro_jerk_z"]])
    # fast fourier transform to calculate frequency domain
    if fft_cols:
        time_series_cols = df[["accel_x","accel_y", "accel_z", "jerk_x","jerk_y","jerk_z","magnitude", "jerk_mag", "pca_accel", "pca_jerk"]]
        frequency_space = np.abs(fft(time_series_cols))
        df[["f_accel_x","f_accel_y", "f_accel_z", "f_jerk_x","f_jerk_y","f_jerk_z","f_magnitude", "f_jerk_mag", "f_pca_accel", "f_pca_jerk"]] = frequency_space
    
    
    df = df.dropna()
    return df

In [5]:
def import_file(file_dir):
    # read data from name of file
    student, sensor, activity, activity_subtype = file_dir.split("/")[-1][:-4].split("_")[:4]
    # read in database
    df = pd.read_csv(file_dir)
    # drop features we dont want
    df = df[["timestamp","accel_x","accel_y", "accel_z", "gyro_x","gyro_y","gyro_z"]]
    df['timestamp'] = df['timestamp'].astype(np.float64)
    
    # add activity information
    df['activity'] = activity
    df['activity_subtype'] = activity_subtype
    
    #sort by timestamp
    df.sort_values(by=['timestamp'])
    # drop timestamp
    df = df.drop("timestamp", axis = 1)
    
    return df

In [13]:
def get_df(directory='Respeck', freq=2.25, fft_cols=False):
    # Loop through the clean-named subfolders
    all_data = pd.DataFrame()
    data_chunks = []
    
    for student in os.listdir(directory):
        if "placeholder" not in student:
            student_directory = os.path.join(directory, student)
            # itterate through students data
            for file in os.listdir(student_directory):
                file_dir = os.path.join(student_directory, file)
                if os.path.isfile(file_dir) and not "S37" in file:
                    df = import_file(file_dir)
                    if freq != -1:
                        df["accel_x"] = signal_smoothing(df["accel_x"],freq)
                        df["accel_y"] = signal_smoothing(df["accel_y"],freq)
                        df["accel_z"] = signal_smoothing(df["accel_z"],freq)
                        df["gyro_x"] = signal_smoothing(df["gyro_x"],freq)
                        df["gyro_y"] = signal_smoothing(df["gyro_y"],freq)
                        df["gyro_z"] = signal_smoothing(df["gyro_z"],freq)
                    df = base_feature_extraction(df, fft_cols)
                    all_data = pd.concat([all_data, df])
                    data_chunks.append(df)
                    
    return all_data, data_chunks

In [14]:
df, data_chunks = get_df('Respeck')

In [15]:
df

Unnamed: 0,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,activity,activity_subtype,magnitude,gyro_magnitude,...,jerk_z,jerk_mag,gyro_jerk_x,gyro_jerk_y,gyro_jerk_z,gyro_jerk_mag,pca_accel,pca_jerk,pca_gyro,pca_gyro_jerk
0,0.696617,0.138001,0.704046,-0.846520,-0.521656,0.106205,lying down right,hyperventilating,1.043338,10.004198,...,0.000897,0.002350,0.007812,-0.013795,-0.005546,0.016796,0.039461,-0.001666,-0.951896,0.067151
1,0.696597,0.137917,0.704082,-0.846207,-0.522208,0.105984,lying down right,hyperventilating,1.043300,9.985477,...,0.002540,0.008188,0.021814,-0.039946,-0.023321,0.051141,0.039446,-0.003445,-0.951978,0.073885
2,0.696533,0.137384,0.704249,-0.844775,-0.524852,0.104340,lying down right,hyperventilating,1.043008,9.850482,...,0.005843,0.026496,0.046117,-0.090606,-0.084849,0.132422,0.039286,-0.011587,-0.952389,0.086281
3,0.696531,0.135850,0.704549,-0.842518,-0.529456,0.099196,lying down right,hyperventilating,1.042058,9.360894,...,0.005927,0.052592,0.015694,-0.057961,-0.178025,0.187880,0.038611,-0.028929,-0.953155,0.075761
4,0.696863,0.133217,0.704723,-0.843519,-0.529489,0.090098,lying down right,hyperventilating,1.040374,8.139290,...,-0.002116,0.072286,-0.230686,0.342209,-0.221206,0.468246,0.037052,-0.050645,-0.953415,-0.035355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,0.201749,-0.971641,0.123337,0.910020,0.409318,0.065745,sitting,coughing,0.987379,7.964241,...,-0.267705,0.312878,0.597384,-1.108308,-4.266990,4.448867,0.105078,-0.308982,-0.819929,-0.827137
756,0.206151,-0.971359,0.118171,0.910986,0.381364,-0.157056,sitting,coughing,0.991460,5.860409,...,-0.002802,0.058958,-2.877593,0.304458,-8.214600,8.709356,0.111831,-0.025858,-0.730192,-5.436121
757,0.206366,-0.970699,0.123112,0.679812,0.433675,-0.591423,sitting,coughing,0.995586,4.168407,...,0.216142,0.222915,-10.000000,1.481430,-8.861802,13.443443,0.107908,0.218073,-0.341499,-12.350356
758,0.202016,-0.969969,0.135462,0.012853,0.499878,-0.866000,sitting,coughing,0.999288,4.669976,...,0.353438,0.395822,-10.000000,0.971608,-2.313102,10.309920,0.095302,0.396333,0.380021,-10.120294


In [16]:
# this outputs data that has both the physical and subtype in the y for all stationary tasks
def get_X_y_all(data_chunks, window_size = 50, overlap = 25):
    X = []
    y = []
    for data in data_chunks:
        if not data.iloc[0]['activity'] in ["normal walking", "shuffle walking","running", "miscellaneous movements", "ascending stairs", "descending stairs"]:
            for i in range(0, len(data) - window_size + 1, overlap):
                # get window
                window = data[i:i + window_size].copy()
                x_data = window.drop(columns=['activity', 'activity_subtype'])
                # create X matrix
                X.append(x_data)
                # create y matrix
                y.append(window["activity"].iloc[0] +"|" +window["activity_subtype"].iloc[0])
    return np.array(X), np.array(y).reshape(-1,1)


In [10]:
# get data with 2.25hz smoothing and no fft
df, data_chunks = get_df('Respeck')

In [17]:
data_chunks[0]

Unnamed: 0,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,activity,activity_subtype,magnitude,gyro_magnitude,...,jerk_z,jerk_mag,gyro_jerk_x,gyro_jerk_y,gyro_jerk_z,gyro_jerk_mag,pca_accel,pca_jerk,pca_gyro,pca_gyro_jerk
0,0.696617,0.138001,0.704046,-0.846520,-0.521656,0.106205,lying down right,hyperventilating,1.043338,10.004198,...,0.000897,0.002350,0.007812,-0.013795,-0.005546,0.016796,0.039461,-0.001666,-0.951896,0.067151
1,0.696597,0.137917,0.704082,-0.846207,-0.522208,0.105984,lying down right,hyperventilating,1.043300,9.985477,...,0.002540,0.008188,0.021814,-0.039946,-0.023321,0.051141,0.039446,-0.003445,-0.951978,0.073885
2,0.696533,0.137384,0.704249,-0.844775,-0.524852,0.104340,lying down right,hyperventilating,1.043008,9.850482,...,0.005843,0.026496,0.046117,-0.090606,-0.084849,0.132422,0.039286,-0.011587,-0.952389,0.086281
3,0.696531,0.135850,0.704549,-0.842518,-0.529456,0.099196,lying down right,hyperventilating,1.042058,9.360894,...,0.005927,0.052592,0.015694,-0.057961,-0.178025,0.187880,0.038611,-0.028929,-0.953155,0.075761
4,0.696863,0.133217,0.704723,-0.843519,-0.529489,0.090098,lying down right,hyperventilating,1.040374,8.139290,...,-0.002116,0.072286,-0.230686,0.342209,-0.221206,0.468246,0.037052,-0.050645,-0.953415,-0.035355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,0.722692,0.132215,0.678406,-0.790251,-0.598202,0.132881,lying down right,hyperventilating,1.019422,19.494904,...,0.532161,0.918462,0.567581,-0.633686,0.552565,1.014413,0.005776,0.916343,-0.956646,0.161471
756,0.699302,0.150913,0.698715,-0.769145,-0.620505,0.152937,lying down right,hyperventilating,1.021194,17.378000,...,0.435479,0.795994,0.532275,-0.521851,0.555916,0.929886,0.041925,0.794024,-0.955071,0.108242
757,0.681087,0.165538,0.713245,-0.747669,-0.639950,0.177354,lying down right,hyperventilating,1.025784,12.445023,...,0.265207,0.514162,0.868063,-0.564305,1.331815,1.686922,0.069392,0.511888,-0.951702,-0.023303
758,0.672067,0.173275,0.719932,-0.699700,-0.665649,0.259482,lying down right,hyperventilating,1.031388,5.371397,...,0.064254,0.144072,10.000000,10.000000,-0.233620,14.144065,0.082981,0.141180,-0.934877,-14.033938


In [18]:
X,y = get_X_y_all(data_chunks)
file = open('X_respeck_gyro', 'wb')
pickle.dump(X, file)
file.close()
file = open('y_respeck_gyro', 'wb')
pickle.dump(y, file)
file.close()

In [None]:
# X,y = get_X_y_all(data_chunks)
# print(X.shape, y.shape)
# file = open('X_respeck_all', 'wb')
# pickle.dump(X, file)
# file.close()
# file = open('y_respeck_all', 'wb')
# pickle.dump(y, file)
# file.close()
# print(y[0])

In [None]:
# for w in [25,50]:#,75,100,125,150]:
#     X,y = get_X_y(data_chunks, w)
#     file = open('X_respeck_' + str(w), 'wb')
#     pickle.dump(X, file)
#     file.close()
#     file = open('y_respeck_' + str(w), 'wb')
#     pickle.dump(y, file)
#     file.close()

In [None]:
# for freq in [1,2.25,5]:
#     df, data_chunks = get_df('Respeck', freq=freq)
#     X,y = get_X_y(data_chunks)
#     file = open('X_respeck_' + str(freq), 'wb')
#     pickle.dump(X, file)
#     file.close()
#     file = open('y_respeck_' + str(freq), 'wb')
#     pickle.dump(y, file)
#     file.close()

In [None]:
# file = open('X_thingy', 'wb')
# pickle.dump(X, file)
# file.close()
# file = open('y_thingy', 'wb')
# pickle.dump(y, file)
# file.close()

In [None]:
def column_summary(data):
    # Calculate Mean, Standard Deviation, Median Absolute Deviation
    mean_value = np.mean(data)
    std_dev = np.std(data)
    mad = np.median(np.abs(data - np.median(data)))

    # Calculate Minimum and Maximum Values
    minimum = np.min(data)
    maximum = np.max(data)

    # Calculate Signal Magnitude Area
    sma = np.sum(np.abs(data))

    # Calculate Energy Measure
    energy = np.sum(data ** 2) / len(data)

    # Calculate Inter-quartile Range
    interquartile_range = iqr(data)

    # Calculate Signal Entropy
    signal_entropy = entropy(data)
    
    # calculate number of peaks
    num_peaks = len(find_peaks(data))
    
    #skewness
    skewness = skew(data)
    
    #kurtosis
    kurto = kurtosis(data)

    # Auto-regression coefficients using Burg order equal to 4
    # burg_order = 4
    # coefficients = lfilter(*butter(burg_order, 0.5, output='ba'), data)
    
    return np.array([mean_value, std_dev, mad, minimum, maximum, sma, energy, num_peaks,
                    interquartile_range, signal_entropy, kurto,skewness])
    