In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv1D, Dropout, MaxPooling1D, BatchNormalization
from keras.utils import plot_model

Run subject_segregation.py and check-subject-validity.py prior to this.

In [2]:
#Create uun list for testing, training and validation

random.seed(100)
src_folder = 'processed-data/'

uun_list = [s for s in os.listdir(src_folder) if s[0]== 's']
uun_list = random.sample(uun_list,len(uun_list))

print("Total size of list: " + str(len(uun_list)))


Total size of list: 78


In [3]:
def create_dataFrame(uun_list, df_name, src_folder):
    base_df = pd.DataFrame()

    for uun in uun_list:
      files_per_uun = [s for s in os.listdir(os.path.join(src_folder,uun))]
        
      for file in files_per_uun:
          # load data into a DataFrame 
          path = os.path.join(src_folder, uun, file)
          new_df = pd.read_csv(path)
          # merge into the base DataFrame 
          base_df = pd.concat([base_df, new_df])
            
    base_df.reset_index(drop=True, inplace=True)
    base_df.drop(base_df[base_df['sensor_type'] == "Thingy"].index, inplace = True)
    
    print(f"Data from: {df_name}")
    print(f"The data was collected using the sensors: {base_df.sensor_type.unique()}")
    print(f"The data was collected for the activities: {base_df.activity_type.unique()}")
    print(f"The number of activities collected: {len(base_df.activity_type.unique())}")
    print(f"The number of unique recordings is: {len(base_df.recording_id.unique())}")
    print(f"The subject IDs in the recordings are: {len(base_df.subject_id.unique())}")
    print("\n")
            
    return base_df

def clean_dataFrame(df):
    df.drop('notes', axis=1, inplace=True)
    df.drop('sensor_type', axis=1, inplace=True)
    df.drop('subject_id', axis=1, inplace=True)
    df.drop('activity_code', axis=1, inplace=True)
    df.dropna(inplace=True)
    
def get_sliding_windows(df):
    recording_id_list = df.recording_id.unique()
    sliding_windows = pd.DataFrame()
    window_size = 50 # 50 datapoints for the window size, which, at 25Hz, means 2 seconds
    step_size = 25 # this is 50% overlap

    window_number = 0 # start a counter at 0 to keep track of the window number

    for recording in recording_id_list:
        current_window = df.loc[df['recording_id'] == recording]
        large_enough_windows = [window for window in current_window.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]
        overlapping_windows = large_enough_windows[::step_size] 

        for window in overlapping_windows:
            window.loc[:, 'window_id'] = window_number
            window_number += 1

        final_sliding_windows = pd.concat(overlapping_windows)
        sliding_windows = pd.concat([sliding_windows, final_sliding_windows])

    sliding_windows.reset_index(drop=True, inplace=True)
    return sliding_windows

def model_data(df_sliding_windows):
    X = []
    y = []

    for window_id, group in df_sliding_windows.groupby('window_id'):

        shape = group[columns_of_interest].values.shape
        
        X.append(group[columns_of_interest].values)
        y.append(class_labels[group["activity_type"].values[0]])
        
    X = np.asarray(X)
    y = np.asarray(pd.get_dummies(np.asarray(y)), dtype=np.float32)
    return (X,y)

def get_model_input(df):
    clean_dataFrame(df)
    df.drop(df[df['activity_type'] == "Climbing stairs"].index, inplace = True)
    df.drop(df[df['activity_type'] == "Descending stairs"].index, inplace = True)
    df.drop(df[df['activity_type'] == "Movement"].index, inplace = True)

    df['activity_type'] = df['activity_type'].replace(['Sitting', 'Sitting bent forward', 'Sitting bent backward', 'Standing', 'Desk work'], 'Sitting/Standing')
    df['activity_type'] = df['activity_type'].replace(['Lying down right', 'Lying down left', 'Lying down on back', 'Lying down on stomach'], 'Lying down')

    return model_data(get_sliding_windows(df))

def create_model():
    filters = 64
    kernel_size = 3
    n_features = 6
    activation='relu'
    n_classes = 4
    window_size = 50

    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear', 
                    input_shape=(window_size, n_features)))
    model.add(BatchNormalization())
    model.add(Activation(activation))

    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
    model.add(BatchNormalization())
    model.add(Activation(activation))

    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
    model.add(BatchNormalization())
    model.add(Activation(activation))

    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
    model.add(BatchNormalization())
    model.add(Activation(activation))

    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))

    model.summary()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics = ['accuracy'])

    return model

In [4]:
columns_of_interest = ['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']

class_labels = {
    'Sitting/Standing' : 0,
    'Walking at normal speed' : 1,
    'Lying down' : 2,
    'Running' : 3
}

In [5]:
accuracies = []

for x in range(len(uun_list)):
    current_list = ['s1892493','s1852056', 's2303866', 's2255740', 's1977764', 's1828233', 's1908422', 's1842855', 's1837624', 's1965695', 's2342658', 's1900673', 's1973139', 's1904845', 's1915425', 's1921856', 's1957945', 's1962038', 's2269664', 's2303353', 's1925182', 's1923493', 's1858629', 's1992413', 's2308859', 's1732873', 's1974565', 's1957847', 's1826377', 's1998909', 's1817967', 's1918275', 's1832137', 's1839825', 's1926989', 's1853226', 's2298733', 's1961351', 's1863792', 's1800600', 's1901843', 's1912558', 's1911027', 's1948094', 's1941321', 's1967984', 's1836526', 's1956488', 's1912614', 's2299270', 's1834237', 's1951735', 's1800825', 's1911455', 's1973235', 's1918258', 's1862671', 's1810150', 's1910268', 's1931698', 's1931736', 's1832512', 's1925715', 's1960578', 's1909083', 's1925709', 's1976098', 's1912575', 's1927811', 's1809887', 's1923449', 's1951693', 's1967087', 's1996403', 's1935680', 's1925695', 's1920337', 's1862323']
    testing_df = create_dataFrame([current_list[x],], "Testing DF", src_folder)
    del current_list[x]
    training_df = create_dataFrame(current_list, "Training DF", src_folder)

    (X_train, y_train) = get_model_input(training_df)
    (X_test, y_test) = get_model_input(testing_df)

    model = create_model()
    model.fit(X_train, y_train, batch_size=32, epochs=7)
    (_, accuracy) = model.evaluate(X_test, y_test)
    accuracies.append(accuracy * 100.0)
    
    print( str(x + 1) + " Iteration completed.")
    print("Accuracy of " + str(accuracy * 100.0) + " achieved.")
    

Data from: Testing DF
The data was collected using the sensors: ['Respeck']
The data was collected for the activities: ['Desk work' 'Sitting' 'Movement' 'Running' 'Standing'
 'Sitting bent backward' 'Descending stairs' 'Lying down right'
 'Lying down on stomach' 'Lying down on back' 'Sitting bent forward'
 'Climbing stairs' 'Walking at normal speed' 'Lying down left']
The number of activities collected: 14
The number of unique recordings is: 14
The subject IDs in the recordings are: 1


Data from: Training DF
The data was collected using the sensors: ['Respeck']
The data was collected for the activities: ['Walking at normal speed' 'Desk work' 'Sitting' 'Lying down on back'
 'Climbing stairs' 'Standing' 'Movement' 'Lying down left'
 'Sitting bent backward' 'Descending stairs' 'Running'
 'Sitting bent forward' 'Lying down right' 'Lying down on stomach']
The number of activities collected: 14
The number of unique recordings is: 1077
The subject IDs in the recordings are: 78




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 'Movement' 'Descending stairs' 'Lying down on stomach'
 'Lying down on back' 'Sitting bent backward' 'Running']
The number of activities collected: 14
The number of unique recordings is: 14
The subject IDs in the recordings are: 1


Data from: Training DF
The data was collected using the sensors: ['Respeck']
The data was collected for the activities: ['Desk work' 'Sitting' 'Movement' 'Running' 'Standing'
 'Sitting bent backward' 'Descending stairs' 'Lying down right'
 'Lying down on stomach' 'Lying down on back' 'Sitting bent forward'
 'Climbing stairs' 'Walking at normal speed' 'Lying down left']
The number of activities collected: 14
The number of unique recordings is: 1077
The subject IDs in the recordings are: 78


Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_68 (Conv1D)          (None, 48, 64)       

In [8]:
mean_score = sum(accuracies) / len(accuracies)
print("Mean Score of Model: ", mean_score)

Mean Score of Model:  97.08224481497055
