# Connect to Google Drive

Reference: https://kgptalkie.com/human-activity-recognition-using-accelerometer-data/

Archived: https://web.archive.org/web/20210118012346/https://kgptalkie.com/human-activity-recognition-using-accelerometer-data/

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that later
# cells can read and write files under /content/drive/MyDrive.
from google.colab import drive
# force_remount=True requests a fresh mount even if the drive is already mounted.
drive.mount('/content/drive', force_remount=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Imports

In [2]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, InputLayer
from tensorflow.keras.optimizers import Adam
from keras.layers.recurrent import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping
import keras
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import cross_validate
from sklearn.model_selection import LeaveOneGroupOut
import scipy.stats as stats

# Set Variables for Data Processing

In [3]:
# --- Data-processing switches ---
# When True, every activity is cut down to the size of the smallest class.
balance_data = False
# When False, the 'seizure' recordings are skipped while loading.
include_seizure_data = True

# Names of the dogs (presumably the recording subjects; not referenced in the
# cells visible here).
dog_names = [
    "Molly",
    "Mabel",
    "Bernie",
]

# Activity labels; each one is also the name of a folder of CSV recordings.
activities = [
    "walk",
    "sleep",
    "run_jog",
    "play",
    "leisure",
    "car",
    "seizure",
]

# Create The Dataframe

In [4]:
# Load every CSV recording found in each activity's folder and tag its rows
# with the activity name, then combine everything into a single frame `data`.
activity_frames = []
for activity in activities:
  # Seizure recordings are optional; leave them out when the flag is off.
  if activity == 'seizure' and not include_seizure_data:
    continue
  # glob returns paths in arbitrary order; sort so the row order is reproducible.
  csv_paths = sorted(glob.glob(f'/content/drive/MyDrive/Smart Dog Collar/Data/03-31-2021_snapshot/{activity}/*.csv'))
  if not csv_paths:
    # pd.concat on an empty list only says "No objects to concatenate";
    # fail with a message that names the activity whose folder is empty.
    raise FileNotFoundError(f"No CSV files found for activity '{activity}'")
  activity_df = pd.concat([pd.read_csv(path) for path in csv_paths], ignore_index=True)
  activity_df['activity'] = activity
  activity_frames.append(activity_df)

# Concatenate once at the end instead of re-copying the growing frame on every
# pass. The index is deliberately not reset, so it restarts at 0 for each
# activity exactly as before.
data = pd.concat(activity_frames) if activity_frames else pd.DataFrame()
display(data)

Unnamed: 0,time,g_x,g_y,g_z,a_x,a_y,a_z,activity
0,02-26-2021_08-24-29,8.018349,-6.008378,11.145768,-1.74219,-28.085938,8.539062,walk
1,02-26-2021_08-24-29,3.098705,-5.625526,5.541777,16.7422,-11.281250,-25.281250,walk
2,02-26-2021_08-24-29,2.991028,-0.823131,4.053441,-17.5391,28.843750,-37.828125,walk
3,02-26-2021_08-24-29,10.834701,-1.301695,8.037491,-25.1562,70.070312,-26.468750,walk
4,02-26-2021_08-24-29,6.577869,-6.441478,9.887143,-25.9062,108.960938,-25.882812,walk
...,...,...,...,...,...,...,...,...
196375,03-26-2021_16-56-47,1.378266,-9.868000,4.048656,0.0234529,-0.367474,-0.047724,seizure
196376,03-26-2021_16-56-47,-0.131605,-9.633504,4.029513,0.0234529,-0.367474,-0.047724,seizure
196377,03-26-2021_16-56-47,0.524028,-8.609376,3.792624,0.0234529,-0.367474,-0.047724,seizure
196378,03-26-2021_16-56-47,0.961915,-9.133404,4.007978,0.0234529,-0.367474,-0.047724,seizure


In [5]:
# Overview of the combined frame: column dtypes and memory, the number of
# distinct activity labels, the overall shape, and missing values per column.
print("-----Info-----")
data.info()
num_activities = data['activity'].nunique(dropna=False)
print("# of Activities:", num_activities)
print("Data Shape:", data.shape)
print("Null Counts:")
data.isna().sum()

-----Info-----
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5561709 entries, 0 to 196379
Data columns (total 8 columns):
 #   Column    Dtype  
---  ------    -----  
 0   time      object 
 1   g_x       float64
 2   g_y       float64
 3   g_z       float64
 4   a_x       object 
 5   a_y       float64
 6   a_z       float64
 7   activity  object 
dtypes: float64(5), object(3)
memory usage: 381.9+ MB
# of Activities: 7
Data Shape: (5561709, 8)
Null Counts:


time        62
g_x         64
g_y         64
g_z         65
a_x         66
a_y         67
a_z         67
activity     0
dtype: int64

# Clean the Data

In [6]:
# Discard every row that has at least one missing value.
data = data.dropna(axis=0, how='any')
# Sanity check: no NaN/null value may remain anywhere in the frame.
assert not data.isna().to_numpy().any()

In [7]:
# Cast all six sensor channels to float in a single pass (a_x is loaded with
# object dtype, so it needs an explicit conversion).
data = data.astype(dict.fromkeys(['a_x', 'a_y', 'a_z', 'g_x', 'g_y', 'g_z'], 'float'))
# data.info()

In [8]:
# Working frame for the remaining steps: the sensor columns plus the activity
# label, without the timestamp column.
df = data.drop('time', axis=1)
display(df.head(5))

Unnamed: 0,g_x,g_y,g_z,a_x,a_y,a_z,activity
0,8.018349,-6.008378,11.145768,-1.742188,-28.085938,8.539062,walk
1,3.098705,-5.625526,5.541777,16.742188,-11.28125,-25.28125,walk
2,2.991028,-0.823131,4.053441,-17.539062,28.84375,-37.828125,walk
3,10.834701,-1.301695,8.037491,-25.15625,70.070312,-26.46875,walk
4,6.577869,-6.441478,9.887143,-25.90625,108.960938,-25.882812,walk


## Optionally Balance the Data (even out the numbers of each activity)


In [9]:
# Rows per activity, most frequent first, to show how imbalanced the classes are.
df['activity'].value_counts(sort=True)

sleep      3325855
walk        651590
run_jog     553781
car         445039
play        317001
seizure     196376
leisure      72000
Name: activity, dtype: int64

In [10]:
if balance_data:
  # Down-sample every activity to the size of the rarest one so that each
  # class contributes the same number of rows.
  lowest = df['activity'].value_counts().min()

  # Keep the first `lowest` rows of each activity that is present in df.
  # Iterating over the labels found in the data, rather than hard-coded names,
  # keeps the 'run_jog' class (a filter on 'run' matches no rows) and needs no
  # special case for seizure data, which is only in df when it was loaded.
  # pd.concat is used because DataFrame.append was removed in pandas 2.0.
  df = pd.concat(
      [df[df['activity'] == name].head(lowest) for name in df['activity'].unique()]
  )
print("Shape:", df.shape)
df['activity'].value_counts()

Shape: (5561642, 7)


sleep      3325855
walk        651590
run_jog     553781
car         445039
play        317001
seizure     196376
leisure      72000
Name: activity, dtype: int64

## Convert Labels to Numeric

In [11]:
# Replace the activity names with integer ids. `classes` keeps the original
# names, positioned so that classes[i] is the name encoded as i.
le = LabelEncoder()
le.fit(df['activity'])
df['activity'] = le.transform(df['activity'])
classes = le.classes_
print("Classes:", classes)

Classes: ['car' 'leisure' 'play' 'run_jog' 'seizure' 'sleep' 'walk']


# Standardize The Data

In [12]:
# Split the frame into the feature matrix (every column but the last) and the
# label vector (the last column), then standardize the features column-wise.
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()
# NOTE(review): the scaler is fitted on the full dataset and not kept, so its
# parameters cannot be reused on new data later; confirm that is intended.
X = StandardScaler().fit(X).transform(X)
# Rebuild a frame with the original feature names and the labels in a 'label' column.
scaled_df = pd.DataFrame(X, columns=df.columns[:-1]).assign(label=df['activity'].to_numpy())
display(scaled_df)

Unnamed: 0,g_x,g_y,g_z,a_x,a_y,a_z,label
0,1.196273,-1.081498,1.421736,-0.073822,-1.294081,0.425378,6
1,0.395422,-0.997478,0.547428,0.546037,-0.524955,-1.332338,6
2,0.377894,0.056450,0.315224,-0.603559,1.311507,-1.984427,6
3,1.654736,-0.048575,0.936797,-0.858995,3.198385,-1.394055,6
4,0.961782,-1.176546,1.225371,-0.884146,4.978352,-1.363603,6
...,...,...,...,...,...,...,...
5561637,0.042518,-1.711649,0.272666,-0.014613,-0.025448,-0.020896,4
5561638,0.115358,-1.928526,0.314478,-0.014613,-0.025448,-0.020896,4
5561639,-0.130428,-1.877064,0.311491,-0.014613,-0.025448,-0.020896,4
5561640,-0.023700,-1.652310,0.274533,-0.014613,-0.025448,-0.020896,4


# Save the Dataset


In [13]:
# Write the scaled features and integer labels to cleaned_data.csv in the
# snapshot folder; the row index is not written.
scaled_df.to_csv('/content/drive/MyDrive/Smart Dog Collar/Data/03-31-2021_snapshot/cleaned_data.csv', index=False)