# Importing dependecies

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import csv
import array
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from numpy import savetxt
from sklearn.preprocessing import StandardScaler
from collections import Counter
from sklearn.model_selection import train_test_split
import tensorflow as tf
import scipy.stats as stats
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from math import sqrt
import math

In [2]:
from scipy.fftpack import fft
from scipy.stats import spearmanr
from scipy.stats import pearsonr

# Import dataset

In [3]:
"""Import Datasets"""
dataset_user04 = pd.read_csv("final_field_user04_dataset.csv")
dataset_user07 = pd.read_csv("final_field_user07_dataset.csv")
dataset_user08 = pd.read_csv("final_field_user08_dataset.csv")
dataset_user18 = pd.read_csv("final_field_user18_dataset.csv")
dataset_user38 = pd.read_csv("final_field_user38_dataset.csv")
dataset_user51 = pd.read_csv("final_field_user51_dataset.csv")
dataset_lab = pd.read_csv("lab data localized.csv")

dataset = pd.concat([dataset_lab, dataset_user04, dataset_user07, dataset_user08, dataset_user18, dataset_user38, dataset_user51], ignore_index = True)


# Data processing

In [4]:
""" Data processing"""
X = dataset.iloc[:, [0, 1, 2]].values
Y = dataset.iloc[:, 3].values
Y = Y - 1

## Data Augmentation

we just use scaling for data augmentation. The minority classes are scaled and added to the original data to make the original data balance. Our assumption is that, if we scale a signal it still represent the same activity

In [6]:
# Data Augmentation
scalling_factor = [0.3,0.5,0.7,0.8,1.2,1.5,2]

for scal in scalling_factor:
    X_1_scal = X[np.where(Y==1)]*scal
    label_1 = Y[np.where(Y==1)]
    X_2_scal = X[np.where(Y==2)]*scal
    label_2 = Y[np.where(Y==2)]
    X_5_scal = X[np.where(Y==5)]*scal
    label_5 = Y[np.where(Y==5)]
    X_7_scal = X[np.where(Y==7)]*scal
    label_7 = Y[np.where(Y==7)]
    X_8_scal = X[np.where(Y==8)]*scal
    label_8 = Y[np.where(Y==8)]
    X_9_scal = X[np.where(Y==9)]*scal
    label_9 = Y[np.where(Y==9)]
    X_10_scal = X[np.where(Y==10)]*scal
    label_10 = Y[np.where(Y==10)]
    X = np.concatenate((X, X_1_scal,X_2_scal,X_5_scal,X_7_scal,X_8_scal,X_9_scal,X_10_scal))
    Y = np.concatenate((Y, label_1,label_2, label_5, label_7, label_8, label_9, label_10))

scalling_factor = [0.5,2]

for scal in scalling_factor:
    X_3_scal = X[np.where(Y==3)]*scal
    label_3 = Y[np.where(Y==3)]
    X_11_scal = X[np.where(Y==11)]*scal
    label_11 = Y[np.where(Y==11)]
    X = np.concatenate((X, X_3_scal, X_11_scal))
    Y = np.concatenate((Y, label_3, label_11))

scalling_factor = [0.3,0.5,1.5,2]

for scal in scalling_factor:
    X_0_scal = X[np.where(Y==0)]*scal
    label_0 = Y[np.where(Y==0)]
    X_6_scal = X[np.where(Y==6)]*scal
    label_6 = Y[np.where(Y==6)]
    X = np.concatenate((X, X_0_scal, X_6_scal))
    Y = np.concatenate((Y, label_0, label_6))

In [7]:
del X_1_scal

In [8]:
X.shape,Y.shape

((26992376, 3), (26992376,))

## low pass filter

In [9]:
low_pass_filter = np.zeros((X.shape[0], X.shape[1]))
for i in range(len(X)):
    for j in range(X.shape[1]):
        if i == 0:
            low_pass_filter[i][j] = X[i][j]
        else:
            low_pass_filter[i][j] = 0.8*X[i-1][j] + (1-0.8)*X[i][j]

## Frame preparation

In [10]:
# frame preparation
Fs = 4
frame_size = Fs*20 # 80 # 20 seconds of data
hop_size =  Fs*10

def get_frames(df, frame_size, hop_size,label_data):

    N_FEATURES = 3

    frames = []
    labels = []
    for i in range(0, len(df) - frame_size, hop_size):
        value = df[i: i + frame_size, :]
        #y = df[i: i + frame_size, 1]
        #z = df[i: i + frame_size, 2]

        # Retrieve the most often used label in this segment
        label = stats.mode(label_data[i: i + frame_size])[0][0]
        frames.append([value])
        labels.append(label)

    # Bring the segments into a better shape
    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)

    return frames, labels

row_frame, row_label = get_frames(X, frame_size, hop_size, Y)
low_pass_frame, low_pass_label = get_frames(low_pass_filter, frame_size, hop_size, Y)

In [11]:
row_frame.shape, low_pass_frame.shape

((674808, 80, 3), (674808, 80, 3))

## Feature calculation

In [12]:
#feature calculation
mean = np.mean(low_pass_frame, axis = 1)  # mean along (x,y,z) axis
std = np.std(low_pass_frame, axis = 1)  # standard deviation along (x,y,z) axis

rms_x = []  # root mean square along x axis
value = 0
for i in range(low_pass_frame.shape[0]):
    for j in range(low_pass_frame.shape[1]):
        value = value + (low_pass_frame[i][j][0] **2)
        value = value / (low_pass_frame.shape[1])
    value = sqrt(value)
    rms_x.append(value)
    value = 0

rms_y = []  # root mean square along y axis
for i in range(low_pass_frame.shape[0]):
    for j in range(low_pass_frame.shape[1]):
        value = value + (low_pass_frame[i][j][1] **2)
        value = value / (low_pass_frame.shape[1])
    value = sqrt(value)
    rms_y.append(value)
    value = 0

rms_z = []  # root mean square z axis
for i in range(low_pass_frame.shape[0]):
    for j in range(low_pass_frame.shape[1]):
        value = value + (low_pass_frame[i][j][2] **2)
        value = value / (low_pass_frame.shape[1])
    value = sqrt(value)
    rms_z.append(value)
    value = 0

In [13]:
lengths = []  # length of the accelerometer vector
length = 0
nx, ny, nz = row_frame.shape[0], row_frame.shape[1], row_frame.shape[2]
acc_vector = np.reshape(row_frame,(nx, ny*nz))

for i in range(acc_vector.shape[0]):
    count = 0
    for j in range(acc_vector.shape[1]):
        length += acc_vector[i][j]**2
        count += 1

        if count == 3:
            length = sqrt(length)
            lengths.append(length)
            length = 0
            count = 0

avc = []  # avc feature

polling_rate = 4 #Hz

for i in range(0, len(lengths), frame_size):
    value = 0
    iterable = lengths[i:i+frame_size]
    for j in range(len(iterable) - 1, 1, -1):
        value += abs(iterable[j] - iterable[j - 1])

    value = value / (frame_size / polling_rate)
    avc.append(value)


In [14]:
max_min = []

for i in range(0, len(lengths), frame_size):
    iterable = lengths[i:i+frame_size]

    max_min.append(max(iterable) - min(iterable))


In [15]:
angle = np.zeros((low_pass_frame.shape[0],low_pass_frame.shape[1],low_pass_frame.shape[2]))  # inclenation angle
angles = []
length = 0
for i in range(angle.shape[0]):
    for j in range(angle.shape[1]):
        length = low_pass_frame[i,j,0]**2 + low_pass_frame[i,j,1]**2 + low_pass_frame[i,j,2]**2
        length = sqrt(length)
        if length == 0:
            print("length is zero in index row= ",i,"column= ",j)
        else:
            angle_x = math.acos(low_pass_frame[i,j,0]/length)
            angle_y = math.acos(low_pass_frame[i,j,1]/length)
            angle_z = math.acos(low_pass_frame[i,j,2]/length)
            angle[i][j][0] = angle_x
            angle[i][j][1] = angle_y
            angle[i][j][2] = angle_z
nx, ny, nz = angle.shape
angle = np.reshape(angle,(nx,ny*nz))


In [16]:
fft_x = []  # fast fourier transform cofficient along x axis
for i in range(row_frame.shape[0]):
  fft_x.append(fft(row_frame[i,:,0]))

fft_y = []  # fast fourier transform cofficient along x axis
for i in range(row_frame.shape[0]):
  fft_y.append(fft(row_frame[i,:,1]))

fft_z = []  # fast fourier transform cofficient along x axis
for i in range(row_frame.shape[0]):
  fft_z.append(fft(row_frame[i,:,2]))

energy_x = []
modulus_square = 0
for i in fft_x:
  for j in range(len(fft_x[0])):
    modulus_square += i[j].real**2 + i[j].imag**2
  energy_x.append(modulus_square/len(fft_x[0]))
  modulus_square = 0

energy_y = []
for i in fft_y:
  for j in range(len(fft_y[0])):
    modulus_square += i[j].real**2 + i[j].imag**2
  energy_y.append(modulus_square/len(fft_x[0]))
  modulus_square = 0

energy_z = []
for i in fft_z:
  for j in range(len(fft_z[0])):
    modulus_square += i[j].real**2 + i[j].imag**2
  energy_z.append(modulus_square/len(fft_x[0]))
  modulus_square = 0
'''
correlation = np.zeros((row_frame.shape[0],3))
for i in range(row_frame.shape[0]):
  corr_xy, _ =  pearsonr(row_frame[i,:,0], row_frame[i,:,1])
  corr_yz, _ =  pearsonr(row_frame[i,:,1], row_frame[i,:,2])
  corr_xz, _ =  pearsonr(row_frame[i,:,0], row_frame[i,:,2])
  correlation[i][0] = corr_xy
  correlation[i][1] = corr_yz
  correlation[i][2] = corr_xz
'''

'\ncorrelation = np.zeros((row_frame.shape[0],3))\nfor i in range(row_frame.shape[0]):\n  corr_xy, _ =  pearsonr(row_frame[i,:,0], row_frame[i,:,1])\n  corr_yz, _ =  pearsonr(row_frame[i,:,1], row_frame[i,:,2])\n  corr_xz, _ =  pearsonr(row_frame[i,:,0], row_frame[i,:,2])\n  correlation[i][0] = corr_xy\n  correlation[i][1] = corr_yz\n  correlation[i][2] = corr_xz\n'

In [17]:
%whos

Variable             Type                          Data/Info
------------------------------------------------------------
Adam                 ABCMeta                       <class 'tensorflow.python<...>.optimizer_v2.adam.Adam'>
BatchNormalization   type                          <class 'tensorflow.python<...>n_v2.BatchNormalization'>
Conv2D               type                          <class 'tensorflow.python<...>rs.convolutional.Conv2D'>
Counter              type                          <class 'collections.Counter'>
Dense                type                          <class 'tensorflow.python<...>keras.layers.core.Dense'>
Dropout              type                          <class 'tensorflow.python<...>ras.layers.core.Dropout'>
Flatten              type                          <class 'tensorflow.python<...>ras.layers.core.Flatten'>
Fs                   int                           4
MaxPool2D            type                          <class 'tensorflow.python<...>rs.pooling.MaxPooling

In [18]:
del X_2_scal
del X_3_scal
del X_5_scal
del X_6_scal
del X_7_scal
del X_8_scal
del X_9_scal
del X_10_scal
del X_11_scal

In [20]:
"""Import Datasets"""
del dataset_user04 
del dataset_user07 
del dataset_user08 
del dataset_user18 
del dataset_user38 
del dataset_user51 
del dataset_lab 

del dataset 

NameError: ignored

In [21]:
input_vector = np.zeros((row_frame.shape[0], angle.shape[1] + 14))  # input vector for the algorithm
for i in range(input_vector.shape[0]):
    input_vector[i][0] = mean[i][0]
    input_vector[i][1] = mean[i][1]
    input_vector[i][2] = mean[i][2]
    input_vector[i][3] = std[i][0]
    input_vector[i][4] = std[i][1]
    input_vector[i][5] = std[i][2]
    input_vector[i][6] = rms_x[i]
    input_vector[i][7] = rms_y[i]
    input_vector[i][8] = rms_z[i]
    input_vector[i][9] = max_min[i]
    input_vector[i][10] = avc[i]
    input_vector[i,11] = energy_x[i]
    input_vector[i,12] = energy_y[i]
    input_vector[i,13] = energy_z[i]
    #input_vector[i,9] = correlation[i,0]
    #input_vector[i,10] = correlation[i,1]
    #input_vector[i,11] = correlation[i,2]


for i in range(angle.shape[0]):
    for j in range(angle.shape[1]):
        input_vector[i][j+14] = angle[i][j]

In [24]:
del X
del X_0_scal
del Y 
del acc_vector
del angle
del energy_x
del energy_y
del energy_z
del avc
del fft_x
del fft_y
del fft_z
del label_0
del label_1
del label_2
del label_3
del label_5
del label_6
del label_7
del label_8
del label_9
del label_10
del label_11
del low_pass_filter
del low_pass_frame
del low_pass_label
del rms_x
del rms_y
del rms_z
del std

In [25]:
input_vector.shape, row_label.shape

((674808, 254), (674808,))

# Standarization

In [26]:
sc = StandardScaler()
input_vector = sc.fit_transform(input_vector)

# Trian Test split

In [27]:
X_train, X_val, y_train, y_val = train_test_split(input_vector, row_label, test_size = 0.2, random_state = 0, stratify = row_label)

In [28]:
X_train.shape, y_train.shape

((539846, 254), (539846,))

# Algorithm

## Neural Network

In [29]:
model = Sequential()
model.add(Dense(units=32, activation='relu', input_shape=(X_train.shape[1], )))
#model.add(Dropout(0.2))
model.add(Dense(units=64, activation='relu'))
#model.add(Dropout(0.2))
model.add(Dense(units=128, activation='relu'))
#model.add(Dropout(0.2))
#model.add(Dense(units=324, activation='relu'))
#model.add(Dropout(0.2))
model.add(Dense(units=12, activation='softmax'))

In [30]:
model.compile(optimizer=Adam(learning_rate = 0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(X_train, y_train, epochs = 50, batch_size =64, validation_data= (X_val, y_val), verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
 177/8436 [..............................] - ETA: 19s - loss: 0.5571 - accuracy: 0.8189

In [None]:
y_pred = model.predict(X_val)

In [None]:
y_pred_ = np.zeros((y_pred.shape[0]))
for i in range(y_pred.shape[0]):
    y_pred_[i] = np.argmax(y_pred[i,:])

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_pred_, y_val)

In [None]:
pd.DataFrame(cm)

In [None]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_val, y_pred_)

In [None]:
per_class_accuracy = {}
for i in range(cm.shape[0]):
    accuracy = cm[i,i]/sum(cm[i,:])
    per_class_accuracy[i] = accuracy

In [None]:
per_class_accuracy