# Training of ML Model for the PDIOT CW3

## Import Dependencies

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from datetime import datetime

In [2]:
UUNs = ['s2106809', 's2100273', 's2104454']
cols = ['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z', 'activity']
data_file_name = f"all_data{datetime.now()}.csv"
if len(UUNs) < 3:
    name_prefix = '_'.join(UUNs)
    data_file_name = f"{name_prefix}_data{datetime.now()}.csv)"

all_data = os.path.join(os.getcwd(), data_file_name)
pd.DataFrame(columns=cols).to_csv(all_data, mode='w', header=True, index=False)

In [3]:
def is_req_file(filename):
    return ("Respeck" in filename and "clean" in filename and
            filename.split("_")[1] in UUNs)

data_dir = os.path.join(os.getcwd(), 'Respeck')
all_files = []
for subdir in os.listdir(data_dir):
    if os.path.isdir(os.path.join(data_dir, subdir)):
        for file in os.listdir(os.path.join(data_dir, subdir)):
            file = os.path.join(data_dir, subdir, file)
            all_files.append(file)

In [4]:
for file in all_files:
    df = pd.read_csv(os.path.join(data_dir, file))
    act, subact = file.split('_')[2:4]
    df['activity'] = pd.Series(f"{act} {subact}", index=df.index)
    df = df[cols]
    df.to_csv(all_data, mode='a', header=False, index=False)

In [5]:
random_seed = 42
n_time_steps = 50
n_features = 6
step = 10
n_epochs = 50
batch_size = 64
learning_rate = 0.0015
l2_loss = 0.0015

segments = []
labels = []

In [6]:
all_data = os.path.join(os.getcwd(), data_file_name)
df = pd.read_csv(all_data)

for i in range(0, len(df) - n_time_steps, step):
    window_data = df[i: i + n_time_steps]
    segments.append(window_data.drop(['activity'], axis=1).values)
    labels.append(window_data['activity'].values[0])

In [7]:
segments = np.asarray(segments, dtype=np.float32).reshape(-1, n_time_steps, n_features)
labels = np.asarray(labels)

In [8]:
le = LabelEncoder()
labels = le.fit_transform(labels)

In [9]:
segments.shape, labels.shape

((234780, 50, 6), (234780,))

In [10]:
x_train, x_test, y_train, y_test = train_test_split(segments, labels, test_size=0.2, random_state=random_seed)

In [11]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((187824, 50, 6), (187824,), (46956, 50, 6), (46956,))

In [12]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D
from keras.optimizers.legacy import Adam

In [13]:
conv_x_train = x_train.reshape(-1, n_time_steps, n_features, 1)
conv_x_test = x_test.reshape(-1, n_time_steps, n_features, 1)

conv_x_train.shape, conv_x_test.shape

((187824, 50, 6, 1), (46956, 50, 6, 1))

In [14]:
model = Sequential()
model.add(Conv2D(16, (2, 2), activation = 'relu', input_shape = conv_x_train[0].shape))
model.add(Dropout(0.1))

model.add(Conv2D(32, (2, 2), activation='relu'))
model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))

model.add(Dense(44, activation='softmax'))

In [16]:
model.compile(optimizer=Adam(learning_rate = 0.001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
history = model.fit(conv_x_train, y_train, epochs =10, validation_data= (conv_x_test, y_test), verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
loss, accuracy = model.evaluate(conv_x_test, y_test)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')

Test loss: 0.36413830518722534, Test accuracy: 0.846963107585907
