# CNN Implementation #

In [1]:
# pip install tensorflow-addons

In [9]:
import os

import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
# import tensorflow as tf
# import tensorflow_addons as tfa
# from tensorflow.keras import layers
from tqdm.notebook import tqdm
# from sklearn.svm import SVC
# from sklearn.svm import LinearSVC
# from sklearn.model_selection import GridSearchCV
# from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
# from sklearn.model_selection import KFold
# from sklearn.metrics import f1_score
# from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

# Load Dataset #

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# Colab alternative: data_path = '/content/drive/MyDrive/motionsense_dataset'
# Locally, the dataset is expected in a "data" folder inside the current
# working directory.
working_dir = os.getcwd()
data_path = os.path.join(working_dir, 'data')
# List the entries of the data folder as a quick sanity check.
data_entries = os.listdir(data_path)
print(data_entries)

['dws_11', 'ups_12', '.DS_Store', 'wlk_7', 'std_14', 'wlk_15', 'wlk_8', 'dws_2', 'sit_13', 'jog_9', 'ups_3', 'ups_4', 'jog_16', 'dws_1', 'sit_5', 'std_6']


In [4]:
# Collect every "<activity>_<trial>" entry of the data folder.
folders = glob(os.path.join(data_path, '*_*'))
# Drop stray files that also match the pattern. The check is applied to the
# entry name only (not the full path) and an entry is kept only when it
# contains NONE of the markers; chaining the tests with `or` would keep
# everything, because no name contains all three markers at once.
excluded_markers = ("csv", ".DS_Store", ".ipynb")
folders = [
    s for s in folders
    if not any(marker in os.path.basename(s) for marker in excluded_markers)
]
df_all_list = []
# Activity label <-> integer class code.
activity_codes = {'dws':0,'jog':1,'sit':2,'std':3,'ups':4,'wlk':5}
# Derived from activity_codes so the two mappings can never drift apart.
activity_decodes = {code: name for name, code in activity_codes.items()}
activity_types = list(activity_codes.keys())
print(folders)

['/Users/apple/Desktop/CG4002_Capstone/ML/data/dws_11', '/Users/apple/Desktop/CG4002_Capstone/ML/data/ups_12', '/Users/apple/Desktop/CG4002_Capstone/ML/data/wlk_7', '/Users/apple/Desktop/CG4002_Capstone/ML/data/std_14', '/Users/apple/Desktop/CG4002_Capstone/ML/data/wlk_15', '/Users/apple/Desktop/CG4002_Capstone/ML/data/wlk_8', '/Users/apple/Desktop/CG4002_Capstone/ML/data/dws_2', '/Users/apple/Desktop/CG4002_Capstone/ML/data/sit_13', '/Users/apple/Desktop/CG4002_Capstone/ML/data/jog_9', '/Users/apple/Desktop/CG4002_Capstone/ML/data/ups_3', '/Users/apple/Desktop/CG4002_Capstone/ML/data/ups_4', '/Users/apple/Desktop/CG4002_Capstone/ML/data/jog_16', '/Users/apple/Desktop/CG4002_Capstone/ML/data/dws_1', '/Users/apple/Desktop/CG4002_Capstone/ML/data/sit_5', '/Users/apple/Desktop/CG4002_Capstone/ML/data/std_6']


# CNN/MLP: Feature extraction, Train and Test ##

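There are 24 participants (subjects) performing the motions. There are 6 different activity labels, and each activity is recorded over several trials of different durations, giving 15 trial folders in total.
We should therefore get 24 * 15 = 360 recordings (one per participant per trial).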


Long trials: those numbered 1 to 9, each lasting around 2 to 3 minutes.
Short trials: those numbered 11 to 16, each lasting around 30 seconds to 1 minute.

The sample rate is 50 Hz, so a 2 s window contains 100 samples (and the 10 s window used below contains 500 samples).

In [5]:
# Window length is 10 s at a 50 Hz sample rate -> 500 samples per window.
# Sliding windows over the raw signals, without feature extraction or a
# low-pass filter; for the CNN only (the raw windows are fed to it directly).
window = 500
step = window // 2  # hop between window starts: consecutive windows overlap by 50 %
data_all_x_list = []
data_all_y_list = []
for j in folders:
    csv_list = glob(os.path.join(j, '*'))
    # Folder names look like "<activity>_<trial>" (e.g. "dws_11"); basename()
    # extracts the name regardless of the platform's path separator.
    label = os.path.basename(os.path.normpath(j)).split('_')[0]
    for i in csv_list:
        df = pd.read_csv(i, usecols = ['attitude.roll', 'attitude.pitch', 'attitude.yaw','userAcceleration.x','userAcceleration.y','userAcceleration.z'])
        # Number of full windows that fit when advancing by `step` samples.
        win_count = len(df) // step - 1
        if win_count <= 0:
            # Recording too short for a single window. Without this guard a
            # file with fewer than `step` rows gives win_count == -1 and
            # np.zeros() raises ValueError on the negative dimension.
            continue
        data_x = np.zeros((win_count, window, df.shape[1]))
        # Every window of a recording carries the label of its folder, so the
        # label vector is filled once instead of on every loop iteration.
        data_y = np.full(win_count, activity_codes[label])

        for c in range(win_count):
            start_idx = c * step
            end_idx = start_idx + window
            data_x[c,:,:] = df.iloc[start_idx:end_idx].values
        data_all_x_list.append(data_x)
        data_all_y_list.append(data_y)
# Stack the per-file arrays: windows along axis 0, labels as integer codes.
data_all_x = np.concatenate(data_all_x_list,axis=0)
data_all_y = np.concatenate(data_all_y_list,axis=0)
data_all_y = data_all_y.astype(int)
print(data_all_x.shape)
print(data_all_y.shape)

(5102, 500, 6)
(5102,)


In [6]:
# Feature extraction for NN models, e.g. an MLP.
# Each 500-sample window (50 % overlap) is summarised by five statistics per
# signal, in this order: mean, standard deviation, min, max and mean absolute
# deviation.
# NOTE: this cell reuses the names data_all_x / data_all_y, so running it
# replaces the raw windows produced by the sliding-window cell above.
window = 500
step = window // 2  # hop between window starts
attributes = ['attitude.roll', 'attitude.pitch', 'attitude.yaw','userAcceleration.x','userAcceleration.y','userAcceleration.z']
feature_n = len(attributes)  # 6 signals
stats_per_attribute = 5
data_all_x_list = []
data_all_y_list = []
for j in folders:
    csv_list = glob(os.path.join(j, '*'))
    # Folder names look like "<activity>_<trial>" (e.g. "dws_11").
    label = os.path.basename(os.path.normpath(j)).split('_')[0]
    for i in csv_list:
        df = pd.read_csv(i, usecols=attributes)
        # Number of full windows that fit when advancing by `step` samples.
        win_count = len(df) // step - 1
        if win_count <= 0:
            # Recording too short for a single window; a negative count would
            # make np.zeros() raise ValueError.
            continue
        data_x = np.zeros((win_count, feature_n * stats_per_attribute))
        # Every window of a recording carries the label of its folder.
        data_y = np.full(win_count, activity_codes[label])

        for c in range(win_count):
            start_idx = c * step
            # Slice the window once and reuse it for every statistic.
            segment = df.iloc[start_idx:start_idx + window]
            temp_list = []
            for k in range(feature_n):
                signal = segment[attributes[k]]
                mean_value = signal.mean()
                temp_list.append(mean_value)
                # Standard deviation (the earlier code called .mean() here, so
                # this feature was a duplicate of the mean).
                temp_list.append(signal.std())
                temp_list.append(signal.min())
                temp_list.append(signal.max())
                # Mean absolute deviation around the mean, written out
                # explicitly because Series.mad() was removed in pandas 2.0.
                temp_list.append((signal - mean_value).abs().mean())
            data_x[c] = temp_list

        data_all_x_list.append(data_x)
        data_all_y_list.append(data_y)
# Stack the per-file arrays: one feature row per window, labels as integer codes.
data_all_x = np.concatenate(data_all_x_list,axis=0)
data_all_y = np.concatenate(data_all_y_list,axis=0)
data_all_y = data_all_y.astype(int)
print(data_all_x.shape)
print(data_all_y.shape)

(5102, 30)
(5102,)


## CNN Train & Test ##

In [None]:
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [8]:
# Fixed seed so the train/validation/test partition is identical after a
# kernel restart; without it every run evaluates on a different test set.
split_seed = 42
# Hold out 20 % of the windows for testing, then 10 % of the remainder for validation.
train_x_temp,test_x,train_y_temp,test_y = train_test_split(data_all_x,data_all_y,test_size=0.2,random_state=split_seed)
train_x,val_x,train_y,val_y = train_test_split(train_x_temp,train_y_temp,test_size=0.1,random_state=split_seed)
# Samples and labels must stay aligned in every split.
assert(len(train_x) == len(train_y))
assert(len(val_x) == len(val_y))
assert(len(test_x) == len(test_y))
print(len(train_x), len(test_x),len(val_x))

3672 1021 409


In [None]:
#custom dataset
class _Dataset(Dataset):
    def __init__(self, dat):
        self.dat = dat
        
    def __len__(self):
        return len(self.dat)
    
    def __getitem__(self,idx):
        return self.dat[idx]

class cnn(Module):
    """Small two-stage convolutional classifier.

    Two ``Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d`` stages (4 channels
    each) followed by one linear layer that maps the flattened feature map
    to class scores.

    The 3x3 convolutions use padding 1 and therefore keep the spatial size;
    each 2x2 max-pool halves both spatial dimensions (rounding down). For a
    single-channel input of height H and width W the flattened size is thus
    ``4 * (H // 4) * (W // 4)``.

    Args:
        in_features: Size of the flattened feature map fed to the linear
            layer. The default ``4 * 7 * 7`` corresponds to a 1 x 28 x 28
            input (28 -> 14 -> 7).
        num_classes: Number of output scores. Defaults to 10.

    NOTE(review): if the 500 x 6 windows of this notebook are fed as
    (N, 1, 500, 6) tensors with 6 activity classes (an assumption, confirm
    against the training code), use ``cnn(in_features=4 * 125 * 1,
    num_classes=6)``.
    """

    def __init__(self, in_features=4 * 7 * 7, num_classes=10):
        # The previous call, super(Net, self).__init__(), referenced an
        # undefined name `Net` and raised NameError on instantiation.
        super().__init__()

        self.cnn_layers = Sequential(
            # First 2D convolution stage: 1 -> 4 channels, spatial size halved.
            Conv2d(1, 4, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(4),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            # Second 2D convolution stage: 4 -> 4 channels, spatial size halved again.
            Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(4),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
        )

        self.linear_layers = Sequential(
            Linear(in_features, num_classes)
        )

    # Defining the forward pass
    def forward(self, x):
        """Return raw class scores of shape (batch, num_classes) for a batch ``x``."""
        x = self.cnn_layers(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x