# Activity classification problem using the UCI HAR dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
from tensorflow.math import confusion_matrix
import seaborn as sn

## Open raw acceleration and gyroscope data

Does not need to be run

In [None]:
# Training data: nine raw inertial-signal files, one DataFrame per file.
# sep=r'\s+' splits on runs of whitespace; it is the documented replacement
# for the delim_whitespace=True flag, which pandas 2.2 deprecated.
# header=None makes pandas treat the first line of each file as data.
X_train = [
    # raw body acceleration
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/body_acc_x_train.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/body_acc_y_train.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/body_acc_z_train.txt',
                sep=r'\s+', header=None),
    # raw body gyroscope
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/body_gyro_x_train.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/body_gyro_y_train.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/body_gyro_z_train.txt',
                sep=r'\s+', header=None),
    # raw total acceleration
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/total_acc_x_train.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/total_acc_y_train.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/train/Inertial Signals/total_acc_z_train.txt',
                sep=r'\s+', header=None),
]
# Keep the per-signal names bound too; they refer to the same frames as the list.
(raw_body_acc_x_train, raw_body_acc_y_train, raw_body_acc_z_train,
 raw_body_gyro_x_train, raw_body_gyro_y_train, raw_body_gyro_z_train,
 raw_total_acc_x_train, raw_total_acc_y_train, raw_total_acc_z_train) = X_train

In [None]:
# Testing data: nine raw inertial-signal files, one DataFrame per file.
# sep=r'\s+' splits on runs of whitespace; it is the documented replacement
# for the delim_whitespace=True flag, which pandas 2.2 deprecated.
# header=None makes pandas treat the first line of each file as data.
X_test = [
    # raw body acceleration
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/body_acc_x_test.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/body_acc_y_test.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/body_acc_z_test.txt',
                sep=r'\s+', header=None),
    # raw body gyroscope
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/body_gyro_x_test.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/body_gyro_y_test.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/body_gyro_z_test.txt',
                sep=r'\s+', header=None),
    # raw total acceleration
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/total_acc_x_test.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/total_acc_y_test.txt',
                sep=r'\s+', header=None),
    pd.read_csv('data/UCIHARDataset/test/Inertial Signals/total_acc_z_test.txt',
                sep=r'\s+', header=None),
]
# Keep the per-signal names bound too; they refer to the same frames as the list.
(raw_body_acc_x_test, raw_body_acc_y_test, raw_body_acc_z_test,
 raw_body_gyro_x_test, raw_body_gyro_y_test, raw_body_gyro_z_test,
 raw_total_acc_x_test, raw_total_acc_y_test, raw_total_acc_z_test) = X_test

In [None]:
# Report the dimensions of the first signal table in each split (train, then test).
for _frames in (X_train, X_test):
    print(_frames[0].shape)

In [None]:
# Swap rows and columns of every signal table, replacing the list entries in
# place, so that an integer column label such as X_train[i][0] selects one
# whole recording (presumably 128 readings long - confirm with the shapes below).
X_train[:] = [signal.transpose() for signal in X_train]
X_test[:] = [signal.transpose() for signal in X_test]
# Show the dimensions of the first table of each split after the swap.
print(X_train[0].shape)
print(X_test[0].shape)

In [None]:
# Display the first training signal table (entry 0 of X_train) as the cell output.
X_train[0]

In [None]:
# Overlay column 0 of each of the nine training signal tables on one set of axes.
for signal in X_train:
    signal[0].plot()
# Legend entries follow the order in which the signals sit in X_train.
plt.legend([
    'body_acc_x', 'body_acc_y', 'body_acc_z',
    'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
    'total_acc_x', 'total_acc_y', 'total_acc_z',
])
plt.title('An instance of training data')
plt.show()

In [None]:
# Overlay column 0 of each of the nine testing signal tables on one set of axes.
for signal in X_test:
    signal[0].plot()
# Legend entries follow the order in which the signals sit in X_test.
plt.legend([
    'body_acc_x', 'body_acc_y', 'body_acc_z',
    'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
    'total_acc_x', 'total_acc_y', 'total_acc_z',
])
plt.title('An instance of testing data')
plt.show()

In [None]:
# Second names for the raw signal lists; these are references, not copies.
raw_X_train, raw_X_test = X_train, X_test

## Classes

1. Walking
2. Walking upstairs
3. Walking downstairs
4. Sitting
5. Standing
6. Laying

In [None]:
# Display names of the six activities; position i names label i after the
# labels are shifted to start at 0.
classes = [
    'Walking',
    'Walking U-S',
    'Walking D-S',
    'Sitting',
    'Standing',
    'Laying',
]

In [None]:
# Activity labels, one whitespace-separated file per split.
# sep=r'\s+' replaces delim_whitespace=True, which pandas 2.2 deprecated;
# header=None keeps the first line as data, so the label column is named 0.
# Results for training data
y_train = pd.read_csv('y_train.txt', sep=r'\s+', header=None)
# Results for testing data
y_test = pd.read_csv('y_test.txt', sep=r'\s+', header=None)

In [None]:
# Subtract one from every label so numbering starts at 0 and each label can
# index directly into the six-entry `classes` list.
y_train = y_train.sub(1)
y_test = y_test.sub(1)

In [None]:
# Tally how many rows of each label the training and testing splits contain.
# Slot k of each list counts the rows whose (zero-based) label is k.
train_classes = [0] * 6
test_classes = [0] * 6

# Column 0 is the only label column of the frames read with header=None.
for label in y_train[0]:
    train_classes[label] += 1
for label in y_test[0]:
    test_classes[label] += 1

print('Class distribution in training:\t', train_classes)
print('Class distribution in testing:\t', test_classes)

In [None]:
# For each class, print what share of its rows went to train and to test.
for name, n_train, n_test in zip(classes, train_classes, test_classes):
    total = n_train + n_test
    print(name)
    print('\tPercentage train:\t', format(100 * n_train / total, '.2f'), '%')
    print('\tPercentage test:\t', format(100 * n_test / total, '.2f'), '%')

## Beginning preprocessing

Plain dataset has absolutely no changes.
Transformation 1 dataset has been changed using SigNorm.

Features (66 total) to have for each 128-length measurement:
- mean (9)
- median (9)
- variance (9)
- mean absolute deviation (9) — the `_mad` columns; pandas `mad()` computes the mean, not the median, absolute deviation
- rms (9)
- difference of min and max (9)
- xy magnitude (using dif of min and max x, and dif of min and max y) (3)
- yz magnitude (3)
- xz magnitude (3)
- xyz magnitude (3)

In [None]:
# function to compute features
def _summarize_signals(signals):
    """
    Build the 66-column feature table for one split (train or test).

    signals: sequence of at least nine DataFrames, ordered body_acc x/y/z,
    body_gyro x/y/z, total_acc x/y/z. Every statistic is a column-wise
    reduction, so the result has one row per input column.
    """
    sensors = ['body_acc', 'body_gyro', 'total_acc']
    signal_names = [sensor + '_' + axis for sensor in sensors for axis in 'xyz']

    # (column suffix, column-wise reduction); the order fixes the column order.
    reducers = [
        ('mean', lambda frame: frame.mean()),
        ('med', lambda frame: frame.median()),
        ('var', lambda frame: frame.var()),
        # Mean absolute deviation. This is what DataFrame.mad() returned
        # before pandas removed the method in 2.0.
        ('mad', lambda frame: (frame - frame.mean()).abs().mean()),
        ('rms', lambda frame: np.sqrt(np.power(frame, 2).sum() / len(frame.index))),
        # Range of each column: max - min (dx, dy, dz).
        ('dif', lambda frame: frame.max() - frame.min()),
    ]

    features = {}
    for suffix, reduce_frame in reducers:
        for position, name in enumerate(signal_names):
            features[name + '_' + suffix] = reduce_frame(signals[position])

    # Euclidean magnitudes of the per-axis ranges, for each sensor and axis group.
    axis_groups = [('xy', (0, 1)), ('yz', (1, 2)), ('xz', (0, 2)), ('xyz', (0, 1, 2))]
    for group, offsets in axis_groups:
        for sensor_number, sensor in enumerate(sensors):
            squared = [
                np.power(features[signal_names[sensor_number * 3 + offset] + '_dif'], 2)
                for offset in offsets
            ]
            features[sensor + '_' + group + '_mag'] = np.sqrt(sum(squared))

    return pd.DataFrame(features)


def getFeatures(train, test):
    """
    Turn the raw signal tables of both splits into feature tables.

    Parameters
    ----------
    train, test : sequence of DataFrame
        Nine signal tables each, ordered body_acc x/y/z, body_gyro x/y/z,
        total_acc x/y/z. Statistics are taken down each column.

    Returns
    -------
    (final_X_train, final_X_test) : tuple of DataFrame
        One row per input column and 66 feature columns: mean, med, var,
        mad, rms and dif for each of the nine signals (54), followed by the
        xy, yz, xz and xyz magnitudes of the dif values for each of the three
        sensors (12).

    Raises
    ------
    IndexError
        If either sequence holds fewer than nine tables.
    """
    final_X_train = _summarize_signals(train)
    final_X_test = _summarize_signals(test)

    return final_X_train, final_X_test

In [None]:
# Names of the 66 feature columns, in the order used for the feature tables:
# six per-signal statistics for the nine signals, then the four magnitude
# groups for the three sensors.
all_columns = (
    [sensor + '_' + axis + '_' + stat
     for stat in ('mean', 'med', 'var', 'mad', 'rms', 'dif')
     for sensor in ('body_acc', 'body_gyro', 'total_acc')
     for axis in ('x', 'y', 'z')]
    + [sensor + '_' + plane + '_mag'
       for plane in ('xy', 'yz', 'xz', 'xyz')
       for sensor in ('body_acc', 'body_gyro', 'total_acc')]
)

## Plain data

In [None]:
# Unmodified ("plain") signals, one CSV per signal and split.
# Training split: read the nine tables straight into the list ...
plain_X_train = [
    pd.read_csv('plain_har_data/train_body_acc_x.csv'),
    pd.read_csv('plain_har_data/train_body_acc_y.csv'),
    pd.read_csv('plain_har_data/train_body_acc_z.csv'),
    pd.read_csv('plain_har_data/train_body_gyro_x.csv'),
    pd.read_csv('plain_har_data/train_body_gyro_y.csv'),
    pd.read_csv('plain_har_data/train_body_gyro_z.csv'),
    pd.read_csv('plain_har_data/train_total_acc_x.csv'),
    pd.read_csv('plain_har_data/train_total_acc_y.csv'),
    pd.read_csv('plain_har_data/train_total_acc_z.csv'),
]
# ... and bind the individual names to the same frames.
(plain_body_acc_x_train, plain_body_acc_y_train, plain_body_acc_z_train,
 plain_body_gyro_x_train, plain_body_gyro_y_train, plain_body_gyro_z_train,
 plain_total_acc_x_train, plain_total_acc_y_train, plain_total_acc_z_train) = plain_X_train

# Testing split, same layout.
plain_X_test = [
    pd.read_csv('plain_har_data/test_body_acc_x.csv'),
    pd.read_csv('plain_har_data/test_body_acc_y.csv'),
    pd.read_csv('plain_har_data/test_body_acc_z.csv'),
    pd.read_csv('plain_har_data/test_body_gyro_x.csv'),
    pd.read_csv('plain_har_data/test_body_gyro_y.csv'),
    pd.read_csv('plain_har_data/test_body_gyro_z.csv'),
    pd.read_csv('plain_har_data/test_total_acc_x.csv'),
    pd.read_csv('plain_har_data/test_total_acc_y.csv'),
    pd.read_csv('plain_har_data/test_total_acc_z.csv'),
]
(plain_body_acc_x_test, plain_body_acc_y_test, plain_body_acc_z_test,
 plain_body_gyro_x_test, plain_body_gyro_y_test, plain_body_gyro_z_test,
 plain_total_acc_x_test, plain_total_acc_y_test, plain_total_acc_z_test) = plain_X_test

In [None]:
#for i in range(len(plain_X_train)):
#    plain_X_train[i].iloc[:, 0].plot()
#plt.title('An instance of training data')
#plt.legend(['body_acc_x', 'body_acc_y', 'body_acc_z', 
#            'body_gyro_x', 'body_gyro_y', 'body_gyro_z', 
#            'total_acc_x', 'total_acc_y', 'total_acc_z'])
#plt.show()
#plain_X_train[0].iloc[:, 0].plot()
#plt.title('body_acc_x')
#plt.show()
#plain_X_train[1].iloc[:, 0].plot()
#plt.title('body_acc_y')
#plt.show()
#plain_X_train[2].iloc[:, 0].plot()
#plt.title('body_acc_z')
#plt.show()
#plain_X_train[3].iloc[:, 6:10].plot()
#plt.title('body_gyro_x')
#plt.show()
#plain_X_train[4].iloc[:, 6:10].plot()
#plt.title('body_gyro_y')
#plt.show()
#plain_X_train[5].iloc[:, 6:10].plot()
#plt.title('body_gyro_z')
#plt.show()
#plain_X_train[6].iloc[:, 6:10].plot()
#plt.title('total_acc_x')
#plt.show()
#plain_X_train[7].iloc[:, 6:10].plot()
#plt.title('total_acc_y')
#plt.show()
#plain_X_train[8].iloc[:, 6:10].plot()
#plt.title('total_acc_z')
#plt.show()

In [None]:
#plain_X_train[0].iloc[:, 0:9].hist(alpha = 0.3) # X can be gaussian
#plain_X_train[1].iloc[:, 0].hist(alpha = 0.3) # Y
#plain_X_train[2].iloc[:, 0].hist(alpha = 0.3) # Z can be gaussian
#plain_X_train[3].iloc[:, 0].hist(alpha = 0.3) # X can be gaussian
#plain_X_train[4].iloc[:, 0].hist(alpha = 0.3) # Y can be gaussian
#plain_X_train[5].iloc[:, 0].hist(alpha = 0.3) # Z
#plain_X_train[6].iloc[:, 0].hist(alpha = 0.3) # X can be gaussian
#plain_X_train[7].iloc[:, 0].hist(alpha = 0.3) # Y can be gaussian
#plain_X_train[8].iloc[:, 0].hist(alpha = 0.3) # Z can be gaussian
#plt.show()

In [None]:
# Replace the two lists of signal tables with the feature tables that
# getFeatures returns for them (the names are rebound, so the lists are gone).
plain_X_train, plain_X_test = getFeatures(plain_X_train, plain_X_test)

In [None]:
# Report the dimensions of the plain feature tables (train, then test).
for _features in (plain_X_train, plain_X_test):
    print(_features.shape)

In [None]:
# Standardize the plain features. The scaler learns its per-column mean and
# scale from the training split only.
stand = StandardScaler()
plain_X_train = pd.DataFrame(stand.fit_transform(plain_X_train), columns = all_columns)
# Apply the training statistics to the test split. Refitting on the test set
# (fit_transform) would scale the two splits differently and leak test
# statistics into the evaluation.
plain_X_test = pd.DataFrame(stand.transform(plain_X_test), columns = all_columns)
# Summary statistics of the scaled training features, shown as the cell output.
plain_X_train.describe()

In [None]:
# Convert the feature tables to arrays and append a trailing axis of length 1,
# giving (rows, features, 1) - the per-sample shape the Conv1D input layer
# is built from.
plain_X_train = np.expand_dims(plain_X_train.to_numpy(), axis = 2)
plain_X_test = np.expand_dims(plain_X_test.to_numpy(), axis = 2)

## Transformation 1

Steps

1: Moving average filter 2 (t2)
    
    - All
    - Improves precision for Walking, Walking Upstairs, and Walking Downstairs
    - Less precision for Sitting and Standing

2: Difference transformation for (t2a, t2b)
    
    - all body_acc_x, body_acc_y, body_acc_z
    - improves Walking Upstairs and Walking Downstairs

3: Yeo-Johnson and then difference transformation (t2c, t2d, t2e)
    
    - body_gyro_x, body_gyro_y, body_gyro_z

In [None]:
# Preprocessed ("transformation 1") signals. Each signal is read from its own
# directory (t1, t1a ... t1e), one CSV per signal and split.
# Training split: read the nine tables straight into the list ...
t1_X_train = [
    pd.read_csv('t1b/prep_train_body_acc_x.csv'),
    pd.read_csv('t1a/prep_train_body_acc_y.csv'),
    pd.read_csv('t1/prep_train_body_acc_z.csv'),
    pd.read_csv('t1c/prep_train_body_gyro_x.csv'),
    pd.read_csv('t1d/prep_train_body_gyro_y.csv'),
    pd.read_csv('t1e/prep_train_body_gyro_z.csv'),
    pd.read_csv('t1/prep_train_total_acc_x.csv'),
    pd.read_csv('t1/prep_train_total_acc_y.csv'),
    pd.read_csv('t1/prep_train_total_acc_z.csv'),
]
# ... and bind the individual names to the same frames.
(t1_body_acc_x_train, t1_body_acc_y_train, t1_body_acc_z_train,
 t1_body_gyro_x_train, t1_body_gyro_y_train, t1_body_gyro_z_train,
 t1_total_acc_x_train, t1_total_acc_y_train, t1_total_acc_z_train) = t1_X_train

# Testing split, same layout.
t1_X_test = [
    pd.read_csv('t1b/prep_test_body_acc_x.csv'),
    pd.read_csv('t1a/prep_test_body_acc_y.csv'),
    pd.read_csv('t1/prep_test_body_acc_z.csv'),
    pd.read_csv('t1c/prep_test_body_gyro_x.csv'),
    pd.read_csv('t1d/prep_test_body_gyro_y.csv'),
    pd.read_csv('t1e/prep_test_body_gyro_z.csv'),
    pd.read_csv('t1/prep_test_total_acc_x.csv'),
    pd.read_csv('t1/prep_test_total_acc_y.csv'),
    pd.read_csv('t1/prep_test_total_acc_z.csv'),
]
(t1_body_acc_x_test, t1_body_acc_y_test, t1_body_acc_z_test,
 t1_body_gyro_x_test, t1_body_gyro_y_test, t1_body_gyro_z_test,
 t1_total_acc_x_test, t1_total_acc_y_test, t1_total_acc_z_test) = t1_X_test

In [None]:
# Overlay the first column (by position) of each of the nine preprocessed
# training signal tables on one set of axes.
for signal in t1_X_train:
    signal.iloc[:, 0].plot()
# Legend entries follow the order in which the signals sit in t1_X_train.
plt.legend([
    'body_acc_x', 'body_acc_y', 'body_acc_z',
    'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
    'total_acc_x', 'total_acc_y', 'total_acc_z',
])
plt.title('An instance of training data')
plt.show()

In [None]:
#t1_X_train[2].iloc[:, 0:20].hist(alpha = 0.3) # X
#t1_X_train[1].iloc[:, 0].hist(alpha = 0.3) # Y
#t1_X_train[2].iloc[:, 0].hist(alpha = 0.3) # Z
#t1_X_train[3].iloc[:, 0].hist(alpha = 0.3) # X
#t1_X_train[4].iloc[:, 0].hist(alpha = 0.3) # Y
#t1_X_train[5].iloc[:, 0].hist(alpha = 0.3) # Z
#t1_X_train[6].iloc[:, 0].hist(alpha = 0.3) # X
#t1_X_train[7].iloc[:, 0].hist(alpha = 0.3) # Y
#t1_X_train[8].iloc[:, 0].hist(alpha = 0.3) # Z
#plt.show()

In [None]:
# Compute features: replace the two lists of preprocessed signal tables with
# the feature tables that getFeatures returns for them (the names are rebound).
t1_X_train, t1_X_test = getFeatures(t1_X_train, t1_X_test)

In [None]:
# Summary statistics of the transformation 1 training features, shown as the cell output.
t1_X_train.describe()

In [None]:
# Standardize the transformation 1 features. The scaler learns its per-column
# mean and scale from the training split only.
stand = StandardScaler()
t1_X_train = pd.DataFrame(stand.fit_transform(t1_X_train), columns = all_columns)
# Apply the training statistics to the test split. Refitting on the test set
# (fit_transform) would scale the two splits differently and leak test
# statistics into the evaluation.
t1_X_test = pd.DataFrame(stand.transform(t1_X_test), columns = all_columns)
# Summary statistics of the scaled training features, shown as the cell output.
t1_X_train.describe()

In [None]:
# Convert the feature tables to arrays and append a trailing axis of length 1,
# giving (rows, features, 1) for the 1-D convolutional model.
t1_X_train = np.expand_dims(t1_X_train.to_numpy(), axis = 2)
t1_X_test = np.expand_dims(t1_X_test.to_numpy(), axis = 2)

## Create model 

In [None]:
def build_model(input_shape = None):
    """
    Build and compile the 1-D convolutional activity classifier.

    Parameters
    ----------
    input_shape : tuple, optional
        Per-sample input shape passed to the first layer. When omitted it is
        taken from the global `plain_X_train` (axes 1 and 2), as before.

    Returns
    -------
    A compiled Sequential model whose final layer is a 6-unit softmax, so
    `predict` yields one probability per class.
    """
    if input_shape is None:
        input_shape = (plain_X_train.shape[1], plain_X_train.shape[2])

    # Convolutional base followed by a small dense classifier head
    model = models.Sequential([
        layers.Conv1D(filters = 10, kernel_size = 20, activation = 'relu', input_shape = input_shape),
        layers.Dropout(0.3),
        layers.MaxPooling1D(pool_size = 4),
        layers.Flatten(),
        layers.Dense(32, activation = 'relu'),
        layers.Dense(6, activation = 'softmax')
    ])

    # The output layer already applies softmax, so the loss must treat the
    # outputs as probabilities. from_logits = True would apply softmax a
    # second time inside the loss.
    model.compile(optimizer = "adam",
             loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
             metrics = ['accuracy'])

    return model

In [None]:
# Early-stopping callback that watches the validation loss with a patience of 5.
# NOTE: `early_stop` is rebound (with mode and verbose set) in each training
# cell further down, so this instance is not the one passed to fit().
early_stop = keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5)

In [None]:
# Model for the plain features; print its layer-by-layer summary.
model0 = build_model()
model0.summary()

## Test plain data 

In [None]:
# Upper bound on training epochs; early stopping may end the run sooner.
EPOCHS = 100

# Stop once the validation loss has gone 5 epochs without improving.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    patience = 5,
    verbose = 1,
)

# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned on the data later used for the reported metrics.
plain_history = model0.fit(
    x = plain_X_train,
    y = y_train,
    validation_data = (plain_X_test, y_test),
    callbacks = [early_stop],
    epochs = EPOCHS,
    verbose = 1,
)

In [None]:
# Model outputs for the plain test features (one row of class scores per sample).
preds0 = model0.predict(plain_X_test)

In [None]:
# Collapse each row of class scores to the index of its largest entry.
# The result array has one (float) slot per test label, initialised to zero.
preds0_summarized = np.zeros(y_test.shape[0])
for row_number, scores in enumerate(preds0):
    preds0_summarized[row_number] = scores.argmax()

In [None]:
# Per-class precision/recall/F1 report for the plain-data model, with rows named by `classes`.
print(classification_report(y_test, preds0_summarized, target_names = classes))

In [None]:
# Create the 6x6 confusion matrix with tensorflow.math.confusion_matrix
# (true labels first, predictions second) and convert it to a NumPy array.
cm0 = confusion_matrix(y_test, preds0_summarized, num_classes = 6).numpy()

In [None]:
# Label both axes of the confusion matrix with the class names and draw it
# as an annotated heatmap.
df_cm = pd.DataFrame(cm0, index = list(classes), columns = list(classes))
plt.figure(figsize = (20,15))
sn.set(font_scale=1.5)
sn.heatmap(df_cm, annot=True, linewidths = 0.4, square = True)

## Test transformation 1 data

In [None]:
# Fresh, untrained model for the transformation 1 features.
model1 = build_model()

In [None]:
# Upper bound on training epochs; early stopping may end the run sooner.
EPOCHS = 100

# Stop once the validation loss has gone 5 epochs without improving.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    patience = 5,
    verbose = 1,
)

# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned on the data later used for the reported metrics.
t1_history = model1.fit(
    x = t1_X_train,
    y = y_train,
    validation_data = (t1_X_test, y_test),
    callbacks = [early_stop],
    epochs = EPOCHS,
    verbose = 1,
)

In [None]:
# Model outputs for the transformation 1 test features (one row of class scores per sample).
preds1 = model1.predict(t1_X_test)

In [None]:
# Collapse each row of class scores to the index of its largest entry.
# The result array has one (float) slot per test label, initialised to zero.
preds1_summarized = np.zeros(y_test.shape[0])
for row_number, scores in enumerate(preds1):
    preds1_summarized[row_number] = scores.argmax()

In [None]:
# Per-class precision/recall/F1 report for the transformation 1 model, with rows named by `classes`.
print(classification_report(y_test, preds1_summarized, target_names = classes))

In [None]:
# Create the 6x6 confusion matrix with tensorflow.math.confusion_matrix
# (true labels first, predictions second) and convert it to a NumPy array.
cm1 = confusion_matrix(y_test, preds1_summarized, num_classes = 6).numpy()

In [None]:
# Label both axes of the confusion matrix with the class names and draw it
# as an annotated heatmap.
df_cm = pd.DataFrame(cm1, index = list(classes), columns = list(classes))
plt.figure(figsize = (20,15))
sn.set(font_scale=1.5)
sn.heatmap(df_cm, annot=True, linewidths = 0.4, square = True)