# Artificial Neural Network

In [18]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import to_categorical
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

import warnings
warnings.filterwarnings('ignore')

random.seed(321)

In [19]:
tf.__version__

'2.2.0'

### Prepare data

In [20]:
df  = pd.read_csv('/Users/N1/Data3/Data-2020/10_code/60_data_for_models/rolled_no_aw_no_28.csv')

In [21]:
le = LabelEncoder()
df['Subject_ID'] = le.fit_transform(df['Subject_ID'])

In [22]:
df.tail(3)

Unnamed: 0,ACC1,ACC2,ACC3,TEMP,EDA,BVP,HR,Round,Magnitude,Activity,Subject_ID
263301,14.29129,-8.854751,64.277941,32.109,0.606237,0.027,92.236937,1,66.536121,Type,54
263302,14.684729,-8.987421,64.309118,32.1085,0.608734,-0.01875,92.266312,1,66.727613,Type,54
263303,15.156561,-9.14733,64.278824,32.108,0.610815,0.1365,92.2975,1,66.908125,Type,54


In [23]:
y = df['Activity']
X = df.drop(['Activity', 'Round'], axis =1)

print(y.shape, X.shape)

(263304,) (263304, 9)


In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

### Label Encode Activity

In [25]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
#y_val = le.transform(y_test)

print(X_train.shape,  y_train.shape, X_test.shape, y_test.shape)

(184312, 9) (184312,) (78992, 9) (78992,)


### One-Hot Encode Subject ID

In [26]:
X_train['train'] =1
X_test['train'] = 0

combined = pd.concat([X_train, X_test])
combined = pd.concat([combined, pd.get_dummies(combined['Subject_ID'])], axis =1)

In [27]:
X_train = combined[combined['train'] == 1]
X_test = combined[combined['train'] == 0]

X_train.drop(["train", "Subject_ID"], axis = 1, inplace = True)
X_test.drop(["train", "Subject_ID"], axis = 1, inplace = True)
print(X_train.shape, X_test.shape, X_train.shape[0] + X_test.shape[0])

(184312, 63) (78992, 63) 263304


### Normalize Data

In [28]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
X_train.iloc[:,:8] = ss.fit_transform(X_train.iloc[:,:8])
X_test.iloc[:,:8] = ss.transform(X_test.iloc[:,:8])

In [29]:
X_train = X_train.values
X_test = X_test.values

In [30]:
y_train_dummy = np_utils.to_categorical(y_train)
y_test_dummy = np_utils.to_categorical(y_test)

In [31]:
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(184312, 63) (184312,) (78992, 63) (78992,)


### Model

In [32]:
from keras.callbacks import ModelCheckpoint
filepath="models/weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [35]:
# Define per-fold score containers <-- these are new
acc_per_fold = []
loss_per_fold = []

# Merge inputs and targets
inputs = X_train
targets = y_train_dummy

In [37]:
inputs.shape[0]

184312

In [36]:
from sklearn.model_selection import KFold
# Define the K-fold Cross Validator
kfold = KFold(n_splits=inputs.shape[0], shuffle=True)

# LOOCV
fold_no = 1
for train, test in kfold.split(inputs, targets):
    #Define the model architecture
    model = Sequential()
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(4, activation='softmax')) #4 outputs are possible 
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
      # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(inputs[train], targets[train],
              batch_size=32,
              epochs=10,
              verbose=1)

    # Generate generalization metrics
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    
    # Increase fold number
    fold_no = fold_no + 1
    

# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')

for i in range(0, len(acc_per_fold)):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

KeyboardInterrupt: 

### Ignore everything below here since the test set was used for validation

In [87]:
y_pred = np.argmax(model.predict(X_test), axis=-1)

In [88]:
pd.unique(y_pred)

array([0, 2, 1, 3])

In [89]:
# col 1 = y_pred
# col 2 = y_test ground truth labels
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1,1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [1 1]
 [2 2]]


In [90]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
confusion_matrix(y_pred, y_test)

array([[36260,   246,   205,    33],
       [  482, 28843,   765,   344],
       [  116,    92,  5370,     0],
       [   38,   157,    15,  6026]])

In [91]:
accuracy_score(y_pred, y_test)

0.9684398420093174

In [92]:
f1_score(y_pred, y_test, average = 'weighted')

0.9687531567138236