In [1]:

import numpy as np
import pandas as pd
import sqlite3
import os 
from datetime import datetime

import sys
sys.path.append('..') # in order to import modules from my own package
from packageMeinhart import functionsMasterProjectMeinhart as fmpm
from packageMeinhart.functionsMasterProjectMeinhart import print_precision_recall_accuracy
from packageMeinhart.functionsMasterProjectMeinhart import print_misclassified_data_points

In [2]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [3]:
#sys.path

In [4]:
# Hyper-parameters shared by the data preparation and the network
element_size = 6        # values per time step
time_steps = 6 * 8      # number of steps for 6 s at 8 Hz
num_classes = 11
batch_size = hidden_layer_size = 128

In [5]:
def one_hot(vec, vals=num_classes):
    """Convert a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    vec : sequence or 1-D array of int
        Class indices, each expected to lie in ``[0, vals)``.
    vals : int
        Number of classes, i.e. number of columns of the result. The default
        is the value ``num_classes`` had when this function was defined.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(vec), vals)`` with exactly one 1 per row.

    Raises
    ------
    IndexError
        If any label lies outside ``[0, vals)``.
    """
    n = len(vec)
    out = np.zeros((n, vals))
    if n == 0:
        # nothing to mark; avoids indexing with an empty (float) array
        return out

    labels = np.asarray(vec)
    # Negative labels would otherwise wrap around silently and mark a column
    # counted from the end, producing a wrong but valid-looking encoding.
    if labels.min() < 0 or labels.max() >= vals:
        raise IndexError('class labels must lie in [0, {})'.format(vals))

    out[np.arange(n), labels] = 1
    return out

In [6]:
#one_hot(y_test_all.astype(int))

## Training and test data

In [7]:
# Load the repetitions of all subjects: the subject given by
# test_data_subject becomes the test set, all other subjects the train set.
test_data_subject = 1

# raw string: the backslashes of the Windows path must not be read as escapes
db_name = r'E:\Jupyter_Notebooks\Master_Project_Meinhart\DataBase_Physio_with_nonEx.db' # database name
exercise_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE'] # exercise abbreviations

# Only the comparison operator is formatted into the SQL text; the exercise
# abbreviation and the subject id are bound as query parameters (?).
query_template = """
        SELECT r.start_time, r.stop_time, e.csv_file
        FROM subjects s
        INNER JOIN exercises e
        ON s.id = e.subject_id
        INNER JOIN paradigms p
        ON p.id = e.paradigm_id
        INNER JOIN repetitions r
        ON e.id = r.exercise_id
        WHERE p.abbreviation = ?
        AND s.id {} ?
        """
train_query_sql = query_template.format('!=')  # every subject except the test subject
test_query_sql = query_template.format('=')    # the test subject only

train_data_points = {} # dictionary with the exercise abbreviation as key
test_data_points = {}

# Connect to an existing database
conn = sqlite3.connect(db_name)
try:
    for key in exercise_abbrs:
        query_params = (key, test_data_subject)
        train_data_points[key] = pd.read_sql_query(train_query_sql, conn, params=query_params)
        test_data_points[key] = pd.read_sql_query(test_query_sql, conn, params=query_params)
finally:
    # close the connection even if one of the queries fails
    conn.close()

In [8]:
# Report the number of repetitions per exercise, first for the training
# split and then for the test split, each followed by its total.
for heading, split in [('Number of data points (repetitions) for training:', train_data_points),
                       ('\nNumber of data points (repetitions) for testing:', test_data_points)]:
    print(heading)
    count = 0
    for key in exercise_abbrs:
        num_repetitions = split[key].shape[0]
        print(key + ':\t' + str(num_repetitions))
        count += num_repetitions
    print('total:\t' + str(count))

Number of data points (repetitions) for training:
RF:	239
RO:	240
RS:	240
LR:	241
BC:	242
TC:	243
MP:	242
SA:	242
P1:	240
P2:	239
NE:	3712
total:	6120

Number of data points (repetitions) for testing:
RF:	30
RO:	30
RS:	30
LR:	30
BC:	31
TC:	30
MP:	30
SA:	31
P1:	30
P2:	30
NE:	407
total:	709


In [9]:
# Head of one loaded data frame as an example:
# 'NE' is one of the keys of exercise_abbrs; each row is one repetition
# (start_time, stop_time, csv_file) as selected by the SQL query.
train_data_points['NE'].head()

Unnamed: 0,start_time,stop_time,csv_file
0,0.0,3.6097522701321,subject02_00_nonEx.csv
1,3.6097522701321,5.98056861437206,subject02_00_nonEx.csv
2,5.98056861437206,7.84471642992804,subject02_00_nonEx.csv
3,7.84471642992804,12.3377339822144,subject02_00_nonEx.csv
4,12.3377339822144,15.5979262935134,subject02_00_nonEx.csv


In [10]:
# dictionary for labels
# Class labels: every exercise abbreviation is mapped to its position in this list.
ex_abbr2ind = {abbr: index for index, abbr in
               enumerate(['RF', 'RO', 'RS', 'LR', 'BC', 'TC', 'MP', 'SA', 'P1', 'P2', 'NE'])}

# Reverse lookup: label index -> exercise abbreviation.
ex_ind2abbr = dict(zip(ex_abbr2ind.values(), ex_abbr2ind.keys()))

In [11]:
# number of label classes in the dictionary
len(ex_abbr2ind)

11

In [12]:
# putting all train data and labels together
# Stack the per-exercise frames in label order (0, 1, 2, ...) into one array.
train_frames = [train_data_points[ex_ind2abbr[label]] for label in range(len(ex_ind2abbr))]
all_train_data = np.concatenate(train_frames, axis=0)

# Matching label vector: label k is repeated once for every row of frame k.
y_train_all = np.repeat(np.arange(len(train_frames), dtype=float),
                        [len(frame) for frame in train_frames])

In [13]:
# total number of training repetitions over all exercises
len(all_train_data)

6120

In [14]:
# first three rows; columns follow the SQL query: start_time, stop_time, csv_file
print(all_train_data[:3])

[['0.6509139384920637' '3.911928323412699' 'subject02_RF_05.csv']
 ['3.911928323412699' '7.031159474206351' 'subject02_RF_05.csv']
 ['7.031159474206351' '10.398511284722224' 'subject02_RF_05.csv']]


In [None]:
# Locations of the cached training features, labels and sequence lengths.
# The file names encode which subject was left out of the training set.
# raw string: '\P' is not a valid escape sequence and must stay a literal backslash
X_train_dir = r'E:\Physio_Features'

subject_tag = '{0:02}'.format(test_data_subject)
X_train_name = 'RNN_X_train_without_subject' + subject_tag + '_8Hz.csv'
y_train_name = 'RNN_y_train_without_subject' + subject_tag + '_8Hz.csv'
seqlens_train_name = 'RNN_seqlens_train_without_subject' + subject_tag + '_8Hz.csv'

X_train_path = os.path.join(X_train_dir, X_train_name)
y_train_path = os.path.join(X_train_dir, y_train_name)
seqlens_train_path = os.path.join(X_train_dir, seqlens_train_name)

In [None]:
# Build the training tensor (repetitions x time_steps x element_size) together
# with the sequence lengths, or load all of it from the cache files.
# only generate the train data if they do not already exist
if not os.path.isfile(X_train_path):

    # Generating one corresponding matrix for train data
    X_train_all = []
    seqlens_train_all = []

    factor_256_to_8Hz = 32
    steps_6s_256Hz = 6 * 256

    # directory of csv file (raw string keeps the Windows backslashes literal)
    csv_dir = r'E:\Physio_Data_Split_Ex_and_NonEx'

    # every row of all_train_data is (start_time, stop_time, csv_file)
    for data in all_train_data:

        signal_data = fmpm.get_sensor_data(os.path.join(csv_dir, data[2]),
                    signals=['Acc','Gyr'],
                    sampling_rate=256,
                    start_time=float(data[0]),
                    stop_time=float(data[1]))

        X_one = np.zeros((time_steps, element_size))
        # number of non-padded 8 Hz steps; all columns have same length
        seqlens_train_all.append(int(len(signal_data['Acc'][:,0]) / factor_256_to_8Hz))

        col_inc = 0
        for sig in ['Acc','Gyr']:
            for col in [0,1,2]:
                # zero-pad the signal to 6 s at 256 Hz, then average every
                # block of 32 samples to obtain 8 Hz
                # NOTE(review): assumes no repetition is longer than 6 s;
                # a longer signal does not fit the buffer and raises here
                puffer_6s_256Hz = np.zeros(steps_6s_256Hz)
                puffer_6s_256Hz[:len(signal_data[sig][:,col])] = signal_data[sig][:,col]
                puffer_6s_8Hz = puffer_6s_256Hz.reshape(-1, factor_256_to_8Hz).mean(axis=1)
                X_one[:,col+col_inc] = puffer_6s_8Hz

            col_inc += 3

        X_train_all.append(X_one)

    # same types as produced by the loading branch below
    X_train_all = np.array(X_train_all)
    seqlens_train_all = np.array(seqlens_train_all, dtype=int)

    # flatten each repetition to one row; the row count follows from the data
    # instead of being hard-coded
    np.savetxt(X_train_path, X_train_all.reshape(-1, time_steps * element_size), delimiter=";")
    np.savetxt(y_train_path, y_train_all, delimiter=";")
    np.savetxt(seqlens_train_path, seqlens_train_all, delimiter=";")

# otherwise load them
else:
    # passing the path lets numpy open and close the file itself
    X_train_all = np.loadtxt(X_train_path, delimiter=";").reshape(-1, time_steps, element_size)

    y_train_all = np.atleast_1d(np.loadtxt(y_train_path, delimiter=";"))
    # loadtxt returns floats, but sequence lengths are fed to an int32 placeholder
    seqlens_train_all = np.atleast_1d(np.loadtxt(seqlens_train_path, delimiter=";")).astype(int)

In [None]:
# Stack the per-exercise test frames in label order (0, 1, 2, ...) into one array.
test_frames = [test_data_points[ex_ind2abbr[label]] for label in range(len(ex_ind2abbr))]
all_test_data = np.concatenate(test_frames, axis=0)

# Matching label vector: label k is repeated once for every row of frame k.
y_test_all = np.repeat(np.arange(len(test_frames), dtype=float),
                       [len(frame) for frame in test_frames])

In [None]:
# define some parameters
element_size = 6
time_steps = 48 # number of steps for 6 s at 8 Hz
num_classes = 11
batch_size = 128
hidden_layer_size = 128

# Generating one corresponding matrix for test data
X_test_all = []
seqlens_test_all = []

factor_256_to_8Hz = 32
steps_6s_256Hz = 6 * 256

# directory of csv file (raw string keeps the Windows backslashes literal)
csv_dir = r'E:\Physio_Data_Split_Ex_and_NonEx'

# every row of all_test_data is (start_time, stop_time, csv_file)
for data in all_test_data:

    signal_data = fmpm.get_sensor_data(os.path.join(csv_dir, data[2]),
                signals=['Acc','Gyr'],
                sampling_rate=256,
                start_time=float(data[0]),
                stop_time=float(data[1]))

    X_one = np.zeros((time_steps, element_size))
    # number of non-padded 8 Hz steps; all columns have same length
    seqlens_test_all.append(int(len(signal_data['Acc'][:,0]) / factor_256_to_8Hz))

    col_inc = 0
    for sig in ['Acc','Gyr']:
        for col in [0,1,2]:
            # zero-pad the signal to 6 s at 256 Hz, then average every block
            # of 32 samples to obtain 8 Hz
            # NOTE(review): assumes no repetition is longer than 6 s;
            # a longer signal does not fit the buffer and raises here
            puffer_6s_256Hz = np.zeros(steps_6s_256Hz)
            puffer_6s_256Hz[:len(signal_data[sig][:,col])] = signal_data[sig][:,col]
            puffer_6s_8Hz = puffer_6s_256Hz.reshape(-1, factor_256_to_8Hz).mean(axis=1)
            X_one[:,col+col_inc] = puffer_6s_8Hz

        col_inc += 3

    X_test_all.append(X_one)

# arrays instead of Python lists, so the data are converted only once
X_test_all = np.array(X_test_all)
seqlens_test_all = np.array(seqlens_test_all, dtype=int)

In [None]:
# shape of the test tensor built in the previous cell
np.shape(X_test_all)

In [None]:
# define some parameters
element_size = 6
time_steps = 96 # number of steps for 6 s at 16 Hz
num_classes = 11
batch_size = 128
hidden_layer_size = 128

# Generating one corresponding matrix for test data
# NOTE(review): this replaces any X_test_all / seqlens_test_all built before
# with a 16 Hz version (96 steps per repetition) -- confirm that evaluating
# on 16 Hz data is intended.
X_test_all = []
seqlens_test_all = []

# 256 Hz / 16 = 16 Hz (the name now matches the rate actually produced)
factor_256_to_16Hz = 16
steps_6s_256Hz = 6 * 256

# directory of csv file (raw string keeps the Windows backslashes literal)
csv_dir = r'E:\Physio_Data_Split_Ex_and_NonEx'

# every row of all_test_data is (start_time, stop_time, csv_file)
for data in all_test_data:

    signal_data = fmpm.get_sensor_data(os.path.join(csv_dir, data[2]),
                signals=['Acc','Gyr'],
                sampling_rate=256,
                start_time=float(data[0]),
                stop_time=float(data[1]))

    X_one = np.zeros((time_steps, element_size))
    # number of non-padded 16 Hz steps; all columns have same length
    seqlens_test_all.append(int(len(signal_data['Acc'][:,0]) / factor_256_to_16Hz))

    col_inc = 0
    for sig in ['Acc','Gyr']:
        for col in [0,1,2]:
            # zero-pad the signal to 6 s at 256 Hz, then average every block
            # of 16 samples to obtain 16 Hz
            # NOTE(review): assumes no repetition is longer than 6 s;
            # a longer signal does not fit the buffer and raises here
            puffer_6s_256Hz = np.zeros(steps_6s_256Hz)
            puffer_6s_256Hz[:len(signal_data[sig][:,col])] = signal_data[sig][:,col]
            puffer_6s_16Hz = puffer_6s_256Hz.reshape(-1, factor_256_to_16Hz).mean(axis=1)
            X_one[:,col+col_inc] = puffer_6s_16Hz

        col_inc += 3

    X_test_all.append(X_one)

# arrays instead of Python lists, so the data are converted only once
X_test_all = np.array(X_test_all)
seqlens_test_all = np.array(seqlens_test_all, dtype=int)

In [None]:
# shape of the test tensor after the previous cell rebuilt it
np.shape(X_test_all)

In [None]:
def get_train_batch(batch_size, X_train_all, y_train_all, seqlens_train_all):
    """Draw a random batch (without replacement) from the training data.

    Parameters
    ----------
    batch_size : int
        Number of instances to draw; if it exceeds the number of available
        instances, all instances are returned in random order.
    X_train_all : array-like
        Training inputs, indexed by instance along the first axis.
    y_train_all : array-like
        Label of every instance.
    seqlens_train_all : array-like
        Sequence length of every instance.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y, seqlens)``, all selected with the same random indices.
    """
    # The pool size comes from the data passed in, not from a notebook-level
    # variable, so the function works for any data set handed to it.
    instance_indices = list(range(len(X_train_all)))
    np.random.shuffle(instance_indices)
    batch_indices = instance_indices[:batch_size]

    X = np.asarray(X_train_all)[batch_indices]
    y = np.asarray(y_train_all)[batch_indices]
    seqlens = np.asarray(seqlens_train_all)[batch_indices]

    return X, y, seqlens

In [None]:
# Draw one example batch for inspection.
# Note: the name `seqlens` is bound again later when the graph placeholders are created.
X, y, seqlens = get_train_batch(batch_size, X_train_all, y_train_all, seqlens_train_all)

In [None]:
# shape of the example batch drawn above
np.shape(X)

In [None]:
# current working directory; the relative TensorBoard log directory is resolved against it
os.getcwd()

In [None]:
# where to save TensorBoard model summaries

LOG_DIR_ALL = "logs/RNN_with_summaries"

# to view the summaries run:  tensorboard --logdir=logs/RNN_with_summaries

# and open:  http://FlorianMeinhart:6006

# parameters from the first cells of the notebook, kept here for reference only
#element_size = 6
#time_steps = 48 # number of steps for 6 s at 8 Hz
#num_classes = 11
#batch_size = 128
#hidden_layer_size = 128

# overrides for the cells below
batch_size = 256
# NOTE(review): with None, cells that allocate np.zeros((time_steps, element_size))
# fail if they are re-run after this cell -- re-run their parameter block first.
time_steps = None

In [None]:
# Build the TensorFlow graph: placeholders, one LSTM layer, a linear output
# layer with cross-entropy loss, the optimizer and the summary writers.

# one time-stamped log directory per run, separately for train and test summaries
now = datetime.now()
LOG_DIR_TRAIN = LOG_DIR_ALL + now.strftime('/%Y%m%d-%H%M%S' + '_train')
LOG_DIR_TEST = LOG_DIR_ALL + now.strftime('/%Y%m%d-%H%M%S' + '_test')

# start from an empty graph so that re-running this cell does not add duplicate ops
tf.reset_default_graph()

with tf.name_scope('data'):
    # inputs: [batch, time_steps, element_size]; labels: one-hot rows; seqlens: one length per instance
    inputs = tf.placeholder(tf.float32, shape=[None, time_steps, element_size], name='inputs')
    labels = tf.placeholder(tf.int32, shape=[None, num_classes], name='labels')
    seqlens = tf.placeholder(tf.int32, shape=[None], name='seqlens')

with tf.name_scope('RNN_layer'):
    #rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_layer_size)
    rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer_size)
    outputs, states = tf.nn.dynamic_rnn(rnn_cell, inputs, sequence_length=seqlens, dtype=tf.float32)

with tf.name_scope('linear_layer'):
    W1 = tf.Variable(tf.truncated_normal([hidden_layer_size, num_classes], mean=0, stddev=0.1), name='weights_linear')
    b1 = tf.Variable(tf.truncated_normal([num_classes], mean=0, stddev=0.1), name='biases_linear')
    #final_output = tf.matmul(states, W1) + b1
    # NOTE(review): for BasicLSTMCell `states` should be an LSTMStateTuple (c, h),
    # which would make states[0] the cell state c rather than the hidden state h
    # -- confirm against the TensorFlow docs that c is the intended input here.
    final_output = tf.matmul(states[0], W1) + b1
    
    # despite its name, `softmax` holds the cross-entropy values that are averaged into the loss below
    softmax = tf.nn.softmax_cross_entropy_with_logits_v2(logits=final_output, labels=labels)
    cross_entropy = tf.reduce_mean(softmax)
    tf.summary.scalar('cross_entropy', cross_entropy)


train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

with tf.name_scope('evaluation'):
    # share of instances whose arg-max prediction equals the arg-max of the one-hot label, in percent
    correct_prediction = tf.equal(tf.argmax(labels,1), tf.argmax(final_output,1), name='correct_prediction')
    accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32)))*100
    tf.summary.scalar('accuracy', accuracy)

# a single op that evaluates all summaries registered above
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(LOG_DIR_TRAIN)
test_writer = tf.summary.FileWriter(LOG_DIR_TEST)

# command for starting TensorBoard on the common log directory
print('tensorboard --logdir=' + LOG_DIR_ALL)

In [None]:
# Train for 1001 steps on random batches; every 20 steps write train and test
# summaries and print both accuracies. After the loop `batch_pred` holds the
# test-set predictions of the last evaluation.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Create the prediction op once. Calling tf.argmax inside the loop would
    # add a new node to the graph at every evaluation.
    predictions = tf.argmax(final_output, 1)

    train_writer.add_graph(sess.graph)
    test_writer.add_graph(sess.graph)

    # the test feed never changes, so it is assembled a single time
    y_test_all_one_hot = one_hot(y_test_all.astype(int), vals=num_classes)
    test_feed = {inputs: X_test_all,
                 labels: y_test_all_one_hot,
                 seqlens: seqlens_test_all}

    for step in range(1001):
        x_batch, y_batch, seqlens_batch = get_train_batch(batch_size, X_train_all, y_train_all, seqlens_train_all)

        y_batch_one_hot = one_hot(y_batch.astype(int), vals=num_classes)
        train_feed = {inputs: x_batch,
                      labels: y_batch_one_hot,
                      seqlens: seqlens_batch}

        sess.run(train_step, feed_dict=train_feed)

        if step % 20 == 0:
            # accuracy on the batch that was just used for the update
            summary_train, accuracy_train = sess.run([merged, accuracy], feed_dict=train_feed)
            print('Accuracy at step {}'.format(step))
            print('\tTrain Set: {:.3f}'.format(accuracy_train))
            train_writer.add_summary(summary_train, step)

            # accuracy and predictions on the complete test set
            summary_test, batch_pred, accuracy_test = sess.run([merged, predictions, accuracy],
                                                                feed_dict=test_feed)
            test_writer.add_summary(summary_test, step)

            print('\tTest Set:  {:.3f}'.format(accuracy_test))

    # make sure buffered summaries reach the event files
    train_writer.flush()
    test_writer.flush()

In [None]:
def one_hot_back(mat_one_hot, vals=num_classes):
    """Convert a one-hot matrix back into a vector of class indices.

    Parameters
    ----------
    mat_one_hot : array-like, 2-D
        One row per instance and one column per class; a non-zero entry marks
        the class of the row.
    vals : int
        Number of class columns to inspect. The default is the value
        ``num_classes`` had when this function was defined.

    Returns
    -------
    numpy.ndarray
        Float vector with one class index per row. Rows without any non-zero
        entry in the inspected columns keep the value 0; if a row has several
        non-zero entries, the highest column index wins.
    """
    mat_one_hot = np.asarray(mat_one_hot)
    n = np.shape(mat_one_hot)[0]
    out = np.zeros(n)
    # iterate over `vals` (the argument), not over the notebook-level num_classes
    for ii in range(vals):
        ind = np.where(mat_one_hot[:,ii])
        out[ind] = ii
    return out

In [None]:
# round trip: decode the one-hot labels of the last training batch
one_hot_back(y_batch_one_hot, vals=num_classes)

In [None]:
# batch_pred holds the test-set predictions of the last evaluation step of the training loop
print_precision_recall_accuracy(batch_pred, y_test_all.astype(int))

In [None]:
# same predictions and labels as in the previous cell
print_misclassified_data_points(batch_pred, y_test_all.astype(int))

In [None]:
# toy call with hand-written values (presumably predictions first, labels second, as in the call on the test set)
print_precision_recall_accuracy([0,1,2,3], [0,1,2,2])