# First Approach - Recurrent Neural Network

In [1]:
from IPython.display import clear_output
from pivottablejs import pivot_ui
from datetime import datetime
import sqlite3
import pandas as pd
import numpy as np
import os

import sys
sys.path.append('..') # in order to import modules from my own package
from packageMeinhart import functionsMasterProjectMeinhart as fmpm
from packageMeinhart.functionsMasterProjectMeinhart import print_precision_recall_accuracy
from packageMeinhart.functionsMasterProjectMeinhart import print_misclassified_data_points

## Class to handle data for RNN

In [2]:
def load_data_from_database(data_base_path='DataBase_Physio_with_nonEx.db'):
    '''
    Function to load the following data from data base (one row per repetition):
        - subject IDs
        - exercise abbreviations
        - number of repetitions
        - sequence numbers
        - start times
        - stop times
        - csv-file name
    
    Parameters
    ----------
    data_base_path : string
        Path to the SQLite data base.
    
    Returns
    -------
    DataFrame
        DataFrame with the listed information (see above). The columns are
        subject_id, abbreviation, num_rep, sequence_num, start_time, stop_time
        and csv_file.
    
    Raises
    ------
    Whatever sqlite3 / pandas raise if the data base cannot be read
    (the connection is closed in that case, too).
    '''
    # sql command to extract data
    query_sql = """
        SELECT e.subject_id,
        p.abbreviation,
        e.num_rep,
        r.sequence_num,
        r.start_time, r.stop_time,
        e.csv_file
        FROM subjects s
        INNER JOIN exercises e
        ON s.id = e.subject_id
        INNER JOIN paradigms p
        ON p.id = e.paradigm_id
        INNER JOIN repetitions r
        ON e.id = r.exercise_id
        """
    
    # connect to the existing data base; no explicit cursor is needed because
    # pandas executes the query on the connection itself
    conn = sqlite3.connect(data_base_path)
    try:
        all_data_points_df = pd.read_sql_query(query_sql, conn)
    finally:
        # always release the connection, even if the query fails
        conn.close()
    
    return all_data_points_df

In [3]:
def select_data_points_from_df(all_data_points_df,
                               subject_ids=-1,
                               subject_ids_complementary=[],
                               reps=-1,
                               abbrs=-1,
                               with_non_Ex=True,
                               sub_id_key='subject_id',
                               num_rep_key='num_rep',
                               abbreviation_key='abbreviation'):
    '''
    Function to select data points from a DataFrame based on subject IDs,
    number of repetitions and exercise abbreviations.
    
    None of the arguments is modified (in particular, the list given for
    reps is left untouched).
    
    Parameters
    ----------
    all_data_points_df : pandas DataFrame
        DataFrame with all data points.
    
    subject_ids : int or list
        Subject IDs to select (e.g. [1, 2, 3]).
        --> default -1: Select all subjects.
        
    subject_ids_complementary : int or list
        If subject_ids is -1 --> select only subjects not in subject_ids_complementary.
        An empty list (default) or None excludes nothing.
        
    reps : int or list
        Repetition numbers to select (e.g. [5, 10]).
        --> default -1: Select all repetitions.
        
    abbrs : int, string or list
        Exercise abbreviations to select (e.g. ['RF', 'SA']).
        --> default -1: Select all exercise abbreviations.
    
    with_non_Ex : boolean
        If True and reps is given --> additionally keep non exercise data
        (data points with zero repetitions).
        If False and reps is -1 --> omit non exercise data.
        
    sub_id_key : string
        Key of the DataFrame for subject IDs.
        
    num_rep_key : string
        Key of the DataFrame for repetition numbers.
        
    abbreviation_key : string
        Key of the DataFrame for exercise abbreviations.
    
    Returns
    -------
    DataFrame
        DataFrame with selected data points.
    '''
    
    def _is_select_all(value):
        # -1 is the "select everything" sentinel; compare by value, because an
        # identity test (value is -1) depends on interpreter-specific int caching
        return (isinstance(value, (int, np.integer))
                and not isinstance(value, bool)
                and value == -1)
    
    def _as_list(value):
        # always return a NEW list, so that the caller's object is never mutated
        if value is None:
            return []
        if isinstance(value, (list, tuple, set, frozenset, np.ndarray)):
            return list(value)
        return [value]
    
    data_points_df = all_data_points_df.copy()
    
    # select the subject IDs
    if _is_select_all(subject_ids):
        excluded_subjects = _as_list(subject_ids_complementary)
        if excluded_subjects:
            data_points_df = data_points_df.loc[~data_points_df[sub_id_key].isin(excluded_subjects)]
    else:
        data_points_df = data_points_df.loc[data_points_df[sub_id_key].isin(_as_list(subject_ids))]

    # select the repetition numbers
    if not _is_select_all(reps):
        selected_reps = _as_list(reps)
        if with_non_Ex:
            selected_reps.append(0) # zero repetitions correspond to non exercise data
        data_points_df = data_points_df.loc[data_points_df[num_rep_key].isin(selected_reps)]
    elif not with_non_Ex:
        data_points_df = data_points_df.loc[data_points_df[num_rep_key] != 0]
        
    # select the exercise abbreviations
    if not _is_select_all(abbrs):
        data_points_df = data_points_df.loc[data_points_df[abbreviation_key].isin(_as_list(abbrs))]

    return data_points_df

In [4]:
def print_progress_func(current_num, max_num, prev_prog, add_info=None):
    '''
    Function to print the progress of a loop in percent.
    
    The cell output is only cleared and re-printed if the (integer) percentage
    is higher than the one of the previous call.
    
    Parameters
    ----------
    current_num : int
        Number of the current run in a loop.
        
    max_num : int
        Maximum number of runs in a loop.
        
    prev_prog : int
        Progress [%] returned by the previous call (0 for the first call).
        
    add_info : str
        Text to print in front of the percentage instead of "Progress:".
    
    Returns
    -------
    int
        Current progress [%], to be passed as prev_prog at the next call.
    '''
    percent_done = int(current_num / max_num * 100)
    
    # nothing new to show --> leave the cell output untouched
    if percent_done <= prev_prog:
        return percent_done
    
    clear_output(wait=True)
    
    if isinstance(add_info, str):
        message = add_info + ' {:3d}%'.format(percent_done)
    else:
        message = 'Progress: {:3d}%'.format(percent_done)
    print(message)
    
    return percent_done

In [5]:
def generate_sequences_from_separate_repetitions(data_points_df,
                   max_sequence_length=6,
                   orig_sampling_rate=256,
                   new_sampling_rate=8,
                   cutoff=10,
                   order=6,
                   csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
                   csv_skiprows=0,
                   csv_separator=',',
                   signal_abbrs=['Acc','Gyr'],
                   signal_orientations=['x','y','z'],
                   labels_abbr2num_dict={'RF':0,'RO':1,'RS':2,'LR':3,'BC':4,'TC':5,'MP':6,'SA':7,'P1':8,'P2':9,'NE':10},
                   abbreviation_key='abbreviation',
                   start_time_key='start_time',
                   stop_time_key='stop_time',
                   csv_file_key='csv_file',
                   print_progress=True,
                   progress_info='Generate sequences...'):
    '''
    Function to generate sequences from separate repetitions, changing the sampling rate
    and saving them to a tensor.
    
    Every csv-file is read and low-pass filtered once; afterwards each repetition
    (row of data_points_df) belonging to that file is cut out, down-sampled and
    written zero-padded into the output tensor. The output order therefore follows
    the csv-files (order of first appearance), not the row order of data_points_df.
    
    Parameters
    ----------
    data_points_df : DataFrame
        DataFrame with information about data points (see load_data_from_database()).
        
    max_sequence_length : int or float
        Maximum sequence length to consider in seconds (longer repetitions are truncated).
        
    orig_sampling_rate : int or float
        Original sampling rate of the signals in Hz.
        
    new_sampling_rate : int or float
        New sampling rate of the signals in Hz.
        
    cutoff : int or float
        Cutoff frequency for filtering.
    
    order : int
        Order of butterworth filter.
        
    csv_data_dir : string
        Directory of signal data csv-files.
        
    csv_skiprows : int
        Number of rows to skip for signal data csv-files.
        
    csv_separator : string
        Separator for signal data csv-files.
        
    signal_abbrs : list of strings
        Abbreviations of the signals (e.g. ['Acc','Gyr']).
    
    signal_orientations : list of strings
        Orientations of the signals (e.g. ['x','y','z']).
        
    labels_abbr2num_dict : dict
        Dictionary to convert exercise abbreviations to number (e.g. ={'RF':0,'RO':1,'RS':2, ... }).
    
    abbreviation_key : string
        Exercise abbreviation key for DataFrame which contains data base entries.
        
    start_time_key : string
        Start time key for DataFrame which contains data base entries.
        
    stop_time_key : string
        Stop time key for DataFrame which contains data base entries.
        
    csv_file_key : string
        csv-file key for DataFrame which contains data base entries.
        
    print_progress : boolean
        If True --> print progress at signal sequences generation.
        (The cell output is cleared at the end in any case.)
        
    progress_info : string
        Additional information to print with progress.
        
    
    Returns
    -------
    X_all, y_all, seqlens_all
        
        X_all ... tensor with signal sequences (dimensions: [number of data points, 
                                                             max sequence length, 
                                                             number of signals])
        y_all ... array with labels (int8)
        seqlens_all ... array with sequence lengths (number of valid time steps)
    '''
    
    # max index of new sequences; cast to int because the value is used as
    # array dimension and slice bound (the lengths/rates may be given as float)
    max_sequ_index = int(max_sequence_length * new_sampling_rate)
    
    # number of signals (Acc: x, y, z; Gyr: x, y, z --> 6 signals)
    num_orientations = len(signal_orientations)
    num_signals = len(signal_abbrs) * num_orientations
    
    num_data_points = len(data_points_df)
    
    # create array for labels
    y_all = np.zeros(num_data_points, dtype=np.int8)
    
    # create tensor for sequences (zero-initialised --> shorter sequences stay zero-padded)
    X_all = np.zeros((num_data_points, max_sequ_index, num_signals))
    
    # create array for sequence lengths
    # (builtin int instead of the alias np.int, which was removed from NumPy)
    seqlens_all = np.zeros(num_data_points, dtype=int)
    
    # sampling rate ratio of original and new sampling rate (e.g. if ratio = 32 --> take every 32nd index)
    sampling_rate_ratio = orig_sampling_rate / new_sampling_rate

    # location counter for the sequence tensor
    loc_count = 0

    # unique csv-files (--> each file is loaded only once); the column is
    # addressed via csv_file_key, so that a non-default key works as well
    unique_csv_files = data_points_df[csv_file_key].unique()

    # variables for progress printing
    prog_count = 0
    max_count = len(unique_csv_files)
    prev_progress = 0 # previous progress

    # going through all csv-files
    for current_csv_file in unique_csv_files:

        # join file path
        file_path = os.path.join(csv_data_dir, current_csv_file)

        # load the signal data of the current file
        selected_data_df = pd.read_csv(file_path, skiprows=csv_skiprows, sep=csv_separator)
        
        # select the columns of each signal and filter them with a butterworth filter
        # NOTE(review): the pattern sig+'*' is a regular expression, i.e. 'Acc*' matches
        # every column containing 'Ac' -- confirm that this selects exactly the intended
        # columns (and in the order of signal_orientations) for the csv-files in use.
        selected_data_filt = {}
        for sig in signal_abbrs:
            raw_signal = selected_data_df.filter(regex=sig+'*').values
            selected_data_filt[sig] = fmpm.butter_lowpass_filter(raw_signal, 
                                                                 cutoff=cutoff, 
                                                                 fs=orig_sampling_rate, 
                                                                 order=order)
    
        # data frame with all repetitions of the current file; the index is reset
        # once here, so that the rows can be addressed with 0 ... n-1 below
        current_data_points = data_points_df.loc[
            data_points_df[csv_file_key] == current_csv_file].reset_index(drop=True)

        # going through all repetitions of the current file
        for ii in range(len(current_data_points)):
            
            # get start and stop indices of current data point (current repetition)
            start_time = float(current_data_points.loc[ii, start_time_key])
            stop_time = float(current_data_points.loc[ii, stop_time_key])
            start_idx = round(start_time * orig_sampling_rate)
            stop_idx = round(stop_time * orig_sampling_rate)

            # consider the new sampling rate for signal data selection
            new_indices = np.arange(start_idx, stop_idx, sampling_rate_ratio).round().astype(int)
            
            # truncate repetitions which are longer than the maximum sequence length
            if len(new_indices) > max_sequ_index:
                new_indices = new_indices[:max_sequ_index]
            
            # add current sequences with new sampling rate
            for kk, sig in enumerate(signal_abbrs):
                for ll in range(num_orientations):
                    # explanation: X_all[index_of_current_data_point, 
                    #                    select_all_until_length_of_new_signal_data, 
                    #                    index_of_current_signal (0...5)]
                    X_all[loc_count, :len(new_indices), kk*num_orientations+ll] = \
                        selected_data_filt[sig][new_indices, ll]

            # add current label
            current_ex_abbr = current_data_points.loc[ii, abbreviation_key]
            y_all[loc_count] = labels_abbr2num_dict[current_ex_abbr]
            
            # add current sequence length
            seqlens_all[loc_count] = len(new_indices)

            loc_count += 1

        # print progress of sequence generation
        if print_progress:
            prog_count += 1
            prev_progress = print_progress_func(prog_count, max_count, prev_progress, add_info=progress_info)
    
    clear_output()
    
    return X_all, y_all, seqlens_all

In [6]:
class PhysioData_RNN():
    '''
    Class to handle signal data for RNN.
    
    On construction the data points are loaded from the data base, split into
    a test and a training selection and converted to zero-padded sequence tensors.
    
    Selection rules (see select_data_points_from_df()):
        - test data: subjects given by test_subject_ids (-1 --> all subjects)
        - training data: subjects given by train_subject_ids; if this is -1, all
          subjects except the ones in test_subject_ids are taken
          (if both are -1, training and test data contain all subjects)
    
    Attributes
    ----------
    all_data_points_df, test_data_points_df, train_data_points_df : DataFrame
        All / test / training data points (one row per repetition).
    
    X_test, X_train : array
        Sequence tensors [number of data points, max sequence length, number of signals].
    
    y_test, y_train : array
        Numeric labels.
    
    seqlens_test, seqlens_train : array
        Number of valid (not padded) time steps of each sequence.
    
    For the meaning of the constructor parameters look at the docstrings of
    the following functions:
        - load_data_from_database()
        - select_data_points_from_df()
        - generate_sequences_from_separate_repetitions()
    
    The parameters feat_save, feat_load_if_present and feat_save_dir are accepted
    but not used by this implementation.
    '''
    def __init__(self,
                 test_subject_ids=-1,
                 train_subject_ids=-1,
                 test_rep_nums=-1,
                 train_rep_nums=-1,
                 test_ex_abbrs=-1,
                 train_ex_abbrs=-1,
                 with_non_Ex=True,
                 max_sequence_length=6,
                 orig_sampling_rate=256,
                 new_sampling_rate=8,
                 cutoff=10,
                 order=6,
                 csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
                 csv_skiprows=0,
                 csv_separator=',',
                 data_base_path=r'E:\Jupyter_Notebooks\Master_Project_Meinhart\DataBase_Physio_with_nonEx.db',
                 feat_save=False,
                 feat_load_if_present=False,
                 feat_save_dir=r'E:\Physio_Features',
                 print_progress=True,
                 signal_abbrs=['Acc','Gyr'],
                 signal_orientations=['x','y','z'],
                 labels_abbr2num_dict={'RF':0,'RO':1,'RS':2,'LR':3,'BC':4,'TC':5,'MP':6,'SA':7,'P1':8,'P2':9,'NE':10},
                 sub_id_key='subject_id',
                 num_rep_key='num_rep',
                 abbreviation_key='abbreviation',
                 start_time_key='start_time',
                 stop_time_key='stop_time',
                 csv_file_key='csv_file'):
        
        # load all data points from the data base
        self.all_data_points_df = load_data_from_database(data_base_path)
        
        # DataFrame keys used by both selections
        selection_keys = dict(sub_id_key=sub_id_key,
                              num_rep_key=num_rep_key,
                              abbreviation_key=abbreviation_key)
        
        # select data points for testing
        self.test_data_points_df = select_data_points_from_df(self.all_data_points_df,
                                                              subject_ids=test_subject_ids,
                                                              subject_ids_complementary=[],
                                                              reps=test_rep_nums,
                                                              abbrs=test_ex_abbrs,
                                                              with_non_Ex=with_non_Ex,
                                                              **selection_keys)
        
        # select data points for training
        # (test subjects are excluded if no training subjects are given explicitly)
        self.train_data_points_df = select_data_points_from_df(self.all_data_points_df,
                                                               subject_ids=train_subject_ids,
                                                               subject_ids_complementary=test_subject_ids,
                                                               reps=train_rep_nums,
                                                               abbrs=train_ex_abbrs,
                                                               with_non_Ex=with_non_Ex,
                                                               **selection_keys)
        
        # settings shared by the sequence generation for testing and training
        sequence_kwargs = dict(max_sequence_length=max_sequence_length,
                               orig_sampling_rate=orig_sampling_rate,
                               new_sampling_rate=new_sampling_rate,
                               cutoff=cutoff,
                               order=order,
                               csv_data_dir=csv_data_dir,
                               csv_skiprows=csv_skiprows,
                               csv_separator=csv_separator,
                               signal_abbrs=signal_abbrs,
                               signal_orientations=signal_orientations,
                               labels_abbr2num_dict=labels_abbr2num_dict,
                               abbreviation_key=abbreviation_key,
                               start_time_key=start_time_key,
                               stop_time_key=stop_time_key,
                               csv_file_key=csv_file_key,
                               print_progress=print_progress)
        
        # generate sequences for testing
        self.X_test, self.y_test, self.seqlens_test = generate_sequences_from_separate_repetitions(
                                                               self.test_data_points_df,
                                                               progress_info='Generate sequences for testing...',
                                                               **sequence_kwargs)
        
        # generate sequences for training
        self.X_train, self.y_train, self.seqlens_train = generate_sequences_from_separate_repetitions(
                                                               self.train_data_points_df,
                                                               progress_info='Generate sequences for training...',
                                                               **sequence_kwargs)
    
    
    def get_train_batch(self, batch_size):
        '''
        Method to get batch with randomly selected training data
        (drawn without replacement within one batch).
        
        Parameters
        ----------
        batch_size : int
            Number of data points in batch. If it exceeds the number of
            training data points, all training data points are returned.

        Returns
        -------
        X_train_batch, y_train_batch, seqlens_train_batch
        '''
        
        instance_indices = list(range(len(self.y_train))) # list with all train indices
        np.random.shuffle(instance_indices) # shuffle the train indices
        batch_indices = instance_indices[:batch_size] # randomly select train indices
    
        # select batch data with corresponding indices
        # (np.asarray avoids copying the whole tensor; the index selection itself returns a copy)
        X_train_batch = np.asarray(self.X_train)[batch_indices]
        y_train_batch = self.y_train[batch_indices]
        seqlens_train_batch = np.asarray(self.seqlens_train)[batch_indices]

        return X_train_batch, y_train_batch, seqlens_train_batch

    
    # methods to get data
    def get_X_test(self):
        return self.X_test
    
    def get_y_test(self):
        return self.y_test
    
    def get_seqlens_test(self):
        return self.seqlens_test
    
    def get_X_train(self):
        return self.X_train
    
    def get_y_train(self):
        return self.y_train
    
    def get_seqlens_train(self):
        return self.seqlens_train
    
    def get_sequlens_train(self):
        # former (misspelled) name, kept for backward compatibility
        return self.seqlens_train
    
    
    # methods to get data points (DataFrames)
    def get_test_data_points(self):
        return self.test_data_points_df
    
    def get_train_data_points(self):
        return self.train_data_points_df
    
    def get_all_data_points(self):
        return self.all_data_points_df


### Create instance of the class *PhysioData_RNN*

In [7]:
# Build the data handler: subject 1 is held out for testing; since train_subject_ids
# keeps its default (-1), all remaining subjects are used for training.
# All other settings (data base path, csv directory, sampling rates, ...) are the defaults.
PD_RNN1 = PhysioData_RNN(test_subject_ids=1)

In [8]:
# Shape of the test tensor: (number of test data points, max sequence length in samples, number of signals)
np.shape(PD_RNN1.get_X_test())

(709, 48, 6)

In [9]:
# Shape of the training tensor: (number of training data points, max sequence length in samples, number of signals)
np.shape(PD_RNN1.get_X_train())

(6120, 48, 6)

In [10]:
# First test sequence (rows = time steps, columns = signals);
# time steps beyond the actual sequence length are zero-padded
PD_RNN1.get_X_test()[0]

array([[-8.85008976e-01,  3.73013764e-01,  2.24020535e-01,
         1.21751042e+01,  1.52829258e+00,  9.10124605e+00],
       [-9.04367908e-01,  8.26654823e-01,  3.67851136e-01,
        -1.01565373e+01,  5.38131320e+00,  7.39012265e+01],
       [-1.01528357e+00,  1.07474501e+00,  3.88348309e-01,
        -2.42753698e+01, -2.82604112e+01,  1.39737623e+02],
       [-1.03430703e+00,  1.15137975e+00,  2.56821977e-01,
         1.23076905e+01, -3.72862596e+01,  1.91338412e+02],
       [-6.34748614e-01,  9.63927847e-01,  2.02384178e-01,
        -7.54283377e+00, -3.91051296e+01,  1.58483358e+02],
       [-2.93779369e-01,  7.57520082e-01,  2.75981183e-01,
        -5.35511185e+00, -2.56590433e+01,  1.41178164e+02],
       [ 1.04911864e-01,  5.41331501e-01,  2.45186189e-01,
         4.44534291e+00, -1.63078708e+01,  1.13614763e+02],
       [ 4.32216915e-01,  4.15494757e-01,  2.65919449e-01,
         1.68503415e+00, -2.25416173e+00,  6.80021935e+01],
       [ 6.11687397e-01,  4.31368824e-01,  2.806

In [11]:
# Numeric label of the first test data point (mapping: see labels_abbr2num_dict)
PD_RNN1.get_y_test()[0]

0

In [12]:
# Number of valid (not zero-padded) time steps of the first test sequence
PD_RNN1.get_seqlens_test()[0]

21

In [13]:
# Draw a random batch of 5 training data points: sequences, labels and sequence lengths
X_train_batch, y_train_batch, seqlens_train_batch = PD_RNN1.get_train_batch(batch_size=5)

In [14]:
# Batch tensor shape: (batch size, max sequence length in samples, number of signals)
np.shape(X_train_batch)

(5, 48, 6)

In [15]:
# Numeric labels of the batch (not yet one-hot encoded)
y_train_batch

array([10,  8, 10, 10,  7], dtype=int8)

In [16]:
# Number of valid time steps of each sequence in the batch
seqlens_train_batch

array([27, 17, 16, 28, 24])

## Building a Recurrent Neural Network with *TensorFlow*

In [17]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [36]:
# directory to save TensorBoard model summaries
# (raw string: the backslashes of the Windows path must not be interpreted as escape sequences)
LOG_DIR_ALL = r'E:\Jupyter_Notebooks\Master_Project_Meinhart_git\logs\RNN_with_summaries'

# run the command:
print('tensorboard --logdir=' + LOG_DIR_ALL)

#  --> open http://FlorianMeinhart:6006

tensorboard --logdir=E:\Jupyter_Notebooks\Master_Project_Meinhart_git\logs\RNN_with_summaries


In [37]:
# --- parameters for the RNN ---

# data dimensions
num_classes = 11 # number of classes
num_signals = 6 # number of signals (Acc: x, y, z; Gyr: x, y, z)
time_steps = None # no fixed number of time steps --> dynamic_rnn

# network size
hidden_layer_size_rnn = 128 # number of neurons in the hidden layer of the RNN

# training
num_steps = 301 # number of steps for training
batch_size = 500 # number of data points for each batch

In [38]:
# get the current time and use it as name prefix of the log sub-directories,
# so that every run writes its TensorBoard summaries to its own
# <LOG_DIR_ALL>/<YYYYmmdd-HHMMSS>_train and ..._test directory
now = datetime.now()
run_stamp = now.strftime('%Y%m%d-%H%M%S')

# os.path.join inserts the path separator (no hand-written backslash in the format string)
LOG_DIR_TRAIN = os.path.join(LOG_DIR_ALL, run_stamp + '_train')
LOG_DIR_TEST = os.path.join(LOG_DIR_ALL, run_stamp + '_test')

In [39]:
# create new tensorflow graph
# (resetting the default graph makes this cell re-runnable without piling up nodes)
tf.reset_default_graph()

# Placeholders which are fed at every sess.run call:
#   inputs  ... sequence tensor [batch, time steps, num_signals]; the time dimension is
#               left open (time_steps is None)
#   labels  ... labels in one-hot format [batch, num_classes]
#   seqlens ... number of valid time steps per sequence [batch]
with tf.name_scope('data'):
    inputs = tf.placeholder(tf.float32, shape=[None, time_steps, num_signals], name='inputs')
    labels = tf.placeholder(tf.int32, shape=[None, num_classes], name='labels')
    seqlens = tf.placeholder(tf.int32, shape=[None], name='seqlens')

# Recurrent layer; seqlens is handed over as sequence_length, so the zero-padded
# part of each sequence is presumably not processed (see the dynamic_rnn documentation)
with tf.name_scope('RNN_layer'):
    #rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_layer_size)
    rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer_size_rnn)
    outputs, states = tf.nn.dynamic_rnn(rnn_cell, inputs, sequence_length=seqlens, dtype=tf.float32)

# Linear layer which maps the final RNN state to one logit per class
with tf.name_scope('linear_layer'):
    W1 = tf.Variable(tf.truncated_normal([hidden_layer_size_rnn, num_classes], mean=0, stddev=0.1), name='weights_linear')
    b1 = tf.Variable(tf.truncated_normal([num_classes], mean=0, stddev=0.1), name='biases_linear')
    #final_output = tf.matmul(states, W1) + b1
    # NOTE(review): for an LSTM cell the final state is presumably a tuple (c, h), which
    # would make states[0] the cell state c and not the hidden state h -- confirm that
    # using the cell state here is intended.
    final_output = tf.matmul(states[0], W1) + b1
    
    # despite its name, `softmax` holds the cross-entropy loss values returned by the op
    # (not softmax probabilities); their mean is the training loss
    softmax = tf.nn.softmax_cross_entropy_with_logits_v2(logits=final_output, labels=labels)
    cross_entropy = tf.reduce_mean(softmax)
    tf.summary.scalar('cross_entropy', cross_entropy)


# optimizer step minimizing the mean cross entropy
# (positional arguments presumably: learning rate 0.001, decay 0.9 -- TODO confirm against the TF signature)
train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# accuracy in percent: share of data points whose largest logit is at the position of the true class
with tf.name_scope('evaluation'):
    correct_prediction = tf.equal(tf.argmax(labels,1), tf.argmax(final_output,1), name='correct_prediction')
    accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32)))*100
    tf.summary.scalar('accuracy', accuracy)

# merge summaries and create summary writers for training and testing
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(LOG_DIR_TRAIN)
test_writer = tf.summary.FileWriter(LOG_DIR_TEST)

In [40]:
def one_hot(labels, num_classes):
    '''
    Convert labels to one-hot format.

    Parameters
    ----------
    labels : array or list of integers
        Class indices in the range 0 ... num_classes-1
        --> e.g. [0, 1, 4, 2, 0]
        
    num_classes : int
        Number of classes.
        (Has to be > max(labels))

    Returns
    -------
    matrix
        Labels in one-hot format (float array with shape [len(labels), num_classes]).
    
        --> e.g.
              [[1., 0., 0., 0., 0.],
               [0., 1., 0., 0., 0.],
               [0., 0., 0., 0., 1.],
               [0., 0., 1., 0., 0.],
               [1., 0., 0., 0., 0.]]
    
    Raises
    ------
    IndexError
        If a label is negative or >= num_classes.
    '''
    n = len(labels)
    labels_one_hot = np.zeros((n, num_classes))
    
    # nothing to mark for an empty label list
    if n == 0:
        return labels_one_hot
    
    label_idx = np.asarray(labels)
    
    # negative labels would silently wrap around to the last classes --> reject them
    # explicitly (too large labels raise the same error type as plain indexing would)
    if label_idx.min() < 0 or label_idx.max() >= num_classes:
        raise IndexError('labels have to be in the range 0 ... {} (num_classes - 1)'.format(num_classes - 1))
    
    labels_one_hot[np.arange(n), label_idx] = 1
    return labels_one_hot

In [41]:
# Create the prediction op (index of the largest logit) once, before the loop:
# calling tf.argmax(...) inside the loop would add a new node to the graph
# at every evaluation step.
prediction_op = tf.argmax(final_output, 1)

# open a session for the created graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer.add_graph(sess.graph)
    test_writer.add_graph(sess.graph)
    
    # get test labels in one-hot format
    y_test_all_one_hot = one_hot(PD_RNN1.get_y_test(), num_classes)
    
    # get sequence tensor for testing and corresponding sequence lengths
    X_test_all = PD_RNN1.get_X_test()
    seqlens_test_all = PD_RNN1.get_seqlens_test()
    
    # the test set is always evaluated as a whole --> build its feed dict only once
    test_feed = {inputs: X_test_all,
                 labels: y_test_all_one_hot,
                 seqlens: seqlens_test_all}
    
    # train the RNN with a defined number of steps
    for step in range(num_steps):
        # get batch data
        x_batch, y_batch, seqlens_batch = PD_RNN1.get_train_batch(batch_size)
        
        # get batch labels in one-hot format
        y_batch_one_hot = one_hot(y_batch, num_classes)
        
        train_feed = {inputs: x_batch,
                      labels: y_batch_one_hot,
                      seqlens: seqlens_batch}
        
        # run one training step
        sess.run(train_step, feed_dict=train_feed)
        
        # evaluate the RNN every 20 steps
        if step % 20 == 0:
            # accuracy on the batch the network has just been trained on
            summary_train, accuracy_train = sess.run([merged, accuracy], feed_dict=train_feed)
            print('Accuracy at step {}'.format(step))
            print('\tTrain Set: {:.3f}'.format(accuracy_train))
            
            # write to train summary
            train_writer.add_summary(summary_train, step)
    
            # accuracy on the whole test set; batch_pred keeps the test predictions of the
            # most recent evaluation and is used for the evaluation after the training
            summary_test, batch_pred, accuracy_test = sess.run([merged, prediction_op, accuracy],
                                                                feed_dict=test_feed)
            # write to test summary
            test_writer.add_summary(summary_test, step)
    
            print('\tTest Set:  {:.3f}'.format(accuracy_test))
    
    # make sure that all summaries are written to disk
    # (the writers stay open, so the cell can be run again)
    train_writer.flush()
    test_writer.flush()

Accuracy at step 0
	Train Set: 13.400
	Test Set:  9.591
Accuracy at step 20
	Train Set: 60.800
	Test Set:  53.738
Accuracy at step 40
	Train Set: 74.600
	Test Set:  77.715
Accuracy at step 60
	Train Set: 84.600
	Test Set:  81.382
Accuracy at step 80
	Train Set: 87.000
	Test Set:  85.049
Accuracy at step 100
	Train Set: 90.200
	Test Set:  88.858
Accuracy at step 120
	Train Set: 91.200
	Test Set:  90.691
Accuracy at step 140
	Train Set: 96.200
	Test Set:  92.525
Accuracy at step 160
	Train Set: 95.600
	Test Set:  95.063
Accuracy at step 180
	Train Set: 98.600
	Test Set:  93.794
Accuracy at step 200
	Train Set: 97.800
	Test Set:  94.358
Accuracy at step 220
	Train Set: 97.800
	Test Set:  93.230
Accuracy at step 240
	Train Set: 98.800
	Test Set:  95.487
Accuracy at step 260
	Train Set: 98.800
	Test Set:  95.346
Accuracy at step 280
	Train Set: 98.200
	Test Set:  95.487
Accuracy at step 300
	Train Set: 97.600
	Test Set:  96.192


### Evaluate the results

In [42]:
# Per-exercise precision, recall and accuracy on the test set; batch_pred holds the test
# predictions of the last evaluation step of the training loop
print_precision_recall_accuracy(batch_pred, PD_RNN1.get_y_test())

Exercise	Precision [%]	Recall [%]	Accuracy[%]
  RF		  100.00	   50.00	   97.88
  RO		   65.91	   96.67	   97.74
  RS		   84.85	   93.33	   99.01
  LR		  100.00	  100.00	  100.00
  BC		  100.00	  100.00	  100.00
  TC		  100.00	  100.00	  100.00
  MP		   96.67	   96.67	   99.72
  SA		   96.88	  100.00	   99.86
  P1		  100.00	  100.00	  100.00
  P2		   89.29	   83.33	   98.87
  NE		   99.51	   99.26	   99.29


In [43]:
# List every misclassified test data point (true exercise vs. predicted exercise),
# based on the test predictions of the last evaluation step
print_misclassified_data_points(batch_pred, PD_RNN1.get_y_test())

27 misclassified (709 test data points):
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RF classified as RO
RO classified as NE
RS classified as P2
RS classified as P2
MP classified as NE
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
NE classified as P2
NE classified as SA
NE classified as MP
