## Attention!

Author: Justin Tan

RNN model with attention mechanism for rare decay identification in TensorFlow. See Bahdanau et. al (2016) and Yang et. al (2016).

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time, os

class config(object):
    # Set network parameters
    # Empirically, depth more important than layer size - output dimension
    mode = 'kst'
    channel = 'rho0'
    n_particles = 5
    n_features = 100
    seq_length = n_features/n_particles
    rnn_cell = 'gru' # 'gru'
    hidden_units = 256  # Number of neurons per RNN Cell
    keep_prob = 1.0
    input_keep_prob = 0.97
    recurrent_keep_prob = 0.97
    num_epochs = 256
    batch_size = 128
    num_layers = 2 # Note: 3 layers is considered 'deep'
    learning_rate = 1e-4
    lr_epoch_decay = 0.999
    ema_decay = 0.999
    n_classes = 2
    attention_dim = 512

class directories(object):
    data = 'data'
    tensorboard = 'tensorboard'
    checkpoints = 'checkpoints'
    samples = 'samples'
    
architecture = '{} - {} | Base cell: {} | Hidden units: {} | Layers: {} | Batch: {} | Epochs: {}'.format(
    config.channel, config.mode, config.rnn_cell, config.hidden_units, config.num_layers, config.batch_size, config.num_epochs)

class reader():
    def __init__(self, df):
        
        self.df = df
        self.batch_size = config.batch_size
        self.steps_per_epoch = len(df) // config.batch_size
        self.epochs = 0
        self.proceed = True
        self.shuffle()

    def shuffle(self):
        self.df = self.df.sample(frac=1).reset_index(drop=True)
        self.df_X = self.df.drop('Labels', axis = 1)
        self.df_y = self.df['Labels']
        self.pointer = 0

    def next_batch(self, batch_size):
        if self.pointer + 1 >= self.steps_per_epoch:
            inputs = self.df_X.iloc[self.pointer*batch_size:]
            targets = self.df_y.iloc[self.pointer*batch_size:]
            self.epochs += 1
            self.shuffle()
            self.proceed = False
            
        inputs = self.df_X.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
        targets = self.df_y.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
        self.pointer += 1
                
        return inputs, targets

def save_summary(config, delta_t, train_acc, test_acc):
    import json
    summary = {
        'Timestamp': time.strftime('%c'),
        'Base cell': config.rnn_cell,
        'Hidden units': config.hidden_units,
        'Layers': config.num_layers,
        'Batch_size': config.batch_size,
        'Seq_length': config.seq_length,
        'Dropout': config.keep_prob,
        'Epochs': config.num_epochs,
        'Time': delta_t,
        'Final train acc': train_acc,
        'Final test acc': test_acc
    }
    # Writing JSON data
    if os.path.isfile('rnn_summary.json'):
        with open('rnn_summary_{}.json.format(config.name)', 'r+') as f:
            new = json.load(f)
        new.append(summary)
        with open('rnn_summary.json', 'w') as f:
            json.dump(new, f, indent = 4)
    else:
        with open('rnn_summary.json', 'w') as f:
             json.dump([summary], f, indent = 4)
                
def load_data(file_name, test_size = 0.05):
    from sklearn.model_selection import train_test_split
    df = pd.read_hdf(file_name, 'df')
    df_X_train, df_X_test, df_y_train, df_y_test = train_test_split(df.drop('Labels', axis = 1),
                                                                    df['Labels'], test_size = test_size, random_state=42)
    return df_X_train, df_X_test, df_y_train, df_y_test

def p_ordering(df, ascending = True):
    # Drop errors, order particles by momentum descending
    df = df.drop([column for column in df.columns if column.endswith('Err')], axis = 1)
    Labels = df['Labels']
    blocks = np.split(df.drop('Labels', axis = 1), config.n_particles, axis = 1)
    cols_p = [column for column in df.columns if column.endswith('cms_p')]
    p_mean = [df[column].mean() for column in cols_p]
    if ascending:
        p_ordered_frames = [blocks[i] for i in np.argsort(p_mean)]
    else:
        p_ordered_frames = [blocks[i] for i in np.argsort(p_mean)[::-1]]
        
    p_ordered_frames.append(Labels)
    df_p_ordered = pd.concat(p_ordered_frames, axis = 1)
    return df_p_ordered

def plot_ROC_curve(network_output, y_true, meta = ''):
#     import matplotlib as mpl
#     mpl.use('pgf')
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import roc_curve, auc
    y_score = network_output[:,1]
    
    # Compute ROC curve, integrate
    fpr, tpr, thresholds = roc_curve(y_true, y_score)    
    roc_auc = auc(fpr, tpr)
    
    plt.figure()
    plt.axes([.1,.1,.8,.7])
    plt.figtext(.5,.9, r'Receiver Operating Characteristic', fontsize=15, ha='center')
    plt.figtext(.5,.85, meta, fontsize=10,ha='center')
    plt.plot(fpr, tpr, color='darkorange',
                     lw=2, label='ROC (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=1.0, linestyle='--')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel(r'False Positive Rate')
    plt.ylabel(r'True Positive Rate')
    plt.legend(loc="lower right")
    plt.savefig(os.path.join('graphs', '{}_{}_ROC.pdf'.format(config.channel, config.mode)), format='pdf', dpi=1000)
    #plt.savefig(os.path.join('graphs', '{}_{}_ROC.pgf'.format(config.channel, config.mode)), format='pgf', dpi=1000)
    print('AUC: {:.4f}'.format(roc_auc))
    plt.show()
    plt.gcf().clear()

In [2]:
def attention_A(annotations, attention_dim, feedforward = True, my_method = True):
    
    sequence_length = annotations.get_shape()[1].value
    hidden_units = annotations.get_shape()[2].value
    A_re = tf.reshape(annotations, [-1, hidden_units])

    W_ff = tf.get_variable('W_ff', shape = [hidden_units, attention_dim])
    b_ff = tf.get_variable('b_ff', shape = [attention_dim])
    u_context = tf.get_variable('context', shape = [attention_dim], initializer = tf.random_normal_initializer(stddev = 0.512))
    
    input_embedding = tf.tanh(tf.add(tf.matmul(A_re, W_ff), tf.reshape(b_ff, [1,-1])))
    energy = tf.matmul(input_embedding, tf.expand_dims(u_context,1))
    attention_energy = tf.reshape(energy, [-1, sequence_length])
    p = tf.nn.softmax(attention_energy)
    D = tf.matrix_diag(p)
    
    # Compute weighted sum of annotations
    if my_method:
        output = tf.reduce_sum(tf.matmul(D, annotations), 1)
    else:
        output = tf.reduce_sum(annotations * tf.reshape(p, [-1, sequence_length, 1]), 1)
        
    return output

def attention_B(annotations, attention_dim, feedforward = True, my_method = True):
    
    sequence_length = annotations.get_shape()[1].value
    hidden_units = annotations.get_shape()[2].value
    B_re = tf.reshape(annotations, [hidden_units, -1])

    W_ff = tf.get_variable('W_ff', shape = [attention_dim, hidden_units])
    b_ff = tf.get_variable('b_ff', shape = [attention_dim])
    u_context = tf.get_variable('context', shape = [attention_dim], initializer = tf.random_normal_initializer(stddev = 0.512))
    
    prod = tf.matmul(W_ff, B_re)
    b_ff_tiled = tf.tile(tf.expand_dims(b_ff,1), [1,prod.shape[1].value])
    
    input_embedding = tf.tanh(tf.add(prod, b_ff_tiled))
    energy = tf.matmul(tf.transpose(tf.expand_dims(u_context,1)), input_embedding)
    energy = tf.reshape(energy, [-1, sequence_length])

    p = tf.nn.softmax(energy)
    D = tf.matrix_diag(p)
    
    # Compute weighted sum of annotations
    if my_method:
        output = tf.reduce_sum(tf.matmul(D, annotations), 1)
    else:
        output = tf.reduce_sum(annotations * tf.reshape(p, [-1, sequence_length, 1]), 1)
        
    return output

### Read Data

In [3]:
test_file = '/data/projects/punim0011/jtan/data/rnn/rnn_B02rho0gamma_kst.h5'

df_X_train, df_X_test, df_y_train, df_y_test = load_data(test_file)
df_train = p_ordering(pd.concat([df_X_train, df_y_train], axis = 1))
df_test = p_ordering(pd.concat([df_X_test, df_y_test], axis = 1))

config.n_features = df_train.shape[1] - 1
config.seq_length = config.n_features//config.n_particles
config.steps_per_epoch = len(df_X_train) // config.batch_size
assert config.seq_length == config.n_features/config.n_particles, 'Discrepancy in input feature dimension'

readerTrain = reader(df_train)
readerTest = reader(df_test)

### RNN construction

In [4]:
def layer_weights(shape, name = 'weights'):
    # Return weight tensor of given shape using Xavier initialization
    W = tf.get_variable(name, shape = shape, initializer=tf.contrib.layers.xavier_initializer())
    return W

def layer_biases(shape, name = 'biases'):
    # Return bias tensor of given shape with small initialized constant value
    b = tf.get_variable(name, shape = shape, initializer = tf.constant_initializer(0.01))
    return b

def BN_layer_ops(x, shape, name, keep_prob, phase, activation=tf.nn.relu):
    # High-level implementation of BN
    with tf.variable_scope(name) as scope:
         # scope.reuse_variables() # otherwise tf.get_variable() checks that already existing vars are not shared by accident
        weights = layer_weights(shape = shape)
        biases = layer_biases(shape = [shape[1]])
        z_BN = tf.matmul(x, weights) + biases
        
        # Place BN transform before non-linearity - update to TF 1.2!
        theta_BN = tf.contrib.layers.batch_norm(z_BN, center=True, scale=True,is_training=phase, 
                                                decay=0.99, zero_debias_moving_mean=True, scope='bn', fused = True)
        BN_actv = activation(theta_BN)
        BN_layer_output = tf.nn.dropout(BN_actv, keep_prob)

    return BN_layer_output

def readout_ops(x, shape, name):
    # Don't apply non-linearity, dropout on output layer
    with tf.variable_scope(name) as scope:
        weights = layer_weights(shape = shape)
        biases = layer_biases(shape = [shape[1]])
        layer_output = tf.matmul(x, weights) + biases
        
    return layer_output

def build_network(x, n_layers, hidden_layer_nodes, keep_prob, training_phase):
    assert n_layers == len(hidden_layer_nodes), 'Specified layer nodes and number of layers do not correspond.'
    layers = [x]
    with tf.variable_scope('BN_layers') as scope:
        hidden_1 = BN_layer_ops(x, shape = [x.get_shape()[-1], hidden_layer_nodes[0]], name = 'BNhidden0',
                                keep_prob = keep_prob, phase = training_phase)
        layers.append(hidden_1)
        for n in range(0,n_layers-1):
            hidden_n = BN_layer_ops(layers[-1], shape = [hidden_layer_nodes[n], hidden_layer_nodes[n+1]], name = 'BNhidden{}'.format(n+1),
                                   keep_prob = keep_prob, phase = training_phase)
            layers.append(hidden_n)
        readout = readout_ops(layers[-1], shape = [hidden_layer_nodes[-1], config.n_classes], name = 'readout')
        
    return readout

def extract_last_2(rnn_output, ind):
    batch_range = tf.range(rnn_output.get_shape()[0])
    indices = tf.stack([batch_range, ind], axis=1)
    summary = tf.gather_nd(rnn_output, indices)

    return summary

def extract_axis_1(data, ind):
    """
    Get specified elements along the first axis of tensor.
    :param data: Tensorflow tensor that will be subsetted.
    :param ind: Indices to take (one for each element along axis 0 of data).
    :return: Subsetted tensor.
    """

    batch_range = tf.range(tf.shape(data)[0])
    indices = tf.stack([batch_range, ind], axis=1)
    res = tf.gather_nd(data, indices)

    return res

In [5]:
class BiRNN():
    def __init__(self, config, training = True):
        self.config = config                
        # Placeholders for feed_dict
        self.inputs = tf.placeholder(tf.float32, shape = [None, self.config.n_features])
        self.targets = tf.placeholder(tf.int32, shape = [None])
        self.keep_prob = tf.placeholder(tf.float32) # Dropout on input connections
        self.dense_keep_prob = tf.placeholder(tf.float32) # Dropout for dense layers
        self.training_phase = tf.placeholder(tf.bool)
        self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        
        # Reshape input to batch_size x n_particles x seq_length tensor
        rnn_inputs = tf.reshape(self.inputs, [-1, config.n_particles, config.seq_length])
        
        # Choose rnn cell type
        if config.rnn_cell == 'lstm':
            args = {'num_units': config.hidden_units, 'forget_bias': 1.0, 'state_is_tuple': True}
            base_cell = tf.nn.rnn_cell.LSTMCell
        elif config.rnn_cell == 'gru':
            args = {'num_units': config.hidden_units}
            base_cell = tf.nn.rnn_cell.GRUCell
        elif config.rnn_cell == 'layer-norm':
            args = {'num_units': config.hidden_units, 'forget_bias': 1.0, 'dropout_keep_prob': self.config.recurrent_keep_prob}
            base_cell = tf.contrib.rnn.LayerNormBasicLSTMCell
        else:
            args = {'num_units': config.hidden_units, 'forget_bias': 1.0, 'dropout_keep_prob': self.config.recurrent_keep_prob}
            base_cell = tf.contrib.rnn.LayerNormBasicLSTMCell

        self.cell = base_cell
#         # Apply Dropout operator on non-recurrent connections, wrap stacked cells into a single cell
#         if training and self.config.input_keep_prob < 1:
#             rnn_inputs = tf.nn.dropout(rnn_inputs, self.keep_prob)
#             self.multicell = tf.nn.rnn_cell.MultiRNNCell(
#             [tf.nn.rnn_cell.DropoutWrapper(self.cell(**args), input_keep_prob = self.config.input_keep_prob) for _ in range(config.num_layers)],
#             state_is_tuple = True)
#         else:
#             self.multicell = tf.nn.rnn_cell.MultiRNNCell(
#             [self.cell(**args) for _ in range(config.num_layers)], state_is_tuple = True)
            
#         # Accept previous hidden state as input
#         self.init_state = self.multicell.zero_state(self.config.batch_size, tf.float32)
#         # Outputs shaped [batch_size, max_time, cell.output_size]
#         rnn_outputs, final_state = tf.nn.dynamic_rnn(
#             cell = self.multicell, inputs = rnn_inputs, initial_state = self.init_state, scope = self.scope)
#         self.output = rnn_outputs[:,-1,:]
        
        if training and self.config.input_keep_prob < 1:
            rnn_inputs = tf.nn.dropout(rnn_inputs, self.keep_prob)
            fwd_cells = [tf.nn.rnn_cell.DropoutWrapper(
                self.cell(**args), input_keep_prob = self.config.input_keep_prob) for _ in range(config.num_layers)]
            bwd_cells = [tf.nn.rnn_cell.DropoutWrapper(
                self.cell(**args), input_keep_prob = self.config.input_keep_prob) for _ in range(config.num_layers)]
        else:
            fwd_cells = [self.cell(**args) for _ in range(config.num_layers)]
            bwd_cells = [self.cell(**args) for _ in range(config.num_layers)]
        
        fwd_init = [fwd_cell.zero_state(self.config.batch_size, tf.float32) for fwd_cell in fwd_cells]
        bwd_init = [bwd_cell.zero_state(self.config.batch_size, tf.float32) for bwd_cell in bwd_cells]
        
        self.birnn_output, self.fwd_state, self.bwd_state = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            cells_fw = fwd_cells,
            cells_bw = bwd_cells,
            inputs = rnn_inputs,
            initial_states_fw = fwd_init,
            initial_states_bw = bwd_init,
            sequence_length = np.ones(config.batch_size)*config.n_particles,
            parallel_iterations = 128)
        
#         summary_vector = attention_A(self.birnn_output, config.attention_dim, my_method = False)
        
#         with tf.variable_scope('smx'):
#             W_c = layer_weights(shape = [config.attention_dim, config.n_classes], name = 'smx_W_fwd')
#             b_c = layer_biases(shape = [config.n_classes], name = 'smx_b')
#             self.logits_RNN = tf.matmul(summary_vector, W_c) + b_c
        
#         with tf.variable_scope('softmax'):
#             W_f = layer_weights(shape = [config.hidden_units, config.n_classes], name = 'smx_W_fwd')
#             W_b = layer_weights(shape = [config.hidden_units, config.n_classes], name = 'smx_W_bwd')
#             softmax_b = layer_biases(shape = [config.n_classes], name = 'smx_b')
#             self.logits_RNN = tf.matmul(self.fwd_state[-1], W_f) + tf.matmul(self.bwd_state[-1], W_b) + softmax_b
            
        # Extract output from last time step
        
        ind = np.zeros(int(self.birnn_output.get_shape()[0])) + (config.n_particles - 1)
        y = extract_last_2(self.birnn_output, ind)
        output_fwd, output_bwd = tf.split(y, 2, axis = 1)
#         outputs = tf.split(self.birnn_output, 2, axis = 2)
#         output_fwd = outputs[0][:,-1,:]
#         output_bwd = outputs[1][:,-1,:]

        with tf.variable_scope('softmax'):
            W_f = layer_weights(shape = [config.hidden_units, config.n_classes], name = 'smx_W_fwd')
            W_b = layer_weights(shape = [config.hidden_units, config.n_classes], name = 'smx_W_bwd')
            softmax_b = layer_biases(shape = [config.n_classes], name = 'smx_b')
            self.logits_RNN = tf.matmul(output_fwd, W_f) + tf.matmul(output_bwd, W_b) + softmax_b  # Unormalized log probabilties for next char
        
        self.cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = self.logits_RNN, labels = self.targets))
        tf.summary.scalar('cross_entropy', self.cross_entropy)
        
        # Anneal learning rate
        #learning_rate = tf.train.exponential_decay(config.learning_rate, global_step,
        #                                               decay_steps = config.steps_per_epoch, decay_rate = config.lr_epoch_decay, staircase=True)
        opt = tf.train.AdamOptimizer(config.learning_rate)
        grad_var_pairs = opt.compute_gradients(self.cross_entropy)          
        grad_var_pairs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in grad_var_pairs]
        
        # Add histograms for gradients.
        for grad, var in grad_var_pairs:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)
                
        self.opt_op = opt.apply_gradients(grad_var_pairs, global_step=self.global_step)
#         self.opt_op = tf.train.AdamOptimizer(config.learning_rate).minimize(self.cross_entropy, name = 'optimizer',
#                                                                               global_step = self.global_step)
        
        self.ema = tf.train.ExponentialMovingAverage(decay = config.ema_decay, num_updates = self.global_step)
        maintain_averages_op = self.ema.apply(tf.trainable_variables())
        
        with tf.control_dependencies([self.opt_op]):
            self.train_op = tf.group(maintain_averages_op)
        
        # Evaluate correctness
        self.predictions = tf.nn.softmax(self.logits_RNN)
        correct_prediction = tf.equal(tf.cast(tf.argmax(self.predictions, 1), tf.int32), self.targets)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)
        self.merge_op = tf.summary.merge_all()
    
    def predict(self, ckpt, metaGraph = None):
        pin_cpu = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True, device_count = {'GPU':0})
    
        # Restore the moving average version of the learned variables for eval.
        #variable_averages = tf.train.ExponentialMovingAverage(config.ema_decay)
        variables_to_restore = self.ema.variables_to_restore()
        #variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        
        with tf.Session(config=pin_cpu) as sess:
            # Initialize variables
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            sess.run(tf.local_variables_initializer())
            start_time = time.time()
            assert (ckpt.model_checkpoint_path or metaGraph), 'Missing checkpoint file!'
            
            if metaGraph:
                saver = tf.train.import_meta_graph(metaGraph)
                saver.restore(sess, os.path.splitext(metaGraph)[0])
                print('{} restored.'.format(metaGraph))
            else:    
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('{} restored.'.format(ckpt.model_checkpoint_path))

            # Make predictions using the trained model
            feed_dict_test = {self.inputs: df_X_test.values, self.targets: df_y_test.values, self.keep_prob: 1.0}#, self.training_phase: False}
            network_output_test, final_v_acc, final_v_auc = sess.run(
                [self.prediction, self.accuracy, self.auc_op], feed_dict = feed_dict_test)

            print("Validation accuracy: {:g}\nValidation AUC: {:g}".format(final_v_acc, final_v_auc))
            
            plot_ROC_curve(network_output = network_output_test, y_true = df_y_test.values,
                           meta = architecture + ' | Test accuracy: {}'.format(final_v_acc))            
            delta_t = time.time() - start_time
            print("Inference complete. Duration: %g s" %(delta_t))
            
            return network_output_test

In [92]:
extract_last_2(y, ind) == extract_axis_1(y, ind)

False

In [None]:
        outputs = tf.split(y, 2, axis = 2)


In [94]:
tf.split(extract_axis_1(y, ind), 2, axis = 1)

[<tf.Tensor 'split:0' shape=(128, 256) dtype=float32>,
 <tf.Tensor 'split:1' shape=(128, 256) dtype=float32>]

In [67]:
tf.shape(y)[0]

<tf.Tensor 'strided_slice_12:0' shape=() dtype=int32>

In [79]:
r = tf.range(y.get_shape()[0])

In [78]:
ind.shape

(128,)

In [82]:
tf.stack([r, ind], axis = 1)

<tf.Tensor 'stack_7:0' shape=(128, 2) dtype=int32>

In [73]:
tf.stack([tf.range(y.get_shape()[0]), ind], axis = 1)

<tf.Tensor 'stack_5:0' shape=(128, 2) dtype=int32>

In [58]:
tf.shape(y)

<tf.Tensor 'Shape_9:0' shape=(3,) dtype=int32>

In [63]:
tf.gather_nd(y,tf.stack([tf.range(tf.shape(y)[0]),ind], axis = 1))

<tf.Tensor 'GatherNd_2:0' shape=(128, 512) dtype=float32>

In [64]:
ind = np.zeros(int(y.get_shape()[0])) + (config.n_particles - 1)

In [84]:
z, inds = extract_axis_1(y, ind)

In [85]:
z

<tf.Tensor 'GatherNd_3:0' shape=(128, 512) dtype=float32>

In [86]:
inds

<tf.Tensor 'stack_8:0' shape=(128, 2) dtype=int32>

In [51]:
y[:,-1,:]

<tf.Tensor 'strided_slice_4:0' shape=(128, 512) dtype=float32>

In [7]:
biRNN = BiRNN(config, training = True)

In [59]:
y=biRNN.birnn_output

In [60]:
y

<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(128, 5, 512) dtype=float32>

In [19]:
y = np.random.normal(size=1000)

In [38]:
y.get_shape()[0]

Dimension(128)

In [31]:
tf.shape(y)

<tf.Tensor 'Shape_2:0' shape=(3,) dtype=int32>

In [27]:
y[:30, 4,:]

<tf.Tensor 'strided_slice:0' shape=(30, 512) dtype=float32>

In [10]:
config.batch_size

128

In [14]:
tf.range(config.batch_size)

<tf.Tensor 'range_1:0' shape=(128,) dtype=int32>

In [6]:
def train(config, restore = False):
    
    biRNN = BiRNN(config, training = True)
    start_time = time.time()
    v_acc_best = 0.
    saver = tf.train.Saver()
    train_writer = tf.summary.FileWriter(
        os.path.join(directories.tensorboard, 'train_{}'.format(time.strftime('%d-%m_%I:%M'))), graph = tf.get_default_graph())
    test_writer = tf.summary.FileWriter(os.path.join(directories.tensorboard, 'test_{}'.format(time.strftime('%d-%m_%I:%M'))))
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
    
    with tf.Session() as sess:
        # Initialize variables
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        
        if restore and ckpt.model_checkpoint_path:
            print('{} restored.'.format(ckpt.model_checkpoint_path))
#             saver = tf.train.import_meta_graph('checkpoints/char-RNN__epoch49.ckpt-49.meta')
#             saver.restore(sess, 'checkpoints/char-RNN__epoch49.ckpt-49')
            saver.restore(sess, ckpt.model_checkpoint_path)
                    
        for epoch in range(config.num_epochs):
            
            readerTrain.proceed = True
            begin = time.time()
            step = 0
            print('(*) Entering Epoch {} ({:.3f} s)'.format(epoch, time.time() - start_time))

            # Save every 10 epochs    
            if epoch % 10 == 0:
                save_path = saver.save(sess,
                                       os.path.join(directories.checkpoints,'biRNN_{}_{}_epoch{}.ckpt'.format(config.mode, config.channel, epoch)),
                                       global_step = epoch)
                print('Graph saved to file: {}'.format(save_path))

            while(readerTrain.proceed):
                # Iterate through entire corpus
                x_train, y_train = readerTrain.next_batch(config.batch_size)
                feed_dict_train = {biRNN.inputs: x_train.values, biRNN.targets: y_train.values, biRNN.keep_prob: config.keep_prob, 
                                   biRNN.training_phase: True}
                t_op = sess.run(biRNN.train_op, feed_dict = feed_dict_train)
                step += 1

                if step % (config.steps_per_epoch // 8) == 0:            
                    # Evaluate model
                    improved = ''
                    x_test, y_test = readerTest.next_batch(config.batch_size)
                    feed_dict_test = {biRNN.inputs: x_test.values, biRNN.targets: y_test.values, 
                                      biRNN.keep_prob: 1.0, biRNN.training_phase: False}

                    t_acc, t_loss, t_summary = sess.run([biRNN.accuracy, biRNN.cross_entropy, biRNN.merge_op],
                                                        feed_dict = feed_dict_train)
                    v_acc, v_loss, v_summary, = sess.run([biRNN.accuracy, biRNN.cross_entropy, biRNN.merge_op],
                                                        feed_dict = feed_dict_test)

                    train_writer.add_summary(t_summary, step)
                    test_writer.add_summary(v_summary, step)
                    
                    if epoch > 5 and v_acc > v_acc_best:
                        v_acc_best = v_acc
                        improved = '*'
                        save_path = saver.save(sess, os.path.join(directories.checkpoints, 'best.ckpt'), global_step = epoch)
                    
                    print('Epoch {}, Step {} | Training Accuracy: {:.3f} | Test Accuracy: {:.3f} | Training Loss: {:.3f} | Test Loss {:.3f} {}'
                          .format(epoch, step, t_acc, v_acc, t_loss, v_loss, improved))

        save_path = saver.save(sess, os.path.join(directories.checkpoints, 'biRNN_end'),
                               global_step = epoch)
        print('Metagraph saved to file: {}'.format(save_path))

        final_train_accuracy = biRNN.accuracy.eval(feed_dict = {biRNN.inputs: df_X_train.values, biRNN.targets: df_y_train.values, 
                                                                biRNN.keep_prob: 1.0, biRNN.training_phase: False})
        final_test_accuracy = biRNN.accuracy.eval(feed_dict = {biRNN.inputs: df_X_test.values, biRNN.targets: df_y_test.values,
                                                               biRNN.keep_prob: 1.0, biRNN.training_phase: False})
        delta_t = time.time() - start_time
            
    print("Training Complete. Time elapsed: {:.3f} s".format(delta_t))
    print("Train accuracy: %g\nValidation accuracy: %g" %(final_train_accuracy, final_test_accuracy))

    print('Architecture: {}'.format(architecture))
    save_summary(config, delta_t, final_train_accuracy, final_test_accuracy)

In [None]:
train(config)#, restore = True)

(*) Entering Epoch 0 (11.691 s)
Graph saved to file: checkpoints/biRNN_kst_rho0_epoch0.ckpt-0
Epoch 0, Step 475 | Training Accuracy: 0.570 | Test Accuracy: 0.539 | Training Loss: 0.660 | Test Loss 0.706 
Epoch 0, Step 950 | Training Accuracy: 0.602 | Test Accuracy: 0.703 | Training Loss: 0.635 | Test Loss 0.573 
Epoch 0, Step 1425 | Training Accuracy: 0.656 | Test Accuracy: 0.703 | Training Loss: 0.638 | Test Loss 0.619 
Epoch 0, Step 1900 | Training Accuracy: 0.656 | Test Accuracy: 0.641 | Training Loss: 0.598 | Test Loss 0.615 
Epoch 0, Step 2375 | Training Accuracy: 0.602 | Test Accuracy: 0.625 | Training Loss: 0.643 | Test Loss 0.620 
Epoch 0, Step 2850 | Training Accuracy: 0.617 | Test Accuracy: 0.688 | Training Loss: 0.595 | Test Loss 0.591 
Epoch 0, Step 3325 | Training Accuracy: 0.633 | Test Accuracy: 0.609 | Training Loss: 0.654 | Test Loss 0.608 
Epoch 0, Step 3800 | Training Accuracy: 0.688 | Test Accuracy: 0.750 | Training Loss: 0.578 | Test Loss 0.499 
(*) Entering Epoch 1

Epoch 8, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.828 | Training Loss: 0.424 | Test Loss 0.373 
(*) Entering Epoch 9 (831.050 s)
Epoch 9, Step 475 | Training Accuracy: 0.812 | Test Accuracy: 0.781 | Training Loss: 0.395 | Test Loss 0.459 
Epoch 9, Step 950 | Training Accuracy: 0.867 | Test Accuracy: 0.812 | Training Loss: 0.351 | Test Loss 0.370 
Epoch 9, Step 1425 | Training Accuracy: 0.750 | Test Accuracy: 0.812 | Training Loss: 0.481 | Test Loss 0.442 
Epoch 9, Step 1900 | Training Accuracy: 0.820 | Test Accuracy: 0.820 | Training Loss: 0.413 | Test Loss 0.427 
Epoch 9, Step 2375 | Training Accuracy: 0.844 | Test Accuracy: 0.812 | Training Loss: 0.391 | Test Loss 0.385 
Epoch 9, Step 2850 | Training Accuracy: 0.805 | Test Accuracy: 0.789 | Training Loss: 0.444 | Test Loss 0.445 
Epoch 9, Step 3325 | Training Accuracy: 0.805 | Test Accuracy: 0.773 | Training Loss: 0.433 | Test Loss 0.457 
Epoch 9, Step 3800 | Training Accuracy: 0.914 | Test Accuracy: 0.852 | Training L

Epoch 17, Step 3325 | Training Accuracy: 0.781 | Test Accuracy: 0.836 | Training Loss: 0.406 | Test Loss 0.347 
Epoch 17, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.781 | Training Loss: 0.387 | Test Loss 0.483 
(*) Entering Epoch 18 (1650.734 s)
Epoch 18, Step 475 | Training Accuracy: 0.844 | Test Accuracy: 0.781 | Training Loss: 0.412 | Test Loss 0.419 
Epoch 18, Step 950 | Training Accuracy: 0.820 | Test Accuracy: 0.797 | Training Loss: 0.353 | Test Loss 0.395 
Epoch 18, Step 1425 | Training Accuracy: 0.789 | Test Accuracy: 0.781 | Training Loss: 0.469 | Test Loss 0.426 
Epoch 18, Step 1900 | Training Accuracy: 0.883 | Test Accuracy: 0.758 | Training Loss: 0.319 | Test Loss 0.393 
Epoch 18, Step 2375 | Training Accuracy: 0.781 | Test Accuracy: 0.805 | Training Loss: 0.454 | Test Loss 0.406 
Epoch 18, Step 2850 | Training Accuracy: 0.820 | Test Accuracy: 0.844 | Training Loss: 0.383 | Test Loss 0.434 
Epoch 18, Step 3325 | Training Accuracy: 0.844 | Test Accuracy: 0.789 |

Epoch 26, Step 2375 | Training Accuracy: 0.789 | Test Accuracy: 0.844 | Training Loss: 0.425 | Test Loss 0.345 
Epoch 26, Step 2850 | Training Accuracy: 0.875 | Test Accuracy: 0.812 | Training Loss: 0.371 | Test Loss 0.404 
Epoch 26, Step 3325 | Training Accuracy: 0.852 | Test Accuracy: 0.805 | Training Loss: 0.386 | Test Loss 0.374 
Epoch 26, Step 3800 | Training Accuracy: 0.812 | Test Accuracy: 0.805 | Training Loss: 0.399 | Test Loss 0.431 
(*) Entering Epoch 27 (2471.466 s)
Epoch 27, Step 475 | Training Accuracy: 0.805 | Test Accuracy: 0.828 | Training Loss: 0.412 | Test Loss 0.350 
Epoch 27, Step 950 | Training Accuracy: 0.836 | Test Accuracy: 0.797 | Training Loss: 0.375 | Test Loss 0.422 
Epoch 27, Step 1425 | Training Accuracy: 0.805 | Test Accuracy: 0.812 | Training Loss: 0.463 | Test Loss 0.403 
Epoch 27, Step 1900 | Training Accuracy: 0.828 | Test Accuracy: 0.758 | Training Loss: 0.392 | Test Loss 0.508 
Epoch 27, Step 2375 | Training Accuracy: 0.883 | Test Accuracy: 0.852 |

Epoch 35, Step 1425 | Training Accuracy: 0.828 | Test Accuracy: 0.867 | Training Loss: 0.323 | Test Loss 0.333 
Epoch 35, Step 1900 | Training Accuracy: 0.867 | Test Accuracy: 0.867 | Training Loss: 0.330 | Test Loss 0.306 
Epoch 35, Step 2375 | Training Accuracy: 0.820 | Test Accuracy: 0.797 | Training Loss: 0.417 | Test Loss 0.406 
Epoch 35, Step 2850 | Training Accuracy: 0.859 | Test Accuracy: 0.820 | Training Loss: 0.346 | Test Loss 0.435 
Epoch 35, Step 3325 | Training Accuracy: 0.828 | Test Accuracy: 0.789 | Training Loss: 0.353 | Test Loss 0.403 
Epoch 35, Step 3800 | Training Accuracy: 0.820 | Test Accuracy: 0.805 | Training Loss: 0.386 | Test Loss 0.391 
(*) Entering Epoch 36 (3291.241 s)
Epoch 36, Step 475 | Training Accuracy: 0.859 | Test Accuracy: 0.828 | Training Loss: 0.314 | Test Loss 0.325 
Epoch 36, Step 950 | Training Accuracy: 0.844 | Test Accuracy: 0.844 | Training Loss: 0.300 | Test Loss 0.338 
Epoch 36, Step 1425 | Training Accuracy: 0.828 | Test Accuracy: 0.789 |

Epoch 44, Step 475 | Training Accuracy: 0.812 | Test Accuracy: 0.844 | Training Loss: 0.394 | Test Loss 0.342 
Epoch 44, Step 950 | Training Accuracy: 0.852 | Test Accuracy: 0.836 | Training Loss: 0.333 | Test Loss 0.369 
Epoch 44, Step 1425 | Training Accuracy: 0.867 | Test Accuracy: 0.836 | Training Loss: 0.315 | Test Loss 0.356 
Epoch 44, Step 1900 | Training Accuracy: 0.867 | Test Accuracy: 0.805 | Training Loss: 0.323 | Test Loss 0.389 
Epoch 44, Step 2375 | Training Accuracy: 0.805 | Test Accuracy: 0.891 | Training Loss: 0.385 | Test Loss 0.244 
Epoch 44, Step 2850 | Training Accuracy: 0.828 | Test Accuracy: 0.852 | Training Loss: 0.380 | Test Loss 0.369 
Epoch 44, Step 3325 | Training Accuracy: 0.789 | Test Accuracy: 0.805 | Training Loss: 0.422 | Test Loss 0.438 
Epoch 44, Step 3800 | Training Accuracy: 0.875 | Test Accuracy: 0.844 | Training Loss: 0.292 | Test Loss 0.329 
(*) Entering Epoch 45 (4111.713 s)
Epoch 45, Step 475 | Training Accuracy: 0.836 | Test Accuracy: 0.867 | 

Epoch 52, Step 3800 | Training Accuracy: 0.875 | Test Accuracy: 0.844 | Training Loss: 0.324 | Test Loss 0.390 
(*) Entering Epoch 53 (4840.658 s)
Epoch 53, Step 475 | Training Accuracy: 0.844 | Test Accuracy: 0.836 | Training Loss: 0.361 | Test Loss 0.367 
Epoch 53, Step 950 | Training Accuracy: 0.812 | Test Accuracy: 0.844 | Training Loss: 0.381 | Test Loss 0.315 
Epoch 53, Step 1425 | Training Accuracy: 0.844 | Test Accuracy: 0.797 | Training Loss: 0.301 | Test Loss 0.448 
Epoch 53, Step 1900 | Training Accuracy: 0.898 | Test Accuracy: 0.875 | Training Loss: 0.281 | Test Loss 0.377 
Epoch 53, Step 2375 | Training Accuracy: 0.805 | Test Accuracy: 0.781 | Training Loss: 0.357 | Test Loss 0.419 
Epoch 53, Step 2850 | Training Accuracy: 0.828 | Test Accuracy: 0.797 | Training Loss: 0.353 | Test Loss 0.425 
Epoch 53, Step 3325 | Training Accuracy: 0.859 | Test Accuracy: 0.797 | Training Loss: 0.336 | Test Loss 0.422 
Epoch 53, Step 3800 | Training Accuracy: 0.836 | Test Accuracy: 0.789 |

Epoch 61, Step 2850 | Training Accuracy: 0.844 | Test Accuracy: 0.867 | Training Loss: 0.353 | Test Loss 0.332 
Epoch 61, Step 3325 | Training Accuracy: 0.789 | Test Accuracy: 0.797 | Training Loss: 0.403 | Test Loss 0.394 
Epoch 61, Step 3800 | Training Accuracy: 0.844 | Test Accuracy: 0.859 | Training Loss: 0.376 | Test Loss 0.333 
(*) Entering Epoch 62 (5661.325 s)
Epoch 62, Step 475 | Training Accuracy: 0.812 | Test Accuracy: 0.883 | Training Loss: 0.346 | Test Loss 0.323 
Epoch 62, Step 950 | Training Accuracy: 0.844 | Test Accuracy: 0.820 | Training Loss: 0.377 | Test Loss 0.433 
Epoch 62, Step 1425 | Training Accuracy: 0.875 | Test Accuracy: 0.875 | Training Loss: 0.300 | Test Loss 0.291 
Epoch 62, Step 1900 | Training Accuracy: 0.828 | Test Accuracy: 0.852 | Training Loss: 0.385 | Test Loss 0.386 
Epoch 62, Step 2375 | Training Accuracy: 0.836 | Test Accuracy: 0.812 | Training Loss: 0.385 | Test Loss 0.409 
Epoch 62, Step 2850 | Training Accuracy: 0.891 | Test Accuracy: 0.797 |

Epoch 70, Step 1900 | Training Accuracy: 0.812 | Test Accuracy: 0.812 | Training Loss: 0.389 | Test Loss 0.370 
Epoch 70, Step 2375 | Training Accuracy: 0.875 | Test Accuracy: 0.820 | Training Loss: 0.323 | Test Loss 0.406 
Epoch 70, Step 2850 | Training Accuracy: 0.859 | Test Accuracy: 0.828 | Training Loss: 0.328 | Test Loss 0.406 
Epoch 70, Step 3325 | Training Accuracy: 0.797 | Test Accuracy: 0.867 | Training Loss: 0.447 | Test Loss 0.324 
Epoch 70, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.789 | Training Loss: 0.351 | Test Loss 0.437 
(*) Entering Epoch 71 (6479.699 s)
Epoch 71, Step 475 | Training Accuracy: 0.852 | Test Accuracy: 0.820 | Training Loss: 0.353 | Test Loss 0.393 
Epoch 71, Step 950 | Training Accuracy: 0.844 | Test Accuracy: 0.828 | Training Loss: 0.355 | Test Loss 0.388 
Epoch 71, Step 1425 | Training Accuracy: 0.859 | Test Accuracy: 0.781 | Training Loss: 0.342 | Test Loss 0.391 
Epoch 71, Step 1900 | Training Accuracy: 0.883 | Test Accuracy: 0.867 |

Epoch 79, Step 1425 | Training Accuracy: 0.906 | Test Accuracy: 0.844 | Training Loss: 0.266 | Test Loss 0.326 
Epoch 79, Step 1900 | Training Accuracy: 0.859 | Test Accuracy: 0.828 | Training Loss: 0.323 | Test Loss 0.360 
Epoch 79, Step 2375 | Training Accuracy: 0.836 | Test Accuracy: 0.820 | Training Loss: 0.290 | Test Loss 0.368 
Epoch 79, Step 2850 | Training Accuracy: 0.844 | Test Accuracy: 0.812 | Training Loss: 0.411 | Test Loss 0.406 
Epoch 79, Step 3325 | Training Accuracy: 0.875 | Test Accuracy: 0.805 | Training Loss: 0.367 | Test Loss 0.412 
Epoch 79, Step 3800 | Training Accuracy: 0.797 | Test Accuracy: 0.797 | Training Loss: 0.400 | Test Loss 0.432 
(*) Entering Epoch 80 (7297.949 s)
Graph saved to file: checkpoints/biRNN_kst_rho0_epoch80.ckpt-80
Epoch 80, Step 475 | Training Accuracy: 0.836 | Test Accuracy: 0.844 | Training Loss: 0.324 | Test Loss 0.328 
Epoch 80, Step 950 | Training Accuracy: 0.836 | Test Accuracy: 0.789 | Training Loss: 0.354 | Test Loss 0.399 
Epoch 8

Epoch 88, Step 475 | Training Accuracy: 0.805 | Test Accuracy: 0.805 | Training Loss: 0.393 | Test Loss 0.466 
Epoch 88, Step 950 | Training Accuracy: 0.805 | Test Accuracy: 0.820 | Training Loss: 0.437 | Test Loss 0.384 
Epoch 88, Step 1425 | Training Accuracy: 0.828 | Test Accuracy: 0.875 | Training Loss: 0.343 | Test Loss 0.332 
Epoch 88, Step 1900 | Training Accuracy: 0.852 | Test Accuracy: 0.820 | Training Loss: 0.332 | Test Loss 0.410 
Epoch 88, Step 2375 | Training Accuracy: 0.867 | Test Accuracy: 0.836 | Training Loss: 0.339 | Test Loss 0.409 
Epoch 88, Step 2850 | Training Accuracy: 0.859 | Test Accuracy: 0.797 | Training Loss: 0.365 | Test Loss 0.420 
Epoch 88, Step 3325 | Training Accuracy: 0.836 | Test Accuracy: 0.836 | Training Loss: 0.311 | Test Loss 0.334 
Epoch 88, Step 3800 | Training Accuracy: 0.797 | Test Accuracy: 0.805 | Training Loss: 0.405 | Test Loss 0.432 
(*) Entering Epoch 89 (8116.480 s)
Epoch 89, Step 475 | Training Accuracy: 0.844 | Test Accuracy: 0.773 | 

Epoch 96, Step 3800 | Training Accuracy: 0.859 | Test Accuracy: 0.820 | Training Loss: 0.351 | Test Loss 0.432 
(*) Entering Epoch 97 (8844.074 s)
Epoch 97, Step 475 | Training Accuracy: 0.805 | Test Accuracy: 0.805 | Training Loss: 0.364 | Test Loss 0.431 
Epoch 97, Step 950 | Training Accuracy: 0.812 | Test Accuracy: 0.867 | Training Loss: 0.375 | Test Loss 0.326 
Epoch 97, Step 1425 | Training Accuracy: 0.883 | Test Accuracy: 0.859 | Training Loss: 0.286 | Test Loss 0.302 
Epoch 97, Step 1900 | Training Accuracy: 0.836 | Test Accuracy: 0.852 | Training Loss: 0.381 | Test Loss 0.354 
Epoch 97, Step 2375 | Training Accuracy: 0.898 | Test Accuracy: 0.852 | Training Loss: 0.288 | Test Loss 0.317 
Epoch 97, Step 2850 | Training Accuracy: 0.836 | Test Accuracy: 0.852 | Training Loss: 0.381 | Test Loss 0.387 
Epoch 97, Step 3325 | Training Accuracy: 0.844 | Test Accuracy: 0.828 | Training Loss: 0.396 | Test Loss 0.348 
Epoch 97, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.875 |

Epoch 105, Step 2850 | Training Accuracy: 0.852 | Test Accuracy: 0.836 | Training Loss: 0.336 | Test Loss 0.362 
Epoch 105, Step 3325 | Training Accuracy: 0.859 | Test Accuracy: 0.867 | Training Loss: 0.361 | Test Loss 0.308 
Epoch 105, Step 3800 | Training Accuracy: 0.859 | Test Accuracy: 0.844 | Training Loss: 0.369 | Test Loss 0.364 
(*) Entering Epoch 106 (9662.235 s)
Epoch 106, Step 475 | Training Accuracy: 0.844 | Test Accuracy: 0.820 | Training Loss: 0.318 | Test Loss 0.374 
Epoch 106, Step 950 | Training Accuracy: 0.867 | Test Accuracy: 0.734 | Training Loss: 0.320 | Test Loss 0.552 
Epoch 106, Step 1425 | Training Accuracy: 0.836 | Test Accuracy: 0.852 | Training Loss: 0.312 | Test Loss 0.354 
Epoch 106, Step 1900 | Training Accuracy: 0.789 | Test Accuracy: 0.820 | Training Loss: 0.408 | Test Loss 0.434 
Epoch 106, Step 2375 | Training Accuracy: 0.812 | Test Accuracy: 0.820 | Training Loss: 0.380 | Test Loss 0.450 
Epoch 106, Step 2850 | Training Accuracy: 0.859 | Test Accurac

Epoch 114, Step 1900 | Training Accuracy: 0.844 | Test Accuracy: 0.797 | Training Loss: 0.371 | Test Loss 0.402 
Epoch 114, Step 2375 | Training Accuracy: 0.891 | Test Accuracy: 0.867 | Training Loss: 0.315 | Test Loss 0.305 
Epoch 114, Step 2850 | Training Accuracy: 0.867 | Test Accuracy: 0.773 | Training Loss: 0.296 | Test Loss 0.421 
Epoch 114, Step 3325 | Training Accuracy: 0.844 | Test Accuracy: 0.898 | Training Loss: 0.333 | Test Loss 0.325 
Epoch 114, Step 3800 | Training Accuracy: 0.906 | Test Accuracy: 0.852 | Training Loss: 0.306 | Test Loss 0.320 
(*) Entering Epoch 115 (10480.267 s)
Epoch 115, Step 475 | Training Accuracy: 0.867 | Test Accuracy: 0.789 | Training Loss: 0.315 | Test Loss 0.387 
Epoch 115, Step 950 | Training Accuracy: 0.875 | Test Accuracy: 0.758 | Training Loss: 0.316 | Test Loss 0.517 
Epoch 115, Step 1425 | Training Accuracy: 0.820 | Test Accuracy: 0.820 | Training Loss: 0.337 | Test Loss 0.440 
Epoch 115, Step 1900 | Training Accuracy: 0.875 | Test Accura

Epoch 123, Step 950 | Training Accuracy: 0.805 | Test Accuracy: 0.844 | Training Loss: 0.421 | Test Loss 0.371 
Epoch 123, Step 1425 | Training Accuracy: 0.828 | Test Accuracy: 0.844 | Training Loss: 0.329 | Test Loss 0.292 
Epoch 123, Step 1900 | Training Accuracy: 0.875 | Test Accuracy: 0.836 | Training Loss: 0.301 | Test Loss 0.330 
Epoch 123, Step 2375 | Training Accuracy: 0.859 | Test Accuracy: 0.852 | Training Loss: 0.342 | Test Loss 0.375 
Epoch 123, Step 2850 | Training Accuracy: 0.867 | Test Accuracy: 0.805 | Training Loss: 0.283 | Test Loss 0.438 
Epoch 123, Step 3325 | Training Accuracy: 0.797 | Test Accuracy: 0.836 | Training Loss: 0.407 | Test Loss 0.372 
Epoch 123, Step 3800 | Training Accuracy: 0.859 | Test Accuracy: 0.836 | Training Loss: 0.312 | Test Loss 0.418 
(*) Entering Epoch 124 (11298.238 s)
Epoch 124, Step 475 | Training Accuracy: 0.828 | Test Accuracy: 0.852 | Training Loss: 0.394 | Test Loss 0.342 
Epoch 124, Step 950 | Training Accuracy: 0.836 | Test Accurac

Epoch 131, Step 3800 | Training Accuracy: 0.812 | Test Accuracy: 0.805 | Training Loss: 0.368 | Test Loss 0.433 
(*) Entering Epoch 132 (12025.587 s)
Epoch 132, Step 475 | Training Accuracy: 0.820 | Test Accuracy: 0.805 | Training Loss: 0.377 | Test Loss 0.457 
Epoch 132, Step 950 | Training Accuracy: 0.805 | Test Accuracy: 0.836 | Training Loss: 0.413 | Test Loss 0.375 
Epoch 132, Step 1425 | Training Accuracy: 0.867 | Test Accuracy: 0.836 | Training Loss: 0.293 | Test Loss 0.359 
Epoch 132, Step 1900 | Training Accuracy: 0.820 | Test Accuracy: 0.812 | Training Loss: 0.365 | Test Loss 0.358 
Epoch 132, Step 2375 | Training Accuracy: 0.852 | Test Accuracy: 0.828 | Training Loss: 0.358 | Test Loss 0.406 
Epoch 132, Step 2850 | Training Accuracy: 0.789 | Test Accuracy: 0.914 | Training Loss: 0.398 | Test Loss 0.279 
Epoch 132, Step 3325 | Training Accuracy: 0.883 | Test Accuracy: 0.812 | Training Loss: 0.344 | Test Loss 0.464 
Epoch 132, Step 3800 | Training Accuracy: 0.867 | Test Accura

Epoch 140, Step 2850 | Training Accuracy: 0.883 | Test Accuracy: 0.875 | Training Loss: 0.338 | Test Loss 0.325 
Epoch 140, Step 3325 | Training Accuracy: 0.797 | Test Accuracy: 0.914 | Training Loss: 0.397 | Test Loss 0.245 
Epoch 140, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.859 | Training Loss: 0.332 | Test Loss 0.313 
(*) Entering Epoch 141 (12843.893 s)
Epoch 141, Step 475 | Training Accuracy: 0.859 | Test Accuracy: 0.820 | Training Loss: 0.276 | Test Loss 0.392 
Epoch 141, Step 950 | Training Accuracy: 0.789 | Test Accuracy: 0.844 | Training Loss: 0.457 | Test Loss 0.342 
Epoch 141, Step 1425 | Training Accuracy: 0.906 | Test Accuracy: 0.836 | Training Loss: 0.294 | Test Loss 0.341 
Epoch 141, Step 1900 | Training Accuracy: 0.852 | Test Accuracy: 0.812 | Training Loss: 0.302 | Test Loss 0.376 
Epoch 141, Step 2375 | Training Accuracy: 0.805 | Test Accuracy: 0.820 | Training Loss: 0.441 | Test Loss 0.371 
Epoch 141, Step 2850 | Training Accuracy: 0.852 | Test Accura

Epoch 149, Step 1900 | Training Accuracy: 0.867 | Test Accuracy: 0.812 | Training Loss: 0.338 | Test Loss 0.399 
Epoch 149, Step 2375 | Training Accuracy: 0.805 | Test Accuracy: 0.852 | Training Loss: 0.363 | Test Loss 0.401 
Epoch 149, Step 2850 | Training Accuracy: 0.859 | Test Accuracy: 0.883 | Training Loss: 0.334 | Test Loss 0.300 
Epoch 149, Step 3325 | Training Accuracy: 0.836 | Test Accuracy: 0.797 | Training Loss: 0.371 | Test Loss 0.378 
Epoch 149, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.805 | Training Loss: 0.385 | Test Loss 0.518 
(*) Entering Epoch 150 (13661.521 s)
Graph saved to file: checkpoints/biRNN_kst_rho0_epoch150.ckpt-150
Epoch 150, Step 475 | Training Accuracy: 0.844 | Test Accuracy: 0.781 | Training Loss: 0.357 | Test Loss 0.400 
Epoch 150, Step 950 | Training Accuracy: 0.883 | Test Accuracy: 0.836 | Training Loss: 0.295 | Test Loss 0.385 
Epoch 150, Step 1425 | Training Accuracy: 0.812 | Test Accuracy: 0.867 | Training Loss: 0.373 | Test Loss 0.

Epoch 158, Step 950 | Training Accuracy: 0.867 | Test Accuracy: 0.781 | Training Loss: 0.326 | Test Loss 0.381 
Epoch 158, Step 1425 | Training Accuracy: 0.852 | Test Accuracy: 0.727 | Training Loss: 0.328 | Test Loss 0.554 
Epoch 158, Step 1900 | Training Accuracy: 0.844 | Test Accuracy: 0.852 | Training Loss: 0.363 | Test Loss 0.330 
Epoch 158, Step 2375 | Training Accuracy: 0.883 | Test Accuracy: 0.812 | Training Loss: 0.321 | Test Loss 0.449 
Epoch 158, Step 2850 | Training Accuracy: 0.844 | Test Accuracy: 0.859 | Training Loss: 0.290 | Test Loss 0.408 
Epoch 158, Step 3325 | Training Accuracy: 0.797 | Test Accuracy: 0.844 | Training Loss: 0.419 | Test Loss 0.371 
Epoch 158, Step 3800 | Training Accuracy: 0.852 | Test Accuracy: 0.805 | Training Loss: 0.308 | Test Loss 0.415 
(*) Entering Epoch 159 (14479.628 s)
Epoch 159, Step 475 | Training Accuracy: 0.922 | Test Accuracy: 0.750 | Training Loss: 0.219 | Test Loss 0.525 
Epoch 159, Step 950 | Training Accuracy: 0.844 | Test Accurac

Epoch 166, Step 3800 | Training Accuracy: 0.867 | Test Accuracy: 0.805 | Training Loss: 0.325 | Test Loss 0.385 
(*) Entering Epoch 167 (15206.612 s)
Epoch 167, Step 475 | Training Accuracy: 0.906 | Test Accuracy: 0.867 | Training Loss: 0.281 | Test Loss 0.324 
Epoch 167, Step 950 | Training Accuracy: 0.828 | Test Accuracy: 0.805 | Training Loss: 0.320 | Test Loss 0.393 
Epoch 167, Step 1425 | Training Accuracy: 0.844 | Test Accuracy: 0.820 | Training Loss: 0.268 | Test Loss 0.391 
Epoch 167, Step 1900 | Training Accuracy: 0.883 | Test Accuracy: 0.812 | Training Loss: 0.267 | Test Loss 0.384 
Epoch 167, Step 2375 | Training Accuracy: 0.875 | Test Accuracy: 0.820 | Training Loss: 0.320 | Test Loss 0.404 
Epoch 167, Step 2850 | Training Accuracy: 0.867 | Test Accuracy: 0.852 | Training Loss: 0.314 | Test Loss 0.318 
Epoch 167, Step 3325 | Training Accuracy: 0.797 | Test Accuracy: 0.805 | Training Loss: 0.442 | Test Loss 0.471 
Epoch 167, Step 3800 | Training Accuracy: 0.875 | Test Accura

Epoch 175, Step 2850 | Training Accuracy: 0.852 | Test Accuracy: 0.734 | Training Loss: 0.320 | Test Loss 0.488 
Epoch 175, Step 3325 | Training Accuracy: 0.883 | Test Accuracy: 0.844 | Training Loss: 0.275 | Test Loss 0.330 
Epoch 175, Step 3800 | Training Accuracy: 0.820 | Test Accuracy: 0.852 | Training Loss: 0.347 | Test Loss 0.332 
(*) Entering Epoch 176 (16024.712 s)
Epoch 176, Step 475 | Training Accuracy: 0.898 | Test Accuracy: 0.836 | Training Loss: 0.251 | Test Loss 0.358 
Epoch 176, Step 950 | Training Accuracy: 0.859 | Test Accuracy: 0.812 | Training Loss: 0.313 | Test Loss 0.407 
Epoch 176, Step 1425 | Training Accuracy: 0.820 | Test Accuracy: 0.820 | Training Loss: 0.374 | Test Loss 0.387 
Epoch 176, Step 1900 | Training Accuracy: 0.859 | Test Accuracy: 0.875 | Training Loss: 0.307 | Test Loss 0.397 
Epoch 176, Step 2375 | Training Accuracy: 0.883 | Test Accuracy: 0.805 | Training Loss: 0.243 | Test Loss 0.428 
Epoch 176, Step 2850 | Training Accuracy: 0.820 | Test Accura

Epoch 184, Step 1900 | Training Accuracy: 0.812 | Test Accuracy: 0.836 | Training Loss: 0.406 | Test Loss 0.426 
Epoch 184, Step 2375 | Training Accuracy: 0.875 | Test Accuracy: 0.812 | Training Loss: 0.279 | Test Loss 0.422 
Epoch 184, Step 2850 | Training Accuracy: 0.844 | Test Accuracy: 0.852 | Training Loss: 0.273 | Test Loss 0.342 
Epoch 184, Step 3325 | Training Accuracy: 0.898 | Test Accuracy: 0.805 | Training Loss: 0.238 | Test Loss 0.414 
Epoch 184, Step 3800 | Training Accuracy: 0.891 | Test Accuracy: 0.805 | Training Loss: 0.306 | Test Loss 0.380 
(*) Entering Epoch 185 (16842.980 s)
Epoch 185, Step 475 | Training Accuracy: 0.906 | Test Accuracy: 0.867 | Training Loss: 0.253 | Test Loss 0.335 
Epoch 185, Step 950 | Training Accuracy: 0.883 | Test Accuracy: 0.812 | Training Loss: 0.278 | Test Loss 0.409 
Epoch 185, Step 1425 | Training Accuracy: 0.844 | Test Accuracy: 0.758 | Training Loss: 0.306 | Test Loss 0.498 
Epoch 185, Step 1900 | Training Accuracy: 0.891 | Test Accura

In [6]:
train(config)#, restore = True)

(*) Entering Epoch 0 (11.676 s)
Graph saved to file: checkpoints/biRNN_kst_rho0_epoch0.ckpt-0
Epoch 0, Step 475 | Training Accuracy: 0.609 | Test Accuracy: 0.562 | Training Loss: 0.665 | Test Loss 0.670 
Epoch 0, Step 950 | Training Accuracy: 0.734 | Test Accuracy: 0.555 | Training Loss: 0.610 | Test Loss 0.665 
Epoch 0, Step 1425 | Training Accuracy: 0.664 | Test Accuracy: 0.539 | Training Loss: 0.605 | Test Loss 0.675 
Epoch 0, Step 1900 | Training Accuracy: 0.664 | Test Accuracy: 0.633 | Training Loss: 0.612 | Test Loss 0.646 
Epoch 0, Step 2375 | Training Accuracy: 0.648 | Test Accuracy: 0.711 | Training Loss: 0.585 | Test Loss 0.637 
Epoch 0, Step 2850 | Training Accuracy: 0.742 | Test Accuracy: 0.766 | Training Loss: 0.510 | Test Loss 0.562 
Epoch 0, Step 3325 | Training Accuracy: 0.672 | Test Accuracy: 0.641 | Training Loss: 0.572 | Test Loss 0.632 
Epoch 0, Step 3800 | Training Accuracy: 0.703 | Test Accuracy: 0.656 | Training Loss: 0.547 | Test Loss 0.598 
(*) Entering Epoch 1

Epoch 8, Step 3800 | Training Accuracy: 0.828 | Test Accuracy: 0.875 | Training Loss: 0.387 | Test Loss 0.357 
(*) Entering Epoch 9 (1219.827 s)
Epoch 9, Step 475 | Training Accuracy: 0.828 | Test Accuracy: 0.828 | Training Loss: 0.394 | Test Loss 0.364 
Epoch 9, Step 950 | Training Accuracy: 0.836 | Test Accuracy: 0.805 | Training Loss: 0.418 | Test Loss 0.408 
Epoch 9, Step 1425 | Training Accuracy: 0.844 | Test Accuracy: 0.789 | Training Loss: 0.366 | Test Loss 0.465 
Epoch 9, Step 1900 | Training Accuracy: 0.820 | Test Accuracy: 0.820 | Training Loss: 0.396 | Test Loss 0.405 
Epoch 9, Step 2375 | Training Accuracy: 0.758 | Test Accuracy: 0.781 | Training Loss: 0.455 | Test Loss 0.444 
Epoch 9, Step 2850 | Training Accuracy: 0.789 | Test Accuracy: 0.750 | Training Loss: 0.420 | Test Loss 0.466 
Epoch 9, Step 3325 | Training Accuracy: 0.797 | Test Accuracy: 0.852 | Training Loss: 0.423 | Test Loss 0.376 
Epoch 9, Step 3800 | Training Accuracy: 0.789 | Test Accuracy: 0.844 | Training 

KeyboardInterrupt: 

In [7]:
ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
biRNN = BiRNN(config, training = False)
network_output = biRNN.predict(ckpt)

np.save(os.path.join(directories.checkpoints, '{}_{}_y_pred.npy'.format(config.channel, config.mode)), network_output)
np.save(os.path.join(directories.checkpoints, '{}_{}_y_test.npy'.format(config.channel, config.mode)), df_y_test.values)

AttributeError: 'BiRNN' object has no attribute 'ema'