In [1]:
"""
Import all the necessary packages
"""
import tensorflow as tf
import numpy as np
import functools
import random
import argparse
from multiprocessing import Pool
from input import get_train_data,get_test_data,get_final_data
from tensorflow.python.framework.ops import reset_default_graph
from pprint import pprint
from datetime import datetime


In [3]:
"""
Load data into memory 
"""
print "loading data into memory"
pool = Pool(processes=3)
train_result = pool.apply_async(get_train_data)
test_a_result = pool.apply_async(get_test_data)
test_b_result = pool.apply_async(get_final_data)

test_inp, test_out = test_a_result.get()
print "test_a data loaded"

final_inp, final_out = test_b_result.get()
print "test_b data loaded"

train_inp, train_out = train_result.get()
print "train data loaded"

loading data into memory
test_a data loaded
test_b data loaded
train data loaded


In [4]:
"""
Analyse the data to m
"""
print([len(f) for f in test_inp[:10]])
print(test_inp[1][1])


[30, 30, 30, 30, 30, 30, 30, 30, 30, 30]
[ 0.00753893  0.4429522   0.6082194   0.47846727  0.63072812  0.65132619
  0.0244708   0.82934638  0.63344647  0.45958061  0.21532162  0.48766874
  0.99585648  0.89168342  0.42134235  0.54405465  0.94602753  0.20298426
  0.83931349  0.13234238  0.76526675  0.36424376  0.78451387  0.24423664
  0.40405354  0.94618541  0.64868175  0.35889811  0.83237328  0.75670462
  0.58740005  0.63011902  0.33657386  0.55998569  0.23590601  0.15847046
  0.07705077  0.52542988  0.71014924  0.49015792  0.37892592  0.27533119
  0.24534198  0.22301109  0.28631221  0.19378396  0.94386822  0.82044902
  0.10530504  0.17121434  0.30754337  0.86034054  0.61336142  0.84412731
  0.61880204  0.11399117  0.37042367  0.68083447  0.69293952  0.56450751
  0.73611949  0.41860562  0.62613314  0.83870405  0.95957705  0.23850788
  0.09996017  0.58575166  0.01141853  0.35075413  0.09235751  0.01974887
  0.44969944  0.81352404  0.03171994  0.79232107  0.88424554  0.79186866
  0.487637

In [29]:
"""
Helper Methods
"""

def weight_and_bias(in_size,out_size):
        weight = tf.truncated_normal([in_size,out_size], stddev=0.01, name="weight")
        bias = tf.constant(0.1, shape=[out_size], name="bias")
        return tf.Variable(weight), tf.Variable(bias)
    
def length(data):
    used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2))
    length = tf.reduce_sum(used, reduction_indices=1)
    length = tf.cast(length, tf.int32)
    return length
    

def error():
    mistakes = tf.not_equal(tf.argmax(target, 2), tf.argmax(prediction, 2))

    mistakes = tf.cast(mistakes, tf.float32)
    mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
    mistakes *= mask
    # Average over actual sequence lengths.
    mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
    mistakes /= tf.cast(length(data), tf.float32)
    return tf.reduce_mean(mistakes)

"""
F1-prediction function
"""
def f1(prediction, target, length):
    tp=np.array([0]*(NUM_CLASSES+1))
    fp=np.array([0]*(NUM_CLASSES+1))
    fn=np.array([0]*(NUM_CLASSES+1))

    target = np.argmax(target, 2)
    prediction = np.argmax(prediction, 2)


    for i in range(len(target)):
        for j in range(length[i]):
            if target[i][j] == prediction[i][j]:
                tp[target[i][j]] += 1
            else:
                fp[target[i][j]] += 1
                fn[prediction[i][j]] += 1

    NON_NAMED_ENTITY = 11
    for i in range(NUM_CLASSES):
        if i != NON_NAMED_ENTITY:
            tp[NUM_CLASSES] += tp[i]
            fp[NUM_CLASSES] += fp[i]
            fn[NUM_CLASSES] += fn[i]

    precision = []
    recall = []
    fscore = []
    for i in range(NUM_CLASSES+1):
        precision.append(tp[i]*1.0/(tp[i]+fp[i]))
        recall.append(tp[i]*1.0/(tp[i]+ fn[i]))
        fscore.append(2.0*precision[i]*recall[i]/(precision[i]+recall[i]))

    print "precision = {}".format(["{:10.4f}%".format(f) for f in precision])
    print "recall = {}".format(["{:10.4f}%".format(f) for f in recall])
    print "f1score = {}".format(["{:10.4f}%".format(f) for f in fscore])
                            
    return fscore[NUM_CLASSES]

In [38]:
"""
Define all the model varaibles
"""
WORD_DIM = 113
MAX_SEQ_LEN = 30
NUM_CLASSES = 9
NUM_HIDDEN = 256
NUM_LAYERS = 2
DROPOUT = 0.5
LEARNING_RATE = 0.003

# resetting the graph before creating a new one
reset_default_graph()

print("defining variables")
"""
Initialize the placeholders needed to input the data
"""        

data = tf.placeholder(tf.float32,[None, MAX_SEQ_LEN, WORD_DIM], name="data_placeholder")
target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES], name="target_placeholder")

print "data_pl,", data.get_shape()
print "target_pl,", target.get_shape()


defining variables
data_pl, (?, 30, 113)
target_pl, (?, 30, 9)


In [39]:
"""
Create the tensorflow model used to train the NER reacogniser
"""

rnn_cell = tf.nn.rnn_cell
# Try: LSTMBlock cell or GruBlock cell
fw_cell = rnn_cell.LSTMCell(NUM_HIDDEN, state_is_tuple=True)
bw_cell = rnn_cell.LSTMCell(NUM_HIDDEN, state_is_tuple=True)

if NUM_LAYERS > 1:
    fw_cell = rnn_cell.MultiRNNCell([fw_cell] * NUM_LAYERS, state_is_tuple=True)
    fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=DROPOUT)
    bw_cell = rnn_cell.MultiRNNCell([bw_cell] * NUM_LAYERS, state_is_tuple=True)
    bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=DROPOUT)
else:
    fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=DROPOUT)
    bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=DROPOUT)

# Try: Dynamic Bidirectional RNN
output, _, _ = tf.nn.bidirectional_rnn(fw_cell, 
                                       bw_cell, 
                                       tf.unpack(tf.transpose(data, perm=[1, 0, 2])), 
                                       dtype=tf.float32, 
                                       sequence_length=length(data))

max_length = int(target.get_shape()[1])
num_classes = int(target.get_shape()[2])
weight, bias = weight_and_bias(2*NUM_HIDDEN, num_classes)
output = tf.reshape(tf.transpose(tf.pack(output), perm=[1, 0, 2]), [-1, 2 * NUM_HIDDEN], name="generate_output")
prediction = tf.nn.softmax(tf.matmul(output, weight) + bias, name="generate_prediction")
prediction = tf.reshape(prediction, [-1, max_length, num_classes], name="reshape_prediction")

# add TensorBoard summaries for all variables
tf.contrib.layers.summarize_variables()

# restricting memory usage, TensorFlow is greedy and will use all memory otherwise
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

# initialize the Session
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))

# test the forward pass
sess.run(tf.initialize_all_variables())

# setup and write summaries
timestamp = datetime.now().strftime("%Y%m%d-%H_%M_%S")
summaries_path = "tensorboard/%s/logs" % (timestamp)
summaries = tf.merge_all_summaries()
summarywriter = tf.train.SummaryWriter(summaries_path, sess.graph)
print("Ready")

## Define how the Network should calculate the Cost Function

In [40]:
"""
Define the Cost function
"""


def loss_and_acc(prediction):
    # computing cross entropy per sample
    cross_entropy = -tf.reduce_sum(target * tf.log(prediction+1e-10), reduction_indices=2)
    
    # Check if the maximum value on the secondary axis is positive or negative
    mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2)) 
    cross_entropy *= mask # Ensure the cross_entropy is positive (by multiplying either with -1 or 1)
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1) # Summarize the values on the primary axis
    cross_entropy /= tf.cast(length(data), tf.float32) # Convert all dimensions of the vector to 32float.
    
    # averaging over samples
    loss = tf.reduce_mean(cross_entropy) # Reduce the vector to the mean value on all dimensions
    
    # if you want regularization
    #reg_scale = 0.0001
    #regularize = tf.contrib.layers.l2_regularizer(reg_scale)
    #params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    #reg_term = sum([regularize(param) for param in params])
    #loss += reg_term
    
    # calculate accuracy
    argmax_pred = tf.to_int32(tf.argmax(prediction, dimension=1))
    argmax_target = tf.to_int32(tf.argmax(target, dimension=1))
    correct = tf.to_float(tf.equal(argmax_pred, argmax_target))
    accuracy = tf.reduce_mean(correct)
    return loss, accuracy, argmax_pred

# loss, accuracy and prediction
loss, accuracy, prediction = loss_and_acc(prediction)

loss_valid = loss
accuracy_valid = accuracy




# make tensorboard summeries
tf.scalar_summary('train/loss', loss)
tf.scalar_summary('train/accuracy', accuracy)
tf.scalar_summary('validation/loss', loss_valid)
tf.scalar_summary('validation/accuracy', accuracy_valid)
print("Ready")

<tf.Tensor 'ScalarSummary_3:0' shape=() dtype=string>

## Define how the network should optimize

In [43]:
clip_norm = 1
# defining our optimizer and
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)

# applying the gradients
grads_and_vars = optimizer.compute_gradients(loss)
gradients, variables = zip(*grads_and_vars)  # unzip list of tuples
clipped_gradients, global_norm = (tf.clip_by_global_norm(gradients, clip_norm))
clipped_grads_and_vars = zip(clipped_gradients, variables)

# make training operation for applying the gradients
train_operation = optimizer.apply_gradients(clipped_grads_and_vars)
print("Ready")

Ready


In [45]:
"""
Define the methods needed to train the model
"""

NUM_EPOCH = 100 # How many times we iterate through the dataset
BATCH_SIZE = 256 # How many sentences we analyse at a time
BATCH_AMOUNT = (len(train_inp) + BATCH_SIZE - 1) / BATCH_SIZE

saver = tf.train.Saver()

train_loss = []
train_acc = []
print("starting training")
for epoch in range(NUM_EPOCH):
    ptr = 0 # Used to figure what data have already been processed
    for batch_number in range(BATCH_AMOUNT):
        batch_inp, batch_out = train_inp[ptr:ptr+BATCH_SIZE], train_out[ptr:ptr+BATCH_SIZE]
        ptr += BATCH_SIZE
        
        # Define the input that should be given to the neural network at each run
        feed_values = {
            data: np.array(batch_inp),
            target: np.array(batch_out)
        }
        # Define the values that should be extracted from the neural network at each run
        fetch_values = [train_operation, loss, accuracy, summaries]
        
        return_values = sess.run(fetch_values, feed_values) # Give the session what it should return as well as inputs
        train_loss.append(return_values[1])                 # Store the loss at every run
        train_acc.append(return_values[2])                  # Store the accuracy at every run
        
    if epoch % 10 == 0:
        save_path = saver.save(sess, "model/model.ckpt")
        print("Model saved in file: %s" % save_path)
        
    pred = sess.run(model.prediction, {data: test_inp, target: test_out, dropout: 1})
    pred,length = sess.run(model.getpredf1, {data: test_inp, target: test_out, dropout: 1})
    print "Epoch:" + str(epoch), "TestA score,"
    m = f1(pred,test_out,length)
    if m > maximum:
        maximum = m
        save_path = saver.save(sess, "model/model_max.ckpt")
        print("Max Model saved in file: %s" % save_path)
        pred = sess.run(model.prediction, {data: final_inp, target: final_out, dropout: 1})
        pred,length = sess.run(model.getpredf1, {data: final_inp, target: final_out, dropout: 1})
        print "TestB score,"
        f1(pred, final_out, length)
        print"\n\n"


starting training


FailedPreconditionError: Attempting to use uninitialized value beta2_power_1
	 [[Node: beta2_power_1/read = Identity[T=DT_FLOAT, _class=["loc:@BiRNN_FW/MultiRNNCell/Cell0/LSTMCell/W_0"], _device="/job:localhost/replica:0/task:0/cpu:0"](beta2_power_1)]]
Caused by op u'beta2_power_1/read', defined at:
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/traitlets/config/application.py", line 592, in launch_instance
    app.start()
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 403, in start
    ioloop.IOLoop.instance().start()
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 260, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 212, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 370, in execute_request
    user_expressions, allow_stdin)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 175, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2902, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 3006, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 3066, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-43-4ee17666bf47>", line 12, in <module>
    train_operation = optimizer.apply_gradients(clipped_grads_and_vars)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/training/optimizer.py", line 300, in apply_gradients
    self._create_slots(var_list)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/training/adam.py", line 115, in _create_slots
    trainable=False)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 211, in __init__
    dtype=dtype)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 323, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1098, in identity
    result = _op_def_lib.apply_op("Identity", input=input, name=name)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/Vilstrup/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
    self._traceback = _extract_stack()
