# Dialogue Act Classification

The document is self-contained. All cells that include training procedures can be skiped and the pretrained models can be loaded in the next cells from the corresponding paths.

In [1]:
import os
import sys
import csv
import time
import json
import datetime
import pickle as pkl
import tensorflow as tf
from tensorflow.contrib import learn
import numpy as np

#sys.path.insert(0, 'drive/My Drive/NLP_project')

from utilities import *

Using TensorFlow backend.


## Baseline LSTM model - Uterrance classification

In [22]:
resource_dir = 'data/'
embeddings_dir = "embeddings/"
embedding_filename = 'word2vec_GoogleNews'
model_name = "Embeddings Model"

# Load metadata
metadata = pkl.load(open(resource_dir + "metadata.pkl", 'rb'))
embeddings_dimension = 300
embeddings = pkl.load(open(embeddings_dir + embedding_filename + '_' + str(embeddings_dimension) + 'dim.pkl','rb'))

#Load training,test and validation test
train_data = pkl.load(open(resource_dir + 'train_data.pkl','rb'))
train_x, train_y = generate_embeddings(train_data, metadata)
train_y=np.array(np.nonzero(train_y))[1] 

test_data = pkl.load(open(resource_dir + 'test_data.pkl','rb'))
test_x, test_y = generate_embeddings(test_data, metadata)
test_y=np.array(np.nonzero(test_y))[1]   

val_data = pkl.load(open(resource_dir + 'val_data.pkl','rb'))
val_x, val_y = generate_embeddings(val_data, metadata)
val_y=np.array(np.nonzero(val_y))[1]  

train_lengths = np.array(list(map(len, [sent for sent in train_data['utterances']]))) 
valid_lengths = np.array(list(map(len, [sent for sent in val_data['utterances']]))) 
test_lengths = np.array(list(map(len, [sent for sent in test_data['utterances']]))) 

# Parameters
####Delete all flags before declare#####

def del_all_flags(FLAGS):
    flags_dict = FLAGS._flags()    
    keys_list = [keys for keys in flags_dict]    
    for keys in keys_list:
        FLAGS.__delattr__(keys)

del_all_flags(tf.flags.FLAGS)

FLAGS = tf.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')


tf.flags.DEFINE_string('clf', 'lstm', "Type of classifier")
# Data parameters
tf.flags.DEFINE_integer('num_classes', metadata['num_labels'], 'Number of classes')
tf.flags.DEFINE_integer('max_length', metadata['max_utterance_len'], 'Max document length')
tf.flags.DEFINE_integer('vocab_size', metadata['vocabulary_size'], 'Vocabulary size')


# LSTM hyperparameters
tf.flags.DEFINE_boolean('prob', False, 'If we fit probalistic word embeddings')
tf.flags.DEFINE_integer('embedding_size', embeddings_dimension , 'Word embedding size. LSTM.')
tf.flags.DEFINE_integer('hidden_size', 300, 'Number of hidden units in the LSTM cell. For LSTM, Bi-LSTM')
tf.flags.DEFINE_integer('num_layers', 2, 'Number of the LSTM cells. For LSTM, Bi-LSTM, C-LSTM')
tf.flags.DEFINE_float('keep_prob', 0.5, 'Dropout keep probability')  # All
tf.flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate')  # All
tf.flags.DEFINE_float('l2_reg_lambda', 0.001, 'L2 regularization lambda')  # All

# Training parameters
tf.flags.DEFINE_integer('batch_size', 200, 'Batch size')
tf.flags.DEFINE_integer('num_epochs', 10, 'Number of epochs')
tf.flags.DEFINE_integer('evaluate_every_steps', 100, 'Evaluate the model on validation set after this many steps')
tf.flags.DEFINE_integer('save_every_steps', 1000, 'Save the model after this many steps')
tf.flags.DEFINE_integer('num_checkpoint', 10, 'Number of models to store')


embedding_matrix = embeddings['embedding_matrix']

print("------------------------------------")
print("Model parameters")
print("------------------------------------")
print("Vocabulary size: ", FLAGS.vocab_size)
print("Number of labels: ", FLAGS.num_classes)
print("Embeddings dimension: ", FLAGS.embedding_size)
print("Batch size: ", FLAGS.batch_size)
print("Hidden layer size: ", FLAGS.hidden_size)
print("learning rate: ", FLAGS.learning_rate)
print("Epochs: ", FLAGS.num_epochs)
print("------------------------------------")
print('Training set', train_x.shape, train_y.shape)
print('Validation set', val_x.shape, val_y.shape)
print('Test set', test_x.shape, test_y.shape)
# Build the model

------------------------------------
Model parameters
------------------------------------
Vocabulary size:  23103
Number of labels:  41
Embeddings dimension:  300
Batch size:  200
Hidden layer size:  300
learning rate:  0.001
Epochs:  10
------------------------------------
Training set (192768, 106) (192768,)
Validation set (3196, 106) (3196,)
Test set (4088, 106) (4088,)


In [14]:
class rnn_clf(object):
    """"
    LSTM model
    """
    def __init__(self, config):
        self.num_classes = config.num_classes
        self.vocab_size = config.vocab_size
        self.hidden_size = config.hidden_size
        self.num_layers = config.num_layers
        self.l2_reg_lambda = config.l2_reg_lambda
        self.embedding_size = config.embedding_size
        self.prob = config.prob

        # Placeholders
        if self.prob == True:
          self.input_x = tf.placeholder(dtype=tf.float32, shape=[None, None,self.embedding_size], name='input_x')
        else:
          self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, None], name='input_x')
        
        self.batch_size = tf.placeholder(dtype=tf.int32, shape=[], name='batch_size')
        self.input_y = tf.placeholder(dtype=tf.int64, shape=[None], name='input_y')
        self.keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
        self.sequence_length = tf.placeholder(dtype=tf.int32, shape=[None], name='sequence_length')

        # L2 loss
        self.l2_loss = tf.constant(0.0)
        
        if self.prob:
          inputs = self.input_x
        else:
          # Word embedding
          with tf.device('/cpu:0'), tf.name_scope('embedding'):
            embedding = tf.get_variable("embedding", shape=[self.vocab_size,  self.embedding_size], initializer=tf.constant_initializer(np.array(embedding_matrix)))
            inputs = tf.nn.embedding_lookup(embedding, self.input_x)
   

        # Input dropout
        self.inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob)

        # LSTM
        self.final_state = self.normal_lstm()


        # Softmax output layer
        with tf.name_scope('softmax'):
            softmax_w = tf.get_variable('softmax_w', shape=[self.hidden_size, self.num_classes], dtype=tf.float32)
            softmax_b = tf.get_variable('softmax_b', shape=[self.num_classes], dtype=tf.float32)

            # L2 regularization for output layer
            self.l2_loss += tf.nn.l2_loss(softmax_w)
            self.l2_loss += tf.nn.l2_loss(softmax_b)

            self.logits = tf.matmul(self.final_state[self.num_layers - 1].h, softmax_w) + softmax_b
           
            predictions = tf.nn.softmax(self.logits)
            self.predictions = tf.argmax(predictions, 1, name='predictions')

        # Loss
        with tf.name_scope('loss'):
            tvars = tf.trainable_variables()

            # L2 regularization for LSTM weights
            for tv in tvars:
                if 'kernel' in tv.name:
                    self.l2_loss += tf.nn.l2_loss(tv)

            losses =  tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y,
                                                                    logits=self.logits)
            self.cost = tf.reduce_mean(losses) + self.l2_reg_lambda * self.l2_loss

        # Accuracy
        with tf.name_scope('accuracy'):
            correct_predictions = tf.equal(self.predictions, self.input_y)
            self.correct_num = tf.reduce_sum(tf.cast(correct_predictions, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='accuracy')

    def normal_lstm(self):
        # LSTM Cell
        cell = tf.contrib.rnn.LSTMCell(self.hidden_size,
                                       forget_bias=1.0,
                                       state_is_tuple=True,
                                       reuse=tf.get_variable_scope().reuse)
        # Add dropout to cell output
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        # Stacked LSTMs
        cell = tf.contrib.rnn.MultiRNNCell([cell] * self.num_layers, state_is_tuple=True)

        self._initial_state = cell.zero_state(self.batch_size, dtype=tf.float32)

        # Dynamic LSTM
        with tf.variable_scope('LSTM'):
            outputs, state = tf.nn.dynamic_rnn(cell,
                                               inputs=self.inputs,
                                               initial_state=self._initial_state,
                                               sequence_length=self.sequence_length)
        

        final_state = state

        return final_state


In [4]:
def batch_iter(data, labels, lengths, batch_size, num_epochs):
    """
    A mini-batch iterator to generate mini-batches for training neural network
    :param data: a list of sentences. each sentence is a vector of integers
    :param labels: a list of labels
    :param batch_size: the size of mini-batch
    :param num_epochs: number of epochs
    :return: a mini-batch iterator
    """
    assert len(data) == len(labels) == len(lengths)

    data_size = len(data) #192768 utterances
    epoch_length = data_size // batch_size # 963 steps for each epoch

    for e in range(num_epochs):
        for i in range(epoch_length):
            start_index = i * batch_size
            end_index = start_index + batch_size

            xdata = data[start_index: end_index]
            ydata = labels[start_index: end_index]
            sequence_length = lengths[start_index: end_index]

            yield xdata, ydata, sequence_length,e

### Train model

In [0]:

FLAGS.embedding_size = FLAGS.hidden_size


# Direcotry of outputs
timestamp = str(int(time.time()))
outdir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
if not os.path.exists(outdir):
    os.makedirs(outdir)



# Batch Generator
data_train = batch_iter(train_x, train_y, train_lengths, FLAGS.batch_size, FLAGS.num_epochs)

# Train
# =============================================================================

with tf.Graph().as_default():
    with tf.Session() as sess:
        classifier = rnn_clf(FLAGS)

        # Train procedure
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        grads_and_vars = optimizer.compute_gradients(classifier.cost)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoint)

        sess.run(tf.global_variables_initializer())


        def run_step(input_data, is_training=True):
            """Run one step of the training process."""
            input_x, input_y, sequence_length,e = input_data



            fetches = {'step': global_step,
                       'cost': classifier.cost,
                       'accuracy': classifier.accuracy,
                      'final_state': classifier.final_state}
            feed_dict = {classifier.input_x: input_x,
                         classifier.input_y: input_y,
                        classifier.batch_size:len(input_x),
                        classifier.sequence_length:sequence_length}


            if is_training:
                fetches['train_op'] = train_op
                feed_dict[classifier.keep_prob] = FLAGS.keep_prob
            else:
                feed_dict[classifier.keep_prob] = 1.0

            vars = sess.run(fetches, feed_dict)
            step = vars['step']
            cost = vars['cost']
            accuracy = vars['accuracy']

            time_str = datetime.datetime.now().isoformat()
            print("{}: epoch: {}/{} step: {}, loss: {:g}, accuracy: {:g}".format(time_str,e+1,FLAGS.num_epochs, step, cost, accuracy))
       

            return accuracy


        print('Start training .......')

        for train_input in data_train:
            run_step(train_input, is_training=True)
            current_step = tf.train.global_step(sess, global_step)

            if current_step % FLAGS.evaluate_every_steps == 0:
                print('\nValidation')
                run_step((val_x, val_y, valid_lengths,train_input[-1]), is_training=False)
 
                print('')

            if current_step % FLAGS.save_every_steps == 0:
                save_path = saver.save(sess, os.path.join(outdir, 'model/clf'), current_step)

        print('\nAll the files have been saved to {}\n'.format(outdir))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Start training .......
2019-03-14T14:52:15.206264: epoch: 1/10 step: 1, loss: 4.1149, accuracy: 0.015
2019-03-14T14:52:15.526564: epoch: 1/10 step: 2, loss: 3.99619, accuracy: 0.365
2019-03-14T14:52:15.829764: epoch: 1/10 step: 3, loss: 3.97667, accuracy: 0.275
2019-03-14T14:52:16.211337: epoch: 1/10 step: 4, loss: 3.61177, accuracy: 0.4
2019-03-14T14:52:16.515460: epoch: 1/10 step: 5, loss: 3.26694, 

### Test model

In [23]:
## Test

def del_all_flags(FLAGS):
    flags_dict = FLAGS._flags()    
    keys_list = [keys for keys in flags_dict]    
    for keys in keys_list:
        FLAGS.__delattr__(keys)

del_all_flags(tf.flags.FLAGS)



# File pathscontent/runs/1552575131
tf.flags.DEFINE_string('run_dir', 'runs/1552575131', 'Restore the model from this run')
tf.flags.DEFINE_string('checkpoint', 'clf-9000', 'Restore the graph from this checkpoint')

# Test batch size
tf.flags.DEFINE_integer('batch_size', 64, 'Test batch size')

FLAGS = tf.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')

# Restore graph
graph = tf.Graph()
with graph.as_default():
    sess = tf.Session()
    # Restore metagraph
    saver = tf.train.import_meta_graph('{}.meta'.format(os.path.join(FLAGS.run_dir, 'model', FLAGS.checkpoint)))
    # Restore weights
    saver.restore(sess, os.path.join(FLAGS.run_dir, 'model', FLAGS.checkpoint))

    # Get tensors
    input_x = graph.get_tensor_by_name('input_x:0')
    input_y = graph.get_tensor_by_name('input_y:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    predictions = graph.get_tensor_by_name('softmax/predictions:0')
    accuracy = graph.get_tensor_by_name('accuracy/accuracy:0')

    # Generate batches
    batches = batch_iter(test_x, test_y, test_lengths, FLAGS.batch_size, 1)

    num_batches = int(len(test_x)/FLAGS.batch_size)
    set_predictions = []
    sum_acc = 0

    # Test
    for batch in batches:
        x_test, y_test, x_lengths,e = batch
        batch_size = graph.get_tensor_by_name('batch_size:0')
        sequence_length = graph.get_tensor_by_name('sequence_length:0')
        feed_dict = {input_x: x_test, 
                     input_y: y_test, 
                     batch_size: FLAGS.batch_size, 
                     sequence_length: x_lengths, 
                     keep_prob: 1.0}

        batch_predictions, batch_accuracy = sess.run([predictions, accuracy], feed_dict)

        sum_acc += batch_accuracy
        set_predictions = np.concatenate([set_predictions, batch_predictions])

    average_accuracy = sum_acc / num_batches

# Print total test accuracy
print("==============================")
print("Baseline LSTM model ")
print("******************************")
print('Test accuracy: {}'.format(average_accuracy))
print("==============================")
# Save all predictions
with open(os.path.join(FLAGS.run_dir, 'predictions.csv'), 'w', encoding='utf-8', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(['True class', 'Prediction'])
    for i in range(len(set_predictions)):
        csvwriter.writerow([test_y[i], set_predictions[i]])
    print('Predictions saved to {}'.format(os.path.join(FLAGS.run_dir, 'predictions.csv')))


INFO:tensorflow:Restoring parameters from runs/1552575131\model\clf-9000
Baseline LSTM model 
******************************
Test accuracy: 0.7078373015873016
Predictions saved to runs/1552575131\predictions.csv


## Baseline LSTM model + Reinforced Semantic Embeddings

In [18]:
word_frequency = 2
frequency_data = pkl.load(open(embeddings_dir + 'probabilistic_freq_' + str(word_frequency) + '.pkl', 'rb'))

# Load Training and test sets
train_data = pkl.load(open(resource_dir + 'train_data.pkl', 'rb'))

#Generate Probabilistic Embeddings training set
train_x, train_y = generate_probabilistic_embeddings(train_data, frequency_data, metadata)
train_y=np.array(np.nonzero(train_y))[1] 

test_data = pkl.load(open(resource_dir + 'test_data.pkl', 'rb'))

#Generate Probabilistic Embeddings test set
test_x, test_y = generate_probabilistic_embeddings(test_data, frequency_data, metadata)
test_y=np.array(np.nonzero(test_y))[1]   

val_data = pkl.load(open(resource_dir + 'val_data.pkl', 'rb'))

#Generate Probabilistic Embeddings validation set
val_x, val_y = generate_probabilistic_embeddings(val_data, frequency_data, metadata)
val_y=np.array(np.nonzero(val_y))[1]  


train_lengths = np.array(list(map(len, [sent for sent in train_data['utterances']]))) 
valid_lengths = np.array(list(map(len, [sent for sent in val_data['utterances']]))) 
test_lengths = np.array(list(map(len, [sent for sent in test_data['utterances']]))) 

### Train

In [7]:
# Directory of outputs
timestamp = str(int(time.time()))
outdir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
if not os.path.exists(outdir):
    os.makedirs(outdir)

def del_all_flags(FLAGS):
  flags_dict = FLAGS._flags()    
  keys_list = [keys for keys in flags_dict]    
  for keys in keys_list:
    FLAGS.__delattr__(keys)

del_all_flags(tf.flags.FLAGS)

FLAGS = tf.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')


tf.flags.DEFINE_string('clf', 'lstm', "Type of classifier")
# Data parameters
tf.flags.DEFINE_integer('num_classes', metadata['num_labels'], 'Number of classes')
tf.flags.DEFINE_integer('max_length', metadata['max_utterance_len'], 'Max document length')
tf.flags.DEFINE_integer('vocab_size', metadata['vocabulary_size'], 'Vocabulary size')


# LSTM hyperparameters
tf.flags.DEFINE_boolean('prob', True, 'If we fit probalistic word embeddings')
tf.flags.DEFINE_integer('embedding_size', 41 , 'Word embedding size. LSTM.')
tf.flags.DEFINE_integer('hidden_size', 41, 'Number of hidden units in the LSTM cell. For LSTM, Bi-LSTM')
tf.flags.DEFINE_integer('num_layers', 2, 'Number of the LSTM cells. For LSTM, Bi-LSTM, C-LSTM')
tf.flags.DEFINE_float('keep_prob', 1, 'Dropout keep probability')  # All
tf.flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate')  # All
tf.flags.DEFINE_float('l2_reg_lambda', 0.001, 'L2 regularization lambda')  # All

# Training parameters
tf.flags.DEFINE_integer('batch_size', 100, 'Batch size')
tf.flags.DEFINE_integer('num_epochs', 10, 'Number of epochs')
tf.flags.DEFINE_integer('evaluate_every_steps', 100, 'Evaluate the model on validation set after this many steps')
tf.flags.DEFINE_integer('save_every_steps', 1000, 'Save the model after this many steps')
tf.flags.DEFINE_integer('num_checkpoint', 10, 'Number of models to store')





# Batch iterator
data_train = batch_iter(train_x, train_y, train_lengths, FLAGS.batch_size, FLAGS.num_epochs)

# Train
# =============================================================================

with tf.Graph().as_default():
    with tf.Session() as sess:
        classifier = rnn_clf(FLAGS)

        # Train procedure
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        grads_and_vars = optimizer.compute_gradients(classifier.cost)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoint)

        sess.run(tf.global_variables_initializer())


        def run_step(input_data, is_training=True):
            """Run one step of the training process."""
            input_x, input_y, sequence_length,e = input_data



            fetches = {'step': global_step,
                       'cost': classifier.cost,
                       'accuracy': classifier.accuracy,
                      'final_state': classifier.final_state}
            feed_dict = {classifier.input_x: input_x,
                         classifier.input_y: input_y,
                        classifier.batch_size:len(input_x),
                        classifier.sequence_length:sequence_length}


            if is_training:
                fetches['train_op'] = train_op
                feed_dict[classifier.keep_prob] = FLAGS.keep_prob
            else:
                feed_dict[classifier.keep_prob] = 1.0

            vars = sess.run(fetches, feed_dict)
            step = vars['step']
            cost = vars['cost']
            accuracy = vars['accuracy']

            time_str = datetime.datetime.now().isoformat()
            print("{}: epoch: {}/{} step: {}, loss: {:g}, accuracy: {:g}".format(time_str,e+1,FLAGS.num_epochs, step, cost, accuracy))
            #print("{}: step: {}, loss: {:g}, accuracy: {:g}".format(time_str, step, cost, accuracy))

            return accuracy


        print('Start training .......')

        for train_input in data_train:
            run_step(train_input, is_training=True)
            current_step = tf.train.global_step(sess, global_step)

            if current_step % FLAGS.evaluate_every_steps == 0:
                print('\nValidation')
                run_step((val_x, val_y, valid_lengths,train_input[-1]), is_training=False)
                #run_step((val_x, val_y, valid_lengths), is_training=False)
                print('')

            if current_step % FLAGS.save_every_steps == 0:
                save_path = saver.save(sess, os.path.join(outdir, 'model/clf'), current_step)

        print('\nAll the files have been saved to {}\n'.format(outdir))




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Colocations handled automatically by placer.
Start training .......
2019-03-14T19:02:00.716166: epoch: 1/10 step: 1, loss: 3.76969, accuracy: 0
2019-03-14T19:02:00.823179: epoch: 1/10 step: 2, loss: 3.71624, accuracy: 0
2019-03-14T19:02:00.915097: epoch: 1/10 step: 3, loss: 3.69236, accuracy: 0.13
2019-03-14T19:02:00.989811: epoch: 1/10 step: 4, loss: 3.7068, accuracy: 0.1
2019-03-14T19:02:01.091299: epoch: 1/10 step: 5, loss: 3.61192, accuracy:

### Test 

In [20]:
## Test
def del_all_flags(FLAGS):
    flags_dict = FLAGS._flags()    
    keys_list = [keys for keys in flags_dict]    
    for keys in keys_list:
        FLAGS.__delattr__(keys)

del_all_flags(tf.flags.FLAGS)


# File pathscontent/runs/1552590117
tf.flags.DEFINE_string('run_dir', 'runs/1552590117', 'Restore the model from this run')
tf.flags.DEFINE_string('checkpoint', 'clf-18000', 'Restore the graph from this checkpoint')

# Test batch size
tf.flags.DEFINE_integer('batch_size', 64, 'Test batch size')

FLAGS = tf.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')

# Restore graph
graph = tf.Graph()
with graph.as_default():
    sess = tf.Session()
    # Restore metagraph
    saver = tf.train.import_meta_graph('{}.meta'.format(os.path.join(FLAGS.run_dir, 'model', FLAGS.checkpoint)))
    # Restore weights
    saver.restore(sess, os.path.join(FLAGS.run_dir, 'model', FLAGS.checkpoint))

    # Get tensors
    input_x = graph.get_tensor_by_name('input_x:0')
    input_y = graph.get_tensor_by_name('input_y:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    predictions = graph.get_tensor_by_name('softmax/predictions:0')
    accuracy = graph.get_tensor_by_name('accuracy/accuracy:0')

    # Generate batches
    batches = batch_iter(test_x, test_y, test_lengths, FLAGS.batch_size, 1)

    num_batches = int(len(test_x)/FLAGS.batch_size)
    all_predictions = []
    sum_accuracy = 0

    # Test
    for batch in batches:
        x_test, y_test, x_lengths,e = batch
        batch_size = graph.get_tensor_by_name('batch_size:0')
        sequence_length = graph.get_tensor_by_name('sequence_length:0')
        feed_dict = {input_x: x_test,
                     input_y: y_test,
                     batch_size: FLAGS.batch_size,
                     sequence_length: x_lengths,
                     keep_prob: 1.0}

        batch_predictions, batch_accuracy = sess.run([predictions, accuracy], feed_dict)

        sum_accuracy += batch_accuracy
        all_predictions = np.concatenate([all_predictions, batch_predictions])

    final_accuracy = sum_accuracy / num_batches

# Print test accuracy
print("==============================")
print('Baseline LSTM model + Probabilistic Word Embeddings')
print("******************************")
print('Test accuracy: {}'.format(final_accuracy))
print("==============================")

INFO:tensorflow:Restoring parameters from runs/1552590117\model\clf-18000
Baseline LSTM model + Probabilistic Word Embeddings
******************************
Test accuracy: 0.7380952380952381


## Baseline LSTM model + Reinforced Semantic Embeddings + Proposed Discourse model

The utterance representations of the baseline model have been extracted and are uploaded here to be feeded in the RNN model. The utterance represantations are the output states of the LSTM cells in our basiline model and are represented as vectors with shape (192768, 128).

In [73]:
#Load the utterance representation
utterances_repre = pkl.load(open(resource_dir + "utterance_repres.pkl", 'rb'))
trainx = utterances_repre['trainx']
testx =utterances_repre['testx']
train_y = utterances_repre['trainy']
test_y = utterances_repre['testy']

print("------------------------------------")
print('Training set', trainx.shape, train_y.shape)
print('Test set', testx.shape, test_y.shape)

------------------------------------
Training set (192768, 128) (192768, 41)
Test set (4088, 128) (4088, 41)


######  Create triples of sentences

In [37]:

x_test = np.zeros((len(testx), 3, 128))

for i in range(testx.shape[0]):
  if i == 0:
    x_test[i,0,:] = 0
    x_test[i,1,:] = 0
    x_test[i,2,:] = testx[i,:]
    
    
  if i == 1:
    x_test[i,0,:] = 0
    x_test[i,1,:] = testx[i-1,:]
    x_test[i,2,:] = testx[i,:]
    
  if i > 1 : 
    x_test[i,0,:] = testx[i-2,:]
    x_test[i,1,:] = testx[i-1,:]
    x_test[i,2,:] = testx[i,:]
    

x_train = np.zeros((len(trainx), 3, 128))

for i in range(trainx.shape[0]):
  if i == 0:
    x_train[i,0,:] = 0
    x_train[i,1,:] = 0
    x_train[i,2,:] = trainx[i,:]
    
    
  if i == 1:
    x_train[i,0,:] = 0
    x_train[i,1,:] = trainx[i-1,:]
    x_train[i,2,:] = trainx[i,:]
    
  if i > 1 : 
    x_train[i,0,:] = trainx[i-2,:]
    x_train[i,1,:] = trainx[i-1,:]
    x_train[i,2,:] = trainx[i,:]


### Build and train RNN discourse model

In [38]:
import datetime
import time
from keras.models import load_model
from keras import Sequential
from keras.layers import LSTM, TimeDistributed, Dense, GlobalMaxPooling1D, GlobalAveragePooling1D,SimpleRNN
from keras.optimizers import RMSprop

In [61]:
num_labels = 41
batch_size = 100
hidden_layer = 128
learning_rate = 0.001
num_epoch = 10
dorpout_r = 0.2
decay_r = 0.004

model_name = 'Embeddings Model' + " -" + \
             " Epochs=" + str(num_epoch) + \
             " Hidden Layers=" + str(hidden_layer)

# Build the model
print("------------------------------------")
print('Build model...')
model1 = Sequential()

model1.add(SimpleRNN(hidden_layer, 
                     input_shape = (3,128),
                     dropout = dorpout_r,
                     use_bias=True, 
                     return_sequences=False,
                     kernel_initializer='random_uniform',
                     recurrent_initializer='glorot_uniform'))

model1.add(Dense(num_labels, activation='softmax'))

optimizer = RMSprop(lr = learning_rate,decay = decay_r)
model1.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
print(model1.summary())


# Train the model
print("------------------------------------")
print("Training model...")

start_time = time.time()
print("Training started: " + datetime.datetime.now().strftime("%b %d %T") + " for", num_epoch, "epochs")

# for i in range(len(train_y)):
history = model1.fit(x_train, 
                     train_y, 
                     epochs=num_epoch, 
                     batch_size=batch_size, 
                     validation_data=(x_test, test_y), 
                     verbose=2)

# Save model and history
model1.save(model_name + '.hdf5', overwrite=True)

end_time = time.time()
print("Training took " + str(('%.3f' % (end_time - start_time))) + " seconds for", num_epoch, "epochs")



------------------------------------
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_16 (SimpleRNN)    (None, 128)               32896     
_________________________________________________________________
dense_15 (Dense)             (None, 41)                5289      
Total params: 38,185
Trainable params: 38,185
Non-trainable params: 0
_________________________________________________________________
None
------------------------------------
Training model...
Training started: Mar 15 23:17:46 for 10 epochs
Train on 192768 samples, validate on 4088 samples
Epoch 1/10
 - 9s - loss: 0.7093 - acc: 0.7692 - val_loss: 0.6841 - val_acc: 0.7708
Epoch 2/10
 - 7s - loss: 0.6593 - acc: 0.7795 - val_loss: 0.6825 - val_acc: 0.7715
Epoch 3/10
 - 8s - loss: 0.6521 - acc: 0.7814 - val_loss: 0.6772 - val_acc: 0.7740
Epoch 4/10
 - 8s - loss: 0.6478 - acc: 0.7824 - val_loss: 0.6746 - val_acc:

In [65]:
model1 = load_model('runs/discourse_model/'+model_name + '.hdf5')
# Test set
test_scores = model1.evaluate(x_test, test_y, batch_size=batch_size, verbose=2)


# Print test accuracy
print("==============================")
print('Baseline LSTM model + Probabilistic Word Embeddings + Discourse Model')
print("******************************")
print('Test accuracy: {}'.format(test_scores[1]))
print("==============================")


Baseline LSTM model + Probabilistic Word Embeddings + Discourse Model
******************************
Test accuracy: 0.7754403090873576
