In [1]:
%reset -f

import tensorflow as tf
from batch_reader import Batcher
from data import Vocab

class TextSumarization(object):
    def __init__(self, vocab, options=None):
        self.vocab = vocab
        
        self.options = {
            'batch_size': 10,
            'input_seq_length': 300,
            'output_seq_length': 30,
            'embedding_dim': 128,
            'num_hidden': 256,
            'encode_layers': 4,
            'min_input_len': 5,
            'lr': 0.15,
            'min_lr': 0.01,
            'max_grad_norm': 2,
            'mode': 'train'
        }
        self.options.update(options if options else {})
        self.options['input_vocab_size'] = len(self.vocab)
        
    def add_placeholders(self):
        self.seq_input = tf.placeholder(tf.int32, [self.options['batch_size'], self.options['input_seq_length']], name='articles')
        self.seq_output = tf.placeholder(tf.int32, [self.options['batch_size'], self.options['output_seq_length']], name='abstracts')
        self.dropout_keep_prob_embedding = tf.placeholder(tf.float32, name="dropout_keep_prob_embedding")

        self.targets = tf.placeholder(tf.int32, [self.options['batch_size'], self.options['output_seq_length']], name='targets')
        self.article_lens = tf.placeholder(tf.int32, [self.options['batch_size']], name='article_lens')
        self.abstract_lens = tf.placeholder(tf.int32, [self.options['batch_size']], name='abstract_lens')
        self.loss_weights = tf.placeholder(tf.float32, [self.options['batch_size'], self.options['output_seq_length']], name='loss_weights')

    def add_ops(self):
        encoder_inputs = tf.unstack(tf.transpose(self.seq_input))
        decoder_inputs = tf.unstack(tf.transpose(self.seq_output))
    
        with tf.variable_scope("embedding"), tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "W",
                [self.options['input_vocab_size'], self.options['embedding_dim']],
                initializer=tf.random_uniform_initializer(-1.0, 1.0))

            embedded_encoder_drop = tf.nn.dropout(tf.nn.embedding_lookup(embedding, self.seq_input),
                                                  self.dropout_keep_prob_embedding)
            embedded_decoder_drop = tf.nn.embedding_lookup(embedding, self.seq_output)

            encoder_input = tf.stack([embedded_encoder_drop[:, i, :] for i in range(self.options['input_seq_length'])], axis=1)
            decoder_input = tf.stack([embedded_decoder_drop[:, i, :] for i in range(self.options['output_seq_length'])], axis=1)
        #     encoder_input = [embedded_encoder_drop[:, i, :] for i in range(input_seq_length)]
        #     decoder_input = [embedded_decoder_drop[:, i, :] for i in range(output_seq_length)]

        for layer_i in range(self.options['encode_layers']):
            with tf.variable_scope('encoder%d'%layer_i), tf.device('/gpu:0'):
                cell_fw = tf.contrib.rnn.BasicLSTMCell(self.options['num_hidden'])
                cell_bw = tf.contrib.rnn.BasicLSTMCell(self.options['num_hidden'])

                (emb_encoder_inputs, emb_state) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    encoder_input,
                    sequence_length=self.article_lens,
                    dtype=tf.float32
                )

                encoder_input = tf.concat(emb_encoder_inputs, 2)

        encoder_output = encoder_input
        fw_state, bw_state = emb_state
        dec_in_state = tf.reshape(tf.concat(fw_state, 1), (self.options['batch_size'], self.options['num_hidden']*2))

        with tf.variable_scope("output_projection"):
            w = tf.get_variable(
                'w', [self.options['num_hidden'], self.options['input_vocab_size']], dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))

            w_t = tf.transpose(w)
            v = tf.get_variable(
                'v', [self.options['input_vocab_size']], dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))

            #we use the cell memory state for information on sentence embedding
        #     scores = tf.nn.xw_plus_b(encoder_state, W, b)
        #     y = tf.nn.softmax(scores)
        #     predictions = tf.argmax(scores, 1)

        with tf.variable_scope('decoder'), tf.device('/gpu:0'):
            cell = tf.contrib.rnn.BasicLSTMCell(self.options['num_hidden'])

            encoder_output = [tf.reshape(x, [self.options['batch_size'], 1, 2*self.options['num_hidden']]) 
                               for x in tf.split(encoder_output, self.options['input_seq_length'], axis=1)]
            enc_top_states = tf.concat(axis=1, values=encoder_output)

            print(enc_top_states.shape)

            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=256,
                memory=enc_top_states)

            cell = tf.contrib.seq2seq.AttentionWrapper(
                cell,
                attention_mechanism,
                attention_layer_size=256,
                alignment_history=False)

        #         tf.contrib.legacy_seq2seq.attention_decoder()

            if self.options['mode'] == "train":
                helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs=decoder_input,
                    sequence_length=self.abstract_lens,
                    embedding=embedding,
                    sampling_probability=0.9)

                initial_state = cell.zero_state(dtype=tf.float32, batch_size=self.options['batch_size'])
            elif self.options['mode'] == "infer":
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=embedding,
                    start_tokens=tf.tile([GO_SYMBOL], [batch_size]),
                    end_token=END_SYMBOL)

                initial_state = dec_in_state

            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell,
                helper=helper,
                initial_state=initial_state)

            final_outputs, decoder_state, exit_lens = tf.contrib.seq2seq.dynamic_decode(decoder)
            decoder_outputs = final_outputs.rnn_output

        # with tf.variable_scope('output'), tf.device('/gpu:0'):
        #     model_outputs = []
        #     for i in range(len(decoder_outputs)):
        #         if i > 0:
        #             tf.get_variable_scope().reuse_variables()
        #             model_outputs.append(tf.nn.xw_plus_b(tf.cast(decoder_outputs[i], tf.float32), w, v))
        with tf.variable_scope('output'), tf.device('/gpu:0'):
            model_outputs = []
            for i in range(self.options['batch_size']):
                tf.get_variable_scope().reuse_variables()
                model_outputs.append(tf.nn.xw_plus_b(decoder_outputs[:, exit_lens[i], :], w, v))
            
        with tf.variable_scope('loss'), tf.device('/gpu:0'):
            if self.options['mode'] == 'train':
                self._loss = tf.contrib.seq2seq.sequence_loss(decoder_outputs, self.targets, self.loss_weights)
            else:
                self._loss = tf.contrib.seq2seq.sequence_loss(model_outputs, self.targets, self.loss_weights)
                
            tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
            
    def add_train(self):
        """Sets self._train_op, op to run for training."""

        self._lr_rate = tf.maximum(
            self.options['min_lr'],  # min_lr_rate.
            tf.train.exponential_decay(self.options['lr'], self.global_step, 30000, 0.98))

        tvars = tf.trainable_variables()
        with tf.device('/gpu:0'):
            grads, global_norm = tf.clip_by_global_norm(tf.gradients(self._loss, tvars), self.options['max_grad_norm'])

        tf.summary.scalar('global_norm', global_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr_rate)
        tf.summary.scalar('learning_rate', self._lr_rate)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step, name='train_step')
            
    def build_graph(self):
        tf.reset_default_graph()
        self.add_placeholders()
        self.add_ops()
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        if self.options['mode'] == 'train':
            self.add_train()
        self._summaries = tf.summary.merge_all()

In [2]:
def train(model, data_batcher, log_root='./log_root', train_dir='./log_root/train', checkpoint_secs=60,
           max_run_steps=10000000):
    """Runs model training."""
    with tf.device('/cpu:0'):
        model.build_graph()
        saver = tf.train.Saver()
        
        summary_writer = tf.summary.FileWriter(train_dir)
        sv = tf.train.Supervisor(logdir=log_root,
                                 is_chief=True,
                                 saver=saver,
                                 summary_op=None,
                                 save_summaries_secs=60,
                                 save_model_secs=checkpoint_secs,
                                 global_step=model.global_step)
        
        sess = sv.prepare_or_wait_for_session(config=tf.ConfigProto(allow_soft_placement=True))
        running_avg_loss = 0
        step = 0
        
        while not sv.should_stop() and step < max_run_steps:
            (article_batch, abstract_batch, targets, article_lens, abstract_lens,
            loss_weights, _, _) = data_batcher.NextBatch()
            (_, summaries, loss, train_step) = model.run_train_step(
                sess, article_batch, abstract_batch, targets, article_lens,
                abstract_lens, loss_weights)

            summary_writer.add_summary(summaries, train_step)
            running_avg_loss = _RunningAvgLoss(running_avg_loss, loss, summary_writer, train_step)
            step += 1
            
            if step % 100 == 0:
                summary_writer.flush()

        sv.Stop()
        return running_avg_loss

In [3]:
model = TextSumarization({'a': 1, 'b': 2})
vocab = Vocab('vocab', 1000000)
batcher = Batcher('data_train.bin', vocab, model.config, 'article', 'abstract', 200, 20)
train(model, batcher)

FileNotFoundError: [Errno 2] No such file or directory: 'vocab'