In [None]:
import tensorflow as tf
from data import read_vocab, read_category, batch_iter, process_file, build_vocab
import os

In [None]:
class Config(object):
    """Hyper-parameters for the recurrent text classifier in this notebook.

    All values are plain class attributes, so they can be read from the class
    itself or overridden on an instance before the model is built.
    """

    # --- input / embedding ---
    vocab_size = 6282      # rows of the embedding matrix
    embedding_dim = 128    # columns of the embedding matrix
    seq_length = 25        # second dimension of the token-id placeholder
    trainable = True       # passed as `trainable=` when the embedding variable is created

    # --- recurrent stack ---
    rnn = 'lstm'           # 'lstm' selects LSTM cells; the model uses GRU cells for any other value
    num_layers = 2         # number of stacked recurrent cells
    hidden_dim = 128       # units per recurrent cell and in the first dense layer

    # --- output ---
    num_classes = 3        # width of the logits / label vectors

    # --- optimisation ---
    lr = 1e-3              # learning rate given to the Adam optimizer
    dropout_keep_prob = 0.8  # presumably fed to the keep_prob placeholder while training; not read in this notebook
    batch_size = 128
    num_epochs = 10

    # --- bookkeeping (not read in this notebook; presumably logging / checkpoint intervals) ---
    print_per_batch = 100
    save_per_batch = 10


class LSTMModel(object):
    """Stacked LSTM/GRU text classifier expressed as a TensorFlow 1.x graph.

    Constructing an instance adds the placeholders, the network, the loss,
    the Adam training op and the accuracy metric to the current default graph
    and exposes them as attributes:

    * ``input_x``, ``input_y``, ``keep_prob`` -- placeholders to feed.
    * ``embedding_inputs`` -- embedded tokens, [batch, seq_length, embedding_dim].
    * ``_outputs``, ``state``, ``last`` -- per-step RNN outputs, final RNN
      state, and the outputs at the last time step.
    * ``logits``, ``y_pred_cls`` -- class scores and predicted class index.
    * ``loss``, ``optim``, ``acc`` -- mean cross-entropy, training op, accuracy.
    """

    def __init__(self, config):
        """Create the input placeholders and build the rest of the graph.

        Args:
            config: object providing ``seq_length``, ``num_classes``,
                ``vocab_size``, ``embedding_dim``, ``trainable``,
                ``num_layers``, ``hidden_dim``, ``rnn`` and ``lr``
                (for example a ``Config`` instance).
        """
        self.config = config

        # Token ids: one row of seq_length integers per example.
        self.input_x = tf.placeholder(tf.int32, shape=[None, self.config.seq_length], name='input_x')
        # Target vector per example; its argmax is treated as the true class below.
        self.input_y = tf.placeholder(tf.float32, shape=[None, self.config.num_classes], name='input_y')
        # Keep probability shared by the RNN output dropout and the dense-layer dropout.
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        self.rnn()

    def rnn(self):
        """Build embedding -> stacked RNN -> dense head -> loss / optimizer / accuracy."""

        def make_cell():
            # One recurrent layer with dropout on its outputs. 'lstm' selects an
            # LSTM cell; any other value of config.rnn falls back to a GRU cell.
            if self.config.rnn == 'lstm':
                cell = tf.contrib.rnn.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            else:
                cell = tf.contrib.rnn.GRUCell(self.config.hidden_dim)
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        # The embedding matrix and the lookup are pinned to the CPU.
        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.embedding_dim],
                                        trainable=self.config.trainable)  # [vocab_size, embedding_dim]
            self.embedding_inputs = tf.nn.embedding_lookup(embedding, self.input_x)  # [batch, seq_length, embedding_dim]

        with tf.name_scope("rnn"):
            cells = [make_cell() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

            # _outputs: [batch, seq_length, hidden_dim]; state: one entry per layer.
            self._outputs, self.state = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=self.embedding_inputs,
                                                          dtype=tf.float32)
            # Outputs at the final time step feed the classifier.
            self.last = self._outputs[:, -1, :]  # [batch, hidden_dim]

        with tf.name_scope("score"):
            fc = tf.layers.dense(self.last, self.config.hidden_dim, name='fc1')  # [batch, hidden_dim]
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)

            self.logits = tf.layers.dense(fc, self.config.num_classes, name='fc2')  # [batch, num_classes]
            # argmax is invariant under softmax, so take it on the logits directly
            # instead of computing a softmax that is never used otherwise.
            self.y_pred_cls = tf.argmax(self.logits, 1)

        with tf.name_scope("optimize"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

            self.optim = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(self.loss)

        with tf.name_scope("accuracy"):
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), self.y_pred_cls)
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [None]:
# Dataset location, relative to the notebook's working directory.
base_dir = './data/2019-07-19'
# Despite the *_dir suffix, these four names hold file paths.
train_dir = os.path.join(base_dir, 'train.txt')
test_dir = os.path.join(base_dir, 'test.txt')
val_dir = os.path.join(base_dir, 'val.txt')
vocab_dir = os.path.join(base_dir, 'vocab.txt')

words, word2id = read_vocab(vocab_dir)
categories, cat2id = read_category()

# The last argument was the literal 25, i.e. Config.seq_length (presumably the
# length each example is padded/truncated to -- confirm in data.process_file).
# Reading it from Config keeps the data in step with the model's
# [None, seq_length] input placeholder.
x_train, y_train = process_file(train_dir, word2id, cat2id, Config.seq_length)
# A single batch-sized slice of the training set, used for the inspection cells below.
x_input = x_train[:Config.batch_size]
y_input = y_train[:Config.batch_size]

In [None]:
# Building the model creates variables in the default graph, so running this
# cell a second time would fail because they already exist. Clearing the
# default graph first makes the cell safe to re-run.
tf.reset_default_graph()

config = Config()
# Size the embedding matrix from the vocabulary that was actually loaded.
config.vocab_size = len(words)
model = LSTMModel(config)

In [None]:
feed_dict = {
    model.input_x: x_input,
    model.input_y: y_input,
    model.keep_prob: 1.0  # keep everything: dropout is effectively off for this inspection
}
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch every tensor in ONE run call: the forward pass is evaluated once,
    # and all returned values come from that same evaluation (separate calls
    # would each recompute the graph and could disagree if dropout were active).
    emb, out, state, la = sess.run(
        [model.embedding_inputs, model._outputs, model.state, model.last],
        feed_dict=feed_dict)

In [None]:
# Outputs at the final time step, sliced from the full per-step output array.
a = out[:, -1]
a

In [None]:
# Hidden state of the top (last) recurrent layer. Indexing with -1 instead of
# a fixed 1 keeps this correct when num_layers is not 2.
# NOTE: `.h` relies on the LSTM state tuples; presumably unavailable with the GRU option.
state[-1].h

In [None]:
# Shape of the embedded batch fetched from model.embedding_inputs:
# (examples, seq_length, embedding_dim).
emb.shape