In [1]:
import numpy as np
from tqdm import tqdm

from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.utils import to_categorical

import tensorflow as tf
from tensorflow.contrib import rnn

Using TensorFlow backend.


In [2]:
# Loading the dataset
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

In [3]:
y_test_one_hot = to_categorical(y_test)
y_train_one_hot = to_categorical(y_train)
y_test_one_hot.shape, y_train_one_hot.shape

((25000, 2), (25000, 2))

In [4]:
# Preprocessing sestences
max_review_len = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_len)

In [5]:
NUM_EPOCHS = 3

BATCH_SIZE = 64

NUM_CLASS = 2

VOCAB_SIZE = 5000
EMBEDDING_SIZE = 32

RNN_SIZE = 50
NUM_CHUNK = 10
CHUNK_SIZE = 50

In [6]:
text_placeholder = tf.placeholder(tf.int32, [None, max_review_len])
sentiment_placeholder = tf.placeholder(tf.float32)

In [7]:
def weight_variable(shape):
    weights = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(weights)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

In [9]:
with tf.name_scope('Embedding'):
    embedding_weights = weight_variable([VOCAB_SIZE, EMBEDDING_SIZE])
    embedding_bias = bias_variable([EMBEDDING_SIZE])
    
    embedding = tf.nn.embedding_lookup(embedding_weights, text_placeholder) + embedding_bias

with tf.name_scope('RNN'):
    lstm_weights = weight_variable([RNN_SIZE, NUM_CLASS])
    lstm_bias = bias_variable([NUM_CLASS])
    
    lstm = rnn.BasicLSTMCell(RNN_SIZE)
    
    #embedding_transpose = tf.transpose(embedding, [1, 0, 2])
    #embedding_reshaped = tf.reshape(embedding_transpose, [-1, CHUNK_SIZE])
    #embedding_split = tf.split(embedding_reshaped, NUM_CHUNK, 0)
    
    #outputs, states = rnn.static_rnn(lstm, embedding_split, dtype=tf.float32)
    
    outputs, states = tf.nn.dynamic_rnn(lstm, embedding, dtype=tf.float32)
    
    value = tf.transpose(outputs, [1, 0, 2])
    last = tf.gather(value, int(value.get_shape()[0]) - 1)
    
    pred = tf.matmul(last, lstm_weights) + lstm_bias
    

In [10]:
loss_op = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(labels=sentiment_placeholder, logits=pred))
train_op = tf.train.AdamOptimizer().minimize(loss_op)

# Evaluations
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(sentiment_placeholder, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    
    # Summaries
    LOG_DIR = "tmp/log/"
    summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    summary_writer.close()
    
    for epoch in range(NUM_EPOCHS):
        print('Epoch {}'.format(epoch + 1))

        for index, offset in tqdm(list(enumerate(range(0, X_train.shape[0], BATCH_SIZE))), ncols=100):
            xs, ys = X_train[offset: offset + BATCH_SIZE], y_train_one_hot[offset: offset + BATCH_SIZE]
            sess.run(train_op, feed_dict={
                text_placeholder: xs,
                sentiment_placeholder: ys
            })

        train_accuracy = accuracy.eval(feed_dict={
            text_placeholder: X_train,
            sentiment_placeholder: y_train_one_hot
        })
        validation_accuracy = accuracy.eval(feed_dict={
            text_placeholder: X_test,
            sentiment_placeholder: y_test_one_hot
        })
        print('Training Accuracy: {}%\nValidation Accuracy: {}%\n'.format(train_accuracy, validation_accuracy))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1


100%|█████████████████████████████████████████████████████████████| 391/391 [02:39<00:00,  2.45it/s]


Training Accuracy: 0.8695200085639954%
Validation Accuracy: 0.852840006351471%

Epoch 2


100%|█████████████████████████████████████████████████████████████| 391/391 [02:38<00:00,  2.46it/s]


Training Accuracy: 0.896399974822998%
Validation Accuracy: 0.8634399771690369%

Epoch 3


100%|█████████████████████████████████████████████████████████████| 391/391 [02:42<00:00,  2.40it/s]


Training Accuracy: 0.9132400155067444%
Validation Accuracy: 0.8684800267219543%

