## Preprocess

In [0]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU

In [0]:
GPUs = GPU.getGPUs()
# Colab exposes at most one GPU and isn't guaranteed to provide one at all,
# so fall back to None instead of failing with IndexError on an empty list.
gpu = GPUs[0] if GPUs else None


def printm():
  """Print a memory report: free system RAM, this process's RSS and GPU memory.

  The GPU line is built from the module-level `gpu` object looked up above.
  When no GPU was detected a placeholder line is printed instead.
  NOTE(review): `gpu` is fetched once at cell execution; presumably its fields
  are a snapshot, so re-run `GPU.getGPUs()` for fresh numbers -- confirm.
  """
  process = psutil.Process(os.getpid())
  print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
  if gpu is None:
    print("GPU RAM Free: n/a (no GPU detected)")
    return
  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))


printm()

In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

print(tf.__version__)

In [0]:
# Load the IMDB reviews as sequences of integer word ids, keeping only the
# 10000 most frequent words. The sentiment labels are not used here.
imdb = keras.datasets.imdb
train_split, test_split = imdb.load_data(num_words=10000)
train_text, test_text = train_split[0], test_split[0]

# Dictionary mapping each word to its integer index.
word_index = imdb.get_word_index()

In [0]:
# Word ids are shifted up by three because the first indices are reserved
# for the special tokens registered below.
vocab = {word: idx + 3 for word, idx in word_index.items()}
vocab.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})
# <END> takes the next free id after every word and special token.
vocab["<END>"] = len(vocab)

# Inverse lookup: id -> token.
reverse_word_index = {idx: token for token, idx in vocab.items()}

end_id = vocab["<END>"]

# Inputs drop the first id of each review; targets keep the whole review and
# gain a trailing <END> id.
train_X = [review[1:] for review in train_text]
train_y = [review + [end_id] for review in train_text]

test_X = [review[1:] for review in test_text]
test_y = [review + [end_id] for review in test_text]

print("Training entries: {}".format(len(train_X)))

In [0]:
def _pad_to_sent_length(sequences, maxlen=256):
  """Pad or truncate every id sequence to exactly `maxlen` entries.

  Padding uses the <PAD> id and is appended at the end. Truncation is also
  done at the end (`truncating='post'`): pad_sequences otherwise cuts from the
  front, and because the inputs lost their first id while the targets gained
  an <END> id, front-truncation shifts long reviews in the opposite direction
  to short ones. Cutting at the end keeps X[i] == y[i + 1] for every review,
  which is the alignment the model's label slices rely on.

  Args:
    sequences: iterable of integer id sequences.
    maxlen: target length of every row.

  Returns:
    The 2-D integer array produced by pad_sequences, one row per sequence.
  """
  return keras.preprocessing.sequence.pad_sequences(sequences,
                                                    value=vocab["<PAD>"],
                                                    padding='post',
                                                    truncating='post',
                                                    maxlen=maxlen)


train_X = _pad_to_sent_length(train_X)
train_y = _pad_to_sent_length(train_y)

test_X = _pad_to_sent_length(test_X)
test_y = _pad_to_sent_length(test_y)

In [0]:
def decode_review(text):
    """Turn a sequence of word ids into a space-separated string.

    Ids missing from `reverse_word_index` are rendered as '?'.
    """
    words = (reverse_word_index.get(word_id, '?') for word_id in text)
    return ' '.join(words)


# Eyeball the same review as padded input, padded target and raw id list.
for sample in (train_X[0], train_y[0], train_text[0]):
    print(decode_review(sample))

In [0]:
# Both rows are expected to have the same length after padding (maxlen=256).
print(len(train_X[0]), len(train_y[0]))

In [0]:
chars = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}"

# Id 0 is used as the padding / special-token character id by the encoding
# and padding steps, so it must not be shared with a real character. A NUL
# key (never present in a word) reserves it, and real characters start at 1.
CHAR_PAD = "\0"
alphabet = {CHAR_PAD: 0}
for c in chars:
  # '-' occurs twice in `chars`; setdefault keeps its first id, so ids stay
  # contiguous and len(alphabet) == largest id + 1 (safe embedding-table size).
  alphabet.setdefault(c, len(alphabet))

In [0]:
# Replace every word id in train_X by the list of character ids spelling it.
# Tokens that carry no spelling are encoded as the single character id 0.
special_tokens = {"<PAD>", "<UNK>", "<UNUSED>"}

temp_data = []

for text in train_X:
  sent = []

  # The loop variable is deliberately not called `word_index`: that name holds
  # the word -> id dictionary loaded from the dataset, and rebinding it to an
  # int here would break re-running the vocabulary cell.
  for word_id in text:
    token = reverse_word_index[word_id]

    if token in special_tokens:
      word = [0]
    else:
      # Characters outside the alphabet are silently dropped.
      word = [alphabet[char] for char in token if char in alphabet]

    sent.append(word)

  temp_data.append(sent)

train_X = temp_data

In [0]:
def _pad_word_chars(sentence):
  """Pad or truncate each word's character-id list in `sentence` to 16 ids (0-filled at the end)."""
  return keras.preprocessing.sequence.pad_sequences(sentence,
                                                    value=0,
                                                    padding='post',
                                                    maxlen=16)


# One padded 2-D array per sentence, then stacked into a single array.
temp_data = list(map(_pad_word_chars, train_X))

train_X = np.asarray(temp_data)

In [0]:
# Expected to be (number of reviews, 256 words, 16 character ids) given the two padding steps.
print(train_X.shape)

In [0]:
# Hyper-parameters handed to ELMoModel.
config = dict(
    SENT_LENGTH=256,           # words per input sentence (placeholder dim 1)
    CHAR_LENGTH=16,            # character ids per word (placeholder dim 2)
    NUM_RNN_LAYER=2,           # stacked LSTM cells per direction
    RNN_LAYER_NODES=[64, 64],  # units of each stacked LSTM cell, by layer
)

In [0]:
class ELMoModel:
  """Character-CNN + stacked bidirectional-LSTM language model (TF1 graph mode).

  Each word arrives as a fixed-length list of character ids. Characters are
  embedded, run through 2/3/4/5-gram convolutions that are max-pooled over the
  character axis, and the resulting word vectors feed a bidirectional LSTM.
  Each direction is projected onto the vocabulary and trained with sparse
  softmax cross-entropy against a slice of the label sequence.

  Relies on the module-level `alphabet` (character -> id) and `vocab`
  (token -> id) dictionaries.

  Args:
    params: dict with keys "SENT_LENGTH", "CHAR_LENGTH", "NUM_RNN_LAYER" and
      "RNN_LAYER_NODES" (list of LSTM sizes, one per layer).
  """

  # (kernel height in characters, number of filters, variable-scope suffix).
  _NGRAM_FILTERS = (
      (2, 3, 'two_gram'),
      (3, 4, 'three_gram'),
      (4, 5, 'four_gram'),
      (5, 6, 'five_gram'),
  )

  def __init__(self, params):
    self.params = params
    # Building the model wipes the default graph, so only one instance can be
    # live per process at a time.
    tf.reset_default_graph()
    self._create_placeholder()
    self._create_model()
    self._create_loss()
    self._create_optimizer()
    self.sess = tf.Session()


  def _create_placeholder(self):
    """Create the input placeholders (character ids, dropout keep-prob, labels)."""
    with tf.name_scope("input"):
      self.input_text = tf.placeholder(tf.int32,
                                       shape=[None, self.params["SENT_LENGTH"], self.params["CHAR_LENGTH"]],
                                       name="input_text")
      # Declared but not referenced anywhere else in this class.
      self.dropout_keep_prob = tf.placeholder(tf.float64, name="dropout_keep_prob")
      self.y = tf.placeholder(tf.int32,
                             shape=[None, self.params["SENT_LENGTH"]],
                             name="input_label")


  def _create_model(self):
    """Build the character embedding, n-gram convolutions, BiLSTM and output projections."""
    # Size the table by the largest character id rather than len(alphabet):
    # if two characters ever share an id the dict is shorter than the id range
    # and the highest id would fall outside the embedding table.
    num_chars = max(alphabet.values()) + 1
    char_emb = np.eye(num_chars)
    # Id 0 is the padding character: give it an all-zero vector.
    char_emb[0, 0] = 0

    with tf.name_scope("char_embedding"):

      char_emb_tensor = tf.Variable(char_emb)
      # The last two word positions are dropped so the sequence length matches
      # the label slices y[:, 2:] and y[:, :-2] used by the loss.
      emb_text = tf.nn.embedding_lookup(char_emb_tensor, self.input_text[:, :-2, :])
      emb_text = tf.expand_dims(emb_text, -1)
      emb_text_word = tf.unstack(emb_text, axis=1)
      print(emb_text.shape)

    with tf.name_scope("conv_layer") as scope:

      print("------- build conv layer --------")

      ngram_features = []
      for height, filters, suffix in self._NGRAM_FILTERS:
        # The kernel spans the full embedding width, so the width axis
        # collapses to 1 and is squeezed away; max-pooling over the remaining
        # character axis leaves one `filters`-sized vector per word.
        conv_out = [tf.squeeze(self.conv_2d(x, height, num_chars, filters, scope + suffix), axis=2)
                    for x in emb_text_word]
        ngram_features.append([tf.reduce_max(x, axis=1) for x in conv_out])

      # Concatenate the four n-gram vectors of each word position.
      emb_text_word = [tf.concat(out, 1) for out in zip(*ngram_features)]
      print(emb_text_word[0].shape)
      emb_text = tf.stack(emb_text_word, axis=1)

    with tf.name_scope("rnn_layer") as scope:

      print("------- build RNN layer --------")

      lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell(
          [tf.contrib.rnn.LSTMCell(self.params["RNN_LAYER_NODES"][i], name="rnn_cell_fw_" + str(i)) for i in range(self.params["NUM_RNN_LAYER"])])
      lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell(
          [tf.contrib.rnn.LSTMCell(self.params["RNN_LAYER_NODES"][i], name="rnn_cell_bw_" + str(i)) for i in range(self.params["NUM_RNN_LAYER"])])

      lstm_out, self.hidden_state = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_cell_fw,
                                                                    cell_bw=lstm_cell_bw,
                                                                    inputs=emb_text,
                                                                    dtype=tf.float64)

      # Raw, unbounded logits: no ReLU here, otherwise the scores could never
      # go below zero, and no softmax, because the loss op applies it itself.
      self.logits_fw = tf.layers.dense(inputs=lstm_out[0], units=len(vocab), activation=None)
      self.logits_bw = tf.layers.dense(inputs=lstm_out[1], units=len(vocab), activation=None)

      # Per-position vocabulary probabilities, kept for inference/inspection.
      self.out_1 = tf.nn.softmax(self.logits_fw, axis=-1)
      self.out_2 = tf.nn.softmax(self.logits_bw, axis=-1)


  def _create_loss(self):
    """Define the training loss: mean of forward + backward cross-entropy.

    The forward direction is scored against y[:, 2:] and the backward
    direction against y[:, :-2].
    NOTE(review): padded label positions are averaged in like any other
    position; consider masking them.
    """
    with tf.name_scope("loss") as scope:
      # The op expects unscaled logits; feeding softmax outputs (as was done
      # before) applies softmax twice and flattens the gradients.
      loss_fw = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits_fw, labels=self.y[:, 2:])
      loss_bw = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits_bw, labels=self.y[:, :-2])

      self.loss = tf.reduce_mean(loss_fw + loss_bw)


  def _create_optimizer(self):
    """Create the Adam training op (learning rate 1e-2) minimising `self.loss`."""
    self.optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(self.loss)


  def save_model(self):
    """Placeholder: saving is not implemented yet."""
    pass

  def conv_2d(self, input_, height, width, out, scope):
    """Apply a [height, width] VALID convolution with `out` output channels.

    Variables live under `scope` and are shared between calls using the same
    scope (AUTO_REUSE), so every word position uses the same filters.
    """
    return tf.contrib.layers.conv2d(input_, out, [height, width], padding="VALID", reuse=tf.AUTO_REUSE, scope=scope)

  def train(self, train_X, train_y, batch_size, epochs=5):
    """Run mini-batch training and print the loss after every step.

    All variables are (re-)initialised at the start of each call, so training
    always starts from scratch.

    Args:
      train_X: character-id inputs, indexable by slice along the first axis.
      train_y: word-id labels aligned with `train_X`.
      batch_size: number of sequences per step; must be at least 1.
      epochs: number of passes over the data (default 5).

    Raises:
      ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
      # A zero step would never advance through the data.
      raise ValueError("batch_size must be >= 1, got {}".format(batch_size))

    self.sess.run(tf.global_variables_initializer())

    global_step = 0
    for epoch in range(epochs):
      for start in range(0, len(train_X), batch_size):
        stop = start + batch_size
        _, loss = self.sess.run([self.optimizer, self.loss],
                                feed_dict={self.input_text: train_X[start:stop],
                                           self.y: train_y[start:stop]})
        global_step += 1
        print("step {}, loss {}".format(global_step, loss))


model = ELMoModel(config)

In [0]:
# Train on the character-encoded reviews using mini-batches of 4 sequences.
model.train(train_X, train_y, 4)