# CS 20 : TensorFlow for Deep Learning Research
## Lecture 11 : Recurrent Neural Networks
Simple example for Many to Many Classification (Simple pos tagger) by Recurrent Neural Networks. 

### Many to Many Classification by RNN
- Creating the **data pipeline** with `tf.data`
- Preprocessing word sequences (variable input sequence length) with padding, using `keras.preprocessing.sequence.pad_sequences`
- Using `tf.nn.embedding_lookup` for getting vector of tokens (eg. word, character)
- Training **many to many classification** with `tf.contrib.seq2seq.sequence_loss`
- Masking invalid (padding) tokens with `tf.sequence_mask`
- Creating the model as **Class**
- Reference
    - https://github.com/aisolab/sample_code_of_Deep_learning_Basics/blob/master/DLEL/DLEL_12_2_RNN_(toy_example).ipynb

### Setup

In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import string
%matplotlib inline
from pprint import pprint

slim = tf.contrib.slim
print(tf.__version__)

1.11.0


In [2]:
from tensorflow import keras

### Prepare example data 

In [3]:
# Each inner list is one sentence written as (word, part-of-speech tag) pairs,
# so a word and its tag cannot drift out of alignment.
tagged_sentences = [
    [('I', 'pronoun'), ('feel', 'verb'), ('hungry', 'adjective')],
    [('tensorflow', 'noun'), ('is', 'verb'), ('very', 'adverb'), ('difficult', 'adjective')],
    [('tensorflow', 'noun'), ('is', 'verb'), ('a', 'determiner'), ('framework', 'noun'),
     ('for', 'preposition'), ('deep', 'adjective'), ('learning', 'noun')],
    [('tensorflow', 'noun'), ('is', 'verb'), ('very', 'adverb'), ('fast', 'adjective'),
     ('changing', 'verb')],
]
# Split the pairs back into the two parallel lists.
sentences = [[word for word, _tag in pairs] for pairs in tagged_sentences]
pos = [[tag for _word, tag in pairs] for pairs in tagged_sentences]

In [4]:
# word dic
# Show every sentence first, e.g. ['I', 'feel', 'hungry'].
for sentence in sentences:
    print(sentence)
# Flatten all sentences into a single list of words; duplicates are kept.
word_list = [word for sentence in sentences for word in sentence]
print('===' * 30)
pprint(word_list)
print('>>len(word_list): {}'.format(len(word_list)))

['I', 'feel', 'hungry']
['tensorflow', 'is', 'very', 'difficult']
['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning']
['tensorflow', 'is', 'very', 'fast', 'changing']
['I',
 'feel',
 'hungry',
 'tensorflow',
 'is',
 'very',
 'difficult',
 'tensorflow',
 'is',
 'a',
 'framework',
 'for',
 'deep',
 'learning',
 'tensorflow',
 'is',
 'very',
 'fast',
 'changing']
>>len(word_list): 19


In [5]:
# Drop repeated words. A set carries no ordering guarantee, so the list
# produced here is in arbitrary order.
unique_words = set(word_list)
word_list = list(unique_words)
pprint(word_list)
print('>>len(word_list): {}'.format(len(word_list)))

['changing',
 'framework',
 'fast',
 'is',
 'learning',
 'tensorflow',
 'hungry',
 'a',
 'feel',
 'very',
 'deep',
 'I',
 'for',
 'difficult']
>>len(word_list): 14


In [6]:
# Build the vocabulary: unique words in alphabetical order, with '<pad>' pinned
# to index 0. Any '<pad>' already in word_list is removed before sorting, so
# re-running this cell does not prepend it a second time and shift every index.
word_list = ['<pad>'] + sorted(set(word_list) - {'<pad>'})

# word -> integer index
word_dic = {word: idx for idx, word in enumerate(word_list)}
pprint(word_dic)

{'<pad>': 0,
 'I': 1,
 'a': 2,
 'changing': 3,
 'deep': 4,
 'difficult': 5,
 'fast': 6,
 'feel': 7,
 'for': 8,
 'framework': 9,
 'hungry': 10,
 'is': 11,
 'learning': 12,
 'tensorflow': 13,
 'very': 14}


In [7]:
# pos dic
# Collect the distinct tags, sort them, and reserve index 0 for '<pad>'.
unique_tags = {tag for tags in pos for tag in tags}
pos_list = ['<pad>'] + sorted(unique_tags)
print(pos_list)

# tag -> integer index
pos_dic = dict(zip(pos_list, range(len(pos_list))))
pos_dic

['<pad>', 'adjective', 'adverb', 'determiner', 'noun', 'preposition', 'pronoun', 'verb']


{'<pad>': 0,
 'adjective': 1,
 'adverb': 2,
 'determiner': 3,
 'noun': 4,
 'preposition': 5,
 'pronoun': 6,
 'verb': 7}

In [8]:
# Inverse of pos_dic: integer index -> tag.
pos_idx_to_dic = {idx: tag for tag, idx in pos_dic.items()}
pos_idx_to_dic

{0: '<pad>',
 1: 'adjective',
 2: 'adverb',
 3: 'determiner',
 4: 'noun',
 5: 'preposition',
 6: 'pronoun',
 7: 'verb'}

### Pre-process data

#### Sentence padding 
- word2idx
- keras.preprocessing.sequence.pad_sequences

In [9]:
for sentence in sentences:
    pprint(sentence)

['I', 'feel', 'hungry']
['tensorflow', 'is', 'very', 'difficult']
['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning']
['tensorflow', 'is', 'very', 'fast', 'changing']


In [10]:
pprint(word_dic)

{'<pad>': 0,
 'I': 1,
 'a': 2,
 'changing': 3,
 'deep': 4,
 'difficult': 5,
 'fast': 6,
 'feel': 7,
 'for': 8,
 'framework': 9,
 'hungry': 10,
 'is': 11,
 'learning': 12,
 'tensorflow': 13,
 'very': 14}


In [11]:
# Convert every sentence into vocabulary indices and record its true
# (unpadded) length.
# word_dic[word] is used instead of word_dic.get(word) so that a word missing
# from the vocabulary raises KeyError here, rather than silently putting None
# into the index lists.
word2idx = [[word_dic[word] for word in sentence] for sentence in sentences]
X_length = [len(sentence) for sentence in sentences]

In [12]:
print(word2idx)

[[1, 7, 10], [13, 11, 14, 5], [13, 11, 2, 9, 8, 4, 12], [13, 11, 14, 6, 3]]


In [13]:
max_length = 10
# pad_sequences silently truncates any sequence longer than maxlen; X_length
# would then no longer describe the padded array. Fail early instead.
assert all(length <= max_length for length in X_length), \
    'max_length is shorter than the longest sentence'
# Right-pad every index list up to max_length.
X_indices = keras.preprocessing.sequence.pad_sequences(sequences=word2idx,
                                                       maxlen=max_length,
                                                       dtype='int32',
                                                       padding='post')

In [14]:
print(X_indices)
print('X_length : {}'.format(X_length))
print('X_indices.shape : {}'.format(np.shape(X_indices)))

[[ 1  7 10  0  0  0  0  0  0  0]
 [13 11 14  5  0  0  0  0  0  0]
 [13 11  2  9  8  4 12  0  0  0]
 [13 11 14  6  3  0  0  0  0  0]]
X_length : [3, 4, 7, 5]
X_indices.shape : (4, 10)


#### Pos padding 
- pos2idx

In [15]:
# Convert every tag sequence into tag indices.
# pos_dic[tag] is used instead of pos_dic.get(tag) so that an unknown tag
# raises KeyError here, rather than silently putting None into the targets.
pos2idx = [[pos_dic[tag] for tag in tags] for tags in pos]
print(pos2idx)

[[6, 7, 1], [4, 7, 2, 1], [4, 7, 3, 4, 5, 1, 4], [4, 7, 2, 1, 7]]


In [16]:
# Right-pad the tag index lists up to max_length (same layout as X_indices).
pad_options = dict(maxlen=max_length, padding='post')
y = keras.preprocessing.sequence.pad_sequences(pos2idx, **pad_options)

In [17]:
print(y)
print('y shape : {}'.format(y.shape))

[[6 7 1 0 0 0 0 0 0 0]
 [4 7 2 1 0 0 0 0 0 0]
 [4 7 3 4 5 1 4 0 0 0]
 [4 7 2 1 7 0 0 0 0 0]]
y shape : (4, 10)


### Define SimPosRNN

In [18]:
class SimPosRNN:
    """Many-to-many RNN classifier (one predicted class per time step).

    The constructor builds the whole graph: a fixed one-hot lookup of the input
    ids, a basic RNN cell wrapped with an output projection, a masked sequence
    loss and an argmax prediction.

    Attributes:
        seq2seq_loss: loss tensor returned by tf.contrib.seq2seq.sequence_loss.
    """

    def __init__(self, X_length, X_indices, y, n_of_classes, hidden_dim, max_len, word_dic):
        """Build the graph.

        Args:
            X_length: sequence lengths; passed to dynamic_rnn as
                sequence_length and to sequence_mask as lengths.
            X_indices: integer ids looked up in the one-hot table.
            y: targets handed to sequence_loss.
            n_of_classes: output size of the projection wrapper.
            hidden_dim: number of units of the RNN cell.
            max_len: maxlen of the loss mask -- presumably the padded sequence
                length of X_indices; confirm at the call site.
            word_dic: vocabulary mapping; only its length is used, as the size
                of the one-hot table.
        """
        # Keep handles on the inputs so predict() can feed them.
        self._X_length = X_length
        self._X_indices = X_indices
        self._y = y

        # Data pipeline
        with tf.variable_scope('input_layer'):
            identity = tf.eye(len(word_dic), dtype=tf.float32)
            # The table is an identity matrix and is not trainable, because the
            # embedding vectors are not meant to be learned here.
            self._one_hot = tf.get_variable(name='one_hot_embedding',
                                            initializer=identity,
                                            trainable=False)
            self._X_batch = tf.nn.embedding_lookup(params=self._one_hot,
                                                   ids=self._X_indices)

        # RNN cell (many to many)
        with tf.variable_scope('rnn_cell'):
            hidden_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_dim,
                                                      activation=tf.nn.tanh)
            # Project each step's hidden state to n_of_classes scores.
            projected_cell = tf.contrib.rnn.OutputProjectionWrapper(cell=hidden_cell,
                                                                    output_size=n_of_classes)
            self._outputs, _final_state = tf.nn.dynamic_rnn(cell=projected_cell,
                                                            inputs=self._X_batch,
                                                            sequence_length=self._X_length,
                                                            dtype=tf.float32)

        with tf.variable_scope('seq2seq_loss'):
            # Weight 1.0 for the first X_length steps of each row, 0.0 after.
            step_weights = tf.sequence_mask(lengths=self._X_length, maxlen=max_len,
                                            dtype=tf.float32)
            self.seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits=self._outputs,
                                                                 targets=self._y,
                                                                 weights=step_weights)

        with tf.variable_scope('prediction'):
            # Highest-scoring class along the last (class) axis.
            self._prediction = tf.argmax(input=self._outputs, axis=2,
                                         output_type=tf.int32)

    def predict(self, sess, X_length, X_indices):
        """Run the prediction op, feeding the given lengths and ids.

        Args:
            sess: session object whose run() is called.
            X_length: value fed for the stored sequence-length tensor.
            X_indices: value fed for the stored input-id tensor.

        Returns:
            Whatever sess.run returns for the prediction op.
        """
        feed = {self._X_length: X_length, self._X_indices: X_indices}
        return sess.run(self._prediction, feed_dict=feed)

### Create a model of SimPosRNN

In [19]:
# hyper-parameters
lr = .003
epochs = 100
batch_size = 2
# Steps per epoch, by ceiling division: Dataset.batch keeps a final, smaller
# batch by default, so a data set whose size is not a multiple of batch_size
# needs one more step than floor division would report.
total_step = -(-np.shape(X_indices)[0] // batch_size)
print(total_step)

2


In [20]:
## create data pipeline with tf.data
# Slice the three arrays row-wise, shuffle, then group into mini-batches.
tr_dataset = (tf.data.Dataset
              .from_tensor_slices((X_length, X_indices, y))
              .shuffle(buffer_size=20)
              .batch(batch_size=batch_size))
# Initializable iterator: it has to be (re-)initialised before each pass.
tr_iterator = tr_dataset.make_initializable_iterator()
print(tr_dataset)

<BatchDataset shapes: ((?,), (?, 10), (?, 10)), types: (tf.int32, tf.int32, tf.int32)>


In [21]:
X_length_mb, X_indices_mb, y_mb = tr_iterator.get_next()

In [22]:
# The number of output classes is taken from the tag vocabulary (including
# '<pad>') instead of a hard-coded 8, so it stays correct if the tag set changes.
sim_pos_rnn = SimPosRNN(X_length=X_length_mb, X_indices=X_indices_mb, y=y_mb,
                        n_of_classes=len(pos_dic), hidden_dim=16,
                        max_len=max_length, word_dic=word_dic)

### Create training op and train model

In [23]:
## create training op
opt = tf.train.AdamOptimizer(learning_rate = lr)
training_op = opt.minimize(loss = sim_pos_rnn.seq2seq_loss)

In [24]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Mean training loss of every epoch.
tr_loss_hist = []

for epoch in range(epochs):
    avg_tr_loss, tr_step = 0, 0

    # Rewind the iterator, then consume mini-batches until it is exhausted.
    sess.run(tr_iterator.initializer)
    while True:
        try:
            _, tr_loss = sess.run(fetches=[training_op, sim_pos_rnn.seq2seq_loss])
        except tf.errors.OutOfRangeError:
            # End of the data set for this epoch.
            break
        avg_tr_loss += tr_loss
        tr_step += 1

    avg_tr_loss /= tr_step
    tr_loss_hist.append(avg_tr_loss)
    # Report every 10th epoch.
    if not (epoch + 1) % 10:
        print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))

epoch :  10, tr_loss : 1.626
epoch :  20, tr_loss : 1.191
epoch :  30, tr_loss : 0.825
epoch :  40, tr_loss : 0.548
epoch :  50, tr_loss : 0.368
epoch :  60, tr_loss : 0.262
epoch :  70, tr_loss : 0.192
epoch :  80, tr_loss : 0.146
epoch :  90, tr_loss : 0.112
epoch : 100, tr_loss : 0.088


In [25]:
yhat = sim_pos_rnn.predict(sess = sess, X_length = X_length, X_indices = X_indices)
yhat

array([[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
       [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
       [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
       [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]])

In [26]:
y

array([[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
       [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
       [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
       [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]])

In [27]:
# Translate every predicted index back to its tag name, row by row.
yhat = [[pos_idx_to_dic.get(idx) for idx in row] for row in yhat]
for tags in yhat:
    print(tags)

['pronoun', 'verb', 'adjective', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['noun', 'verb', 'adverb', 'adjective', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun', '<pad>', '<pad>', '<pad>']
['noun', 'verb', 'adverb', 'adjective', 'verb', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
