<a href="https://colab.research.google.com/github/Lainey1225/tmo3/blob/main/NLP/rnn_sentiment_Songlin%20Li_%20Ting%20Mo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019

Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition

Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)

# Chapter 16: Modeling Sequential Data Using Recurrent Neural Networks (Part 1/2)

Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).

In [None]:


from IPython.display import Image
%matplotlib inline

# Introducing sequential data

## Modeling sequential data⁠—order matters

## Representing sequences



In [None]:
#Image(filename='#Images/16_01.png', width=700)

## The different categories of sequence modeling

In [None]:
#Image(filename='#Images/16_02.png', width=700)

# RNNs for modeling sequences

## Understanding the RNN looping mechanism


In [None]:
#Image(filename='#Images/16_03.png', width=700)

In [None]:
#Image(filename='#Images/16_04.png', width=700)

## Computing activations in an RNN


In [None]:
#Image(filename='#Images/16_05.png', width=700)

In [None]:
#Image(filename='#Images/16_06.png', width=700)

## Hidden-recurrence vs. output-recurrence

In [None]:
#Image(filename='#Images/16_07.png', width=700)

In [None]:
import tensorflow as tf

# Fix the global seed so the randomly initialised weights are reproducible.
tf.random.set_seed(1)

# Recurrent layer with 2 units that emits its output at every time step.
rnn_layer = tf.keras.layers.SimpleRNN(
    units=2, use_bias=True, return_sequences=True)

# Create the weights for inputs with 5 features; the batch and time
# dimensions are left unspecified.
rnn_layer.build(input_shape=(None, None, 5))

# The layer exposes three weights; the shapes printed below identify them as
# the input kernel, the recurrent kernel and the bias.
w_xh, w_oo, b_h = rnn_layer.weights

print(f'W_xh shape: {w_xh.shape}')
print(f'W_oo shape: {w_oo.shape}')
print(f'b_h shape: {b_h.shape}')

W_xh shape: (5, 2)
W_oo shape: (2, 2)
b_h shape: (2,)


In [None]:
# Toy input: three time steps with five features each.
x_seq = tf.convert_to_tensor(
    [[1.0]*5, [2.0]*5, [3.0]*5],
    dtype=tf.float32)


## output of SimpleRNN (input reshaped to batch=1, time=3, features=5):
output = rnn_layer(tf.reshape(x_seq, shape=(1, 3, 5)))

## manually computing the output:
out_man = []
for t in range(len(x_seq)):
    xt = tf.reshape(x_seq[t], (1, 5))
    print('Time step {} =>'.format(t))
    print('   Input           :', xt.numpy())

    # Input contribution only: x_t @ W_xh + b_h (no recurrence, no activation yet).
    ht = tf.matmul(xt, w_xh) + b_h
    print('   Hidden          :', ht.numpy())

    # The previous output is fed back; at the first step there is none,
    # so zeros of the same shape are used instead.
    if t>0:
        prev_o = out_man[t-1]
    else:
        prev_o = tf.zeros(shape=(ht.shape))

    # Add the recurrent contribution, then squash with tanh.
    ot = ht + tf.matmul(prev_o, w_oo)
    ot = tf.math.tanh(ot)
    out_man.append(ot)
    print('   Output (manual) :', ot.numpy())
    # The label has no placeholder, so it is printed as-is (the former
    # `.format(t)` call on it was a no-op).
    print('   SimpleRNN output:', output[0][t].numpy())
    print()

Time step 0 =>
   Input           : [[1. 1. 1. 1. 1.]]
   Hidden          : [[0.41464037 0.96012145]]
   Output (manual) : [[0.39240566 0.74433106]]
   SimpleRNN output: [0.39240566 0.74433106]

Time step 1 =>
   Input           : [[2. 2. 2. 2. 2.]]
   Hidden          : [[0.82928073 1.9202429 ]]
   Output (manual) : [[0.80116504 0.99129474]]
   SimpleRNN output: [0.80116504 0.99129474]

Time step 2 =>
   Input           : [[3. 3. 3. 3. 3.]]
   Hidden          : [[1.243921  2.8803642]]
   Output (manual) : [[0.95468265 0.99930704]]
   SimpleRNN output: [0.95468265 0.99930704]



## The challenges of learning long-range interactions


In [None]:
#Image(filename='#Images/16_08.png', width=700)


## Long Short-Term Memory cells 

In [None]:
#Image(filename='#Images/16_09.png', width=700)

# Implementing RNNs for sequence modeling in TensorFlow

## Project one: predicting the sentiment of IMDb movie reviews

### Preparing the movie review data



In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
import os


In [None]:
# Load the labelled reviews, then discard the first column
# (presumably a saved row index; confirm against the CSV).
df = pd.read_csv('https://raw.githubusercontent.com/tleitch/BDML/main/NLP/sTrain.csv')
df = df.drop(columns=df.columns[0])
df.tail()

Unnamed: 0,review,sentiment
30362,We arrived late at night and walked in to a ch...,1
30363,The only positive impression is location and p...,0
30364,Traveling with friends for shopping and a show...,0
30365,The experience was just ok. We paid extra for ...,0
30366,The Westin is a wonderfully restored grande da...,1


In [None]:
# Step 1: Create a dataset

# NOTE: pop() removes the 'sentiment' column from df in place, so this cell
# cannot be re-run without first re-running the cell that creates df.
target = df.pop('sentiment')

# Pair each row of the remaining columns with its label.
ds_raw = tf.data.Dataset.from_tensor_slices((df.values, target.values))

## inspection: first 50 bytes of the review text, followed by its label
for ex in ds_raw.take(3):
    tf.print(ex[0][0].numpy()[:50], ex[1])

b'The room was kind of clean but had a VERY strong s' 0
b'I booked this hotel through Hotwire at the lowest ' 0
b'Stayed here with husband and sons on the way to an' 1


 * **Train/validation/test splits**

In [None]:
tf.random.set_seed(1)

# Derive the split sizes from the dataset itself. The previous hard-coded
# sizes (25000 test / 20000 train / remainder validation) were sized for a
# 50,000-example dataset; on a smaller dataset they shrink the training
# split and leave the validation split empty.
n_total = int(tf.data.experimental.cardinality(ds_raw).numpy())
if n_total < 0:
    raise ValueError(
        'ds_raw must have a known, finite size to be split proportionally')

# Same proportions as the original constants: 50% test, 40% train and the
# remaining ~10% validation (exactly 25000 / 20000 / 5000 for 50,000 rows).
n_test = n_total // 2
n_train = (n_total * 2) // 5
n_valid = n_total - n_test - n_train

# A buffer that holds the whole dataset gives a full shuffle.
# reshuffle_each_iteration=False keeps the order fixed across iterations, so
# the take/skip splits below stay disjoint every time they are iterated.
ds_raw = ds_raw.shuffle(
    max(n_total, 1), reshuffle_each_iteration=False)

ds_raw_test = ds_raw.take(n_test)
ds_raw_train_valid = ds_raw.skip(n_test)
ds_raw_train = ds_raw_train_valid.take(n_train)
ds_raw_valid = ds_raw_train_valid.skip(n_train)

 * **Tokenizer and Encoder**
   * `tfds.deprecated.text.Tokenizer`: https://www.tensorflow.org/datasets/api_docs/python/tfds/deprecated/text/Tokenizer
   * `tfds.deprecated.text.TokenTextEncoder`: https://www.tensorflow.org/datasets/api_docs/python/tfds/deprecated/text/TokenTextEncoder

 * **Encoding sequences: keeping the last 100 items in each sequence**

In [None]:
## Step 2: find unique tokens (words)

from collections import Counter

tokenizer = tfds.deprecated.text.Tokenizer()

# Tally the tokens of the training split only; the size of this counter
# is the vocabulary size reported below.
token_counts = Counter()
for example in ds_raw_train:
    tokens = tokenizer.tokenize(example[0][0].numpy())
    token_counts.update(tokens)

print('Vocab-size:', len(token_counts))

Vocab-size: 22798


In [None]:
## Step 3: encoding each unique token into integers

# Build the encoder from the tokens collected in token_counts.
encoder = tfds.deprecated.text.TokenTextEncoder(vocab_list=token_counts)

# Quick sanity check on a short sentence.
example_str = 'This is an example!'
encoder.encode(example_str)

[105, 63, 33, 1374]

In [None]:
## Step 3-A: define the function for transformation

def encode(text_tensor, label):
    """Encode one raw review with the module-level ``encoder``.

    Args:
        text_tensor: eager tensor; its first element (``.numpy()[0]``) is the
            text that gets encoded.
        label: passed through unchanged.

    Returns:
        A ``(encoded_text, label)`` pair, where ``encoded_text`` is whatever
        ``encoder.encode`` returns for the text.
    """
    return encoder.encode(text_tensor.numpy()[0]), label

## Step 3-B: wrap the encode function to a TF Op.
def encode_map_fn(text, label):
    """Run ``encode`` through ``tf.py_function`` so it can be used in ``Dataset.map``.

    Args:
        text: text element of a dataset example.
        label: label element of a dataset example.

    Returns:
        The two outputs of ``encode``, both declared as ``tf.int64``.
    """
    encoded = tf.py_function(
        func=encode,
        inp=[text, label],
        Tout=(tf.int64, tf.int64))
    return encoded

In [None]:
# Apply the integer encoding to every split (train, validation, test).
ds_train, ds_valid, ds_test = (
    split.map(encode_map_fn)
    for split in (ds_raw_train, ds_raw_valid, ds_raw_test))

# Look at a few shuffled training examples: the sequences differ in length.
tf.random.set_seed(1)
for example in ds_train.shuffle(1000).take(5):
    print('Sequence length:', example[0].shape)

# Display the last example drawn above.
example

Sequence length: (454,)
Sequence length: (137,)
Sequence length: (103,)
Sequence length: (269,)
Sequence length: (63,)


(<tf.Tensor: shape=(63,), dtype=int64, numpy=
 array([ 213,   57,  193,  450,  468,   17, 1522,  114,  467,  976, 1392,
          37,  728,   40,  375,   38, 1012, 8786, 7124,    6, 1139,  647,
        5808,   17, 3759,  204, 8787,  323,  997, 1634,  717, 1655,   17,
        8154,   41,  757, 1996,  998,  682, 5456,   11,  452, 8788,   17,
        3281,   21, 5426,  730,  248,   17, 1589,   65,   57, 1637, 7682,
         938,   17,  939, 8789,  831,  262,  145,  287])>,
 <tf.Tensor: shape=(), dtype=int64, numpy=1>)

 * **batch() vs. padded_batch()**

```python

# this will result in error


BATCH_SIZE = 32
train_data = all_encoded_data.batch(BATCH_SIZE)

next(iter(train_data))

# Running this will result in error
# We cannot apply .batch() to this dataset
```

In [None]:
## Take a small subset

ds_subset = ds_train.take(8)
for example in ds_subset:
    print('Individual size:', example[0].shape)

## batching the datasets
# padded_shapes: [-1] pads the token sequences to the longest one in each
# batch; [] leaves the scalar labels as they are.
ds_batched = ds_subset.padded_batch(
    batch_size=4,
    padded_shapes=([-1], []))

for batch in ds_batched:
    print('Batch dimension:', batch[0].shape)

Individual size: (303,)
Individual size: (291,)
Individual size: (92,)
Individual size: (316,)
Individual size: (232,)
Individual size: (132,)
Individual size: (219,)
Individual size: (121,)
Batch dimension: (4, 316)
Batch dimension: (4, 232)


In [None]:
## batching the datasets
# Every split is batched the same way: 32 examples per batch, sequences
# padded to the longest one in the batch, labels left unpadded.
train_data, valid_data, test_data = (
    split.padded_batch(32, padded_shapes=([-1], []))
    for split in (ds_train, ds_valid, ds_test))

### Embedding layers for sentence encoding


 * `input_dim`: number of words, i.e. maximum integer index + 1.
 * `output_dim`: size of the embedding vector that each integer index is mapped to.
 * `input_length`: the length of (padded) sequence
    * for example, `'This is an example' -> [0, 0, 0, 0, 0, 0, 3, 1, 8, 9]`   
    => input_length is 10
 
 

 * When called, the layer takes integer values as input;   
 the embedding layer converts each integer into a float vector of size `[output_dim]`
   * If input shape is `[BATCH_SIZE]`, output shape will be `[BATCH_SIZE, output_dim]`
   * If input shape is `[BATCH_SIZE, 10]`, output shape will be `[BATCH_SIZE, 10, output_dim]`

In [None]:
#Image(filename='#Images/16_10.png', width=700)

In [None]:
from tensorflow.keras.layers import Embedding


# Single-layer model: 100 possible indices, 6-dimensional embeddings,
# input sequences of length 20.
model = tf.keras.Sequential([
    Embedding(input_dim=100,
              output_dim=6,
              input_length=20,
              name='embed-layer'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed-layer (Embedding)      (None, 20, 6)             600       
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________


### Building an RNN model

* **Keras RNN layers:**
  * `tf.keras.layers.SimpleRNN(units, return_sequences=False)`
  * `tf.keras.layers.LSTM(..)`
  * `tf.keras.layers.GRU(..)`
  * `tf.keras.layers.Bidirectional()`
 
* **Determine `return_sequences=?`**
  * In a multi-layer RNN, all RNN layers except the last one should have `return_sequences=True`
  * For the last RNN layer, decide based on the type of problem: 
     * many-to-many: -> `return_sequences=True`
     * many-to-one : -> `return_sequences=False`
     * ..
    

In [None]:
## An example of building a RNN model
## with SimpleRNN layer

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Dense

# Two stacked SimpleRNN layers: the first returns the full sequence so the
# second has a sequence to consume; the second returns only its last output.
model = Sequential([
    Embedding(1000, 32),
    SimpleRNN(32, return_sequences=True),
    SimpleRNN(32),
    Dense(1),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 32)          32000     
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, None, 32)          2080      
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 32)                2080      
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 36,193
Trainable params: 36,193
Non-trainable params: 0
_________________________________________________________________


In [None]:
## An example of building a RNN model
## with LSTM layer


from tensorflow.keras.layers import LSTM


# Two stacked LSTM layers: the first returns the full sequence for the
# second to consume; the second returns only its last output.
model = Sequential([
    Embedding(10000, 32),
    LSTM(32, return_sequences=True),
    LSTM(32),
    Dense(1),
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          320000    
_________________________________________________________________
lstm (LSTM)                  (None, None, 32)          8320      
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 336,673
Trainable params: 336,673
Non-trainable params: 0
_________________________________________________________________


In [None]:
## An example of building a RNN model
## with GRU layer
from tensorflow.keras.layers import GRU

# Two stacked GRU layers: the first returns the full sequence for the
# second to consume; the second returns only its last output.
model = Sequential([
    Embedding(10000, 32),
    GRU(32, return_sequences=True),
    GRU(32),
    Dense(1),
])
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 32)          320000    
_________________________________________________________________
gru (GRU)                    (None, None, 32)          6336      
_________________________________________________________________
gru_1 (GRU)                  (None, 32)                6336      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 332,705
Trainable params: 332,705
Non-trainable params: 0
_________________________________________________________________
