In [1]:
import tensorflow as tf
## fix TensorFlow's global random seed up front so that repeated runs of the
## notebook start from the same random state
tf.random.set_seed(1)

In [2]:
## SimpleRNN with two hidden units; return_sequences=True is set so the layer
## hands back an output for every time step (these are indexed per step below)
rnn_layer = tf.keras.layers.SimpleRNN(
    units=2,
    return_sequences=True,
    use_bias=True)

## create the weights for inputs with 5 features; the batch size and the
## sequence length are left unspecified (None)
rnn_layer.build((None, None, 5))

2022-02-03 17:11:04.368940: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-02-03 17:11:04.509378: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1006] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-03 17:11:04.510109: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.545
pciBusID: 0000:01:00.0
2022-02-03 17:11:04.510179: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1006] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-03 17:11:04.510833: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 1 with properties: 
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.545
pciBusID: 0000

In [3]:
## the layer exposes three weight tensors; going by the names and the shapes
## printed in the next cell they are the input-to-hidden matrix, the
## recurrent (output-to-output) matrix and the hidden bias
w_xh, w_oo, b_h = rnn_layer.weights

In [4]:
## report the shape of each weight tensor, one line per tensor
for caption, weight in (('W_xh shape:', w_xh),
                        ('W_oo shape:', w_oo),
                        ('b_h shape:', b_h)):
    print(caption, weight.shape)

W_xh shape: (5, 2)
W_oo shape: (2, 2)
b_h shape: (2,)


In [5]:
## toy input: three time steps, each a row of five identical values
## (1.0, then 2.0, then 3.0)
x_seq = tf.convert_to_tensor(
    [[value] * 5 for value in (1.0, 2.0, 3.0)],
    dtype=tf.float32)

In [6]:
## output of SimpleRNN
## x_seq is reshaped to (1, 3, 5) before the call, i.e. a leading axis of
## size 1 is placed in front of the 3 rows of 5 values
output = rnn_layer(tf.reshape(x_seq, shape=(1,3,5)))

2022-02-03 17:14:36.806314: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0


In [7]:
## manually computing the output, one time step at a time:
##   h_t = x_t W_xh + b_h
##   o_t = tanh(h_t + o_{t-1} W_oo)
## and printing it next to the layer's own output for comparison
out_man = []
for t in range(len(x_seq)):
    ## a single time step as a (1, n_features) row vector
    xt = tf.reshape(x_seq[t], (1, -1))
    print('Time step {} =>'.format(t))
    print('   Input             :', xt.numpy())

    ht = tf.matmul(xt, w_xh) + b_h
    print('   Hidden            :', ht.numpy())

    ## the recurrence starts from an all-zero previous output
    prev_o = out_man[t-1] if t > 0 else tf.zeros(shape=ht.shape)
    ot = tf.math.tanh(ht + tf.matmul(prev_o, w_oo))
    out_man.append(ot)
    print('   Output  (manual):', ot.numpy())
    ## the label has no placeholder, so it is printed as a plain string
    print('   SimpleRNN output:', output[0][t].numpy())
    print()

Time step 0 =>
   Input             : [[1. 1. 1. 1. 1.]]
   Hidden            : [[0.41464037 0.96012145]]
   Output  (manual): [[0.39240566 0.744331  ]]
   SimpleRNN output: [0.39240566 0.744331  ]

Time step 1 =>
   Input             : [[2. 2. 2. 2. 2.]]
   Hidden            : [[0.82928073 1.9202429 ]]
   Output  (manual): [[0.8011651 0.9912947]]
   SimpleRNN output: [0.8011651 0.9912947]

Time step 2 =>
   Input             : [[3. 3. 3. 3. 3.]]
   Hidden            : [[1.243921  2.8803642]]
   Output  (manual): [[0.9546827 0.999307 ]]
   SimpleRNN output: [0.9546827 0.999307 ]



# Implementing RNNs for sequence modeling in TensorFlow

# Project one - predicting the sentiment of IMDb movie reviews

## Preparing the movie review data

In [8]:
import tensorflow_datasets as tfds
import pandas as pd

In [10]:
## load the reviews from a CSV file located in the current working directory;
## a later cell reads a 'sentiment' column from this frame
df = pd.read_csv('movie_data.csv', encoding='utf-8')

In [11]:
## Step 1: create a dataset
## The label column is read without popping it, so `df` is left untouched
## and this cell can be re-run without a KeyError on a missing column.
target = df['sentiment']
ds_raw = tf.data.Dataset.from_tensor_slices(
    (df.drop(columns='sentiment').values, target.values))

In [12]:
## inspection: the first 50 bytes of the review text and the label
## for three examples
for ex in ds_raw.take(3):
    review, sentiment = ex
    tf.print(review.numpy()[0][:50], sentiment)

b'In 1974, the teenager Martha Moxley (Maggie Grace)' 1
b'OK... so... I really like Kris Kristofferson and h' 0
b'***SPOILER*** Do not read this, if you think about' 0


In [13]:
## fix the seed, then shuffle once; reshuffle_each_iteration=False is passed
## so that the order is not redrawn on every pass over the dataset
## NOTE(review): ds_raw is reassigned from itself, so re-running this cell
## stacks another shuffle on top of the previous one
tf.random.set_seed(1)
ds_raw = ds_raw.shuffle(
    buffer_size=50000, reshuffle_each_iteration=False)

## consecutive slices of the shuffled stream:
## first 25000 -> test, next 20000 -> train, everything after -> validation
ds_raw_train_valid = ds_raw.skip(25000)
ds_raw_test = ds_raw.take(25000)
ds_raw_valid = ds_raw_train_valid.skip(20000)
ds_raw_train = ds_raw_train_valid.take(20000)

In [14]:
## Step 2: find unique tokens (words)
from collections import Counter

## NOTE(review): newer tensorflow_datasets releases appear to expose the text
## utilities under tfds.deprecated.text -- confirm against the installed version
tokenizer = tfds.features.text.Tokenizer()
token_counts = Counter()

## tally how often every token occurs across the training reviews
for example in ds_raw_train:
    review_bytes = example[0].numpy()[0]
    token_counts.update(tokenizer.tokenize(review_bytes))

print('Vocab-size:', len(token_counts))

Vocab-size: 87007


In [15]:
## Step 3: encoding unique tokens to integers
example_str = 'This is an example!'
## token_counts is handed to the encoder as its vocabulary
encoder = tfds.features.text.TokenTextEncoder(token_counts)
## sanity check: encode a short sentence and show the resulting integer ids
print(encoder.encode(example_str))

[232, 9, 270, 1123]


In [16]:
## Step 3-A: define the function for transformation
def encode(text_tensor, label):
    """Encode one review with the module-level `encoder`.

    Args:
        text_tensor: object whose `.numpy()` result holds the raw review
            text at index 0.
        label: the label belonging to the review; passed through unchanged.

    Returns:
        A pair `(encoded_text, label)` where `encoded_text` is whatever
        `encoder.encode` returns for the raw text.
    """
    raw_text = text_tensor.numpy()[0]
    return encoder.encode(raw_text), label

In [17]:
## Step 3-B: wrap the encode function to a TF Op.
def encode_map_fn(text, label):
    """Run the Python-level `encode` through `tf.py_function`.

    Args:
        text: the text element of a dataset example.
        label: the label element of the same example.

    Returns:
        The result of `tf.py_function`, declared as two int64 outputs
        (encoded text, label).
    """
    output_types = (tf.int64, tf.int64)
    return tf.py_function(encode, inp=[text, label], Tout=output_types)

In [18]:
## apply the integer encoding to each of the three raw splits
ds_train, ds_valid, ds_test = (
    raw_split.map(encode_map_fn)
    for raw_split in (ds_raw_train, ds_raw_valid, ds_raw_test))

In [19]:
# look at the shape of some examples:
tf.random.set_seed(1)
for example in ds_train.shuffle(buffer_size=1000).take(5):
    encoded_review = example[0]
    print('Sequence length:', encoded_review.shape)

Sequence length: (24,)
Sequence length: (179,)
Sequence length: (262,)
Sequence length: (535,)
Sequence length: (130,)


In [20]:
## Take a small subset
## (the first 8 encoded training examples) and show each one's shape
ds_subset = ds_train.take(8)
for example in ds_subset:
    encoded_sequence = example[0]
    print('Individual size:', encoded_sequence.shape)

Individual size: (119,)
Individual size: (688,)
Individual size: (308,)
Individual size: (204,)
Individual size: (326,)
Individual size: (240,)
Individual size: (127,)
Individual size: (453,)


In [21]:
## Dividing the dataset into batches
## groups of 4 examples; the token axis ([-1]) is padded to a common length
## within each batch, the scalar labels ([]) are left as they are
ds_batched = ds_subset.padded_batch(
    batch_size=4, padded_shapes=([-1], []))

In [22]:
## show the shape of the padded sequences in every batch
for batch in ds_batched:
    padded_sequences = batch[0]
    print('Batch dimension:', padded_sequences.shape)

Batch dimension: (4, 688)
Batch dimension: (4, 453)


In [23]:
## batch every split the same way: 32 examples per batch, sequences padded
## to a common length within the batch, scalar labels left as they are
train_data, valid_data, test_data = (
    encoded_split.padded_batch(32, padded_shapes=([-1], []))
    for encoded_split in (ds_train, ds_valid, ds_test))

In [24]:
from tensorflow.keras.layers import Embedding

## a model holding a single embedding layer: 100 possible input indices,
## each mapped to a 6-dimensional vector (100 * 6 = 600 parameters),
## for input sequences of length 20
model = tf.keras.Sequential([
    Embedding(input_dim=100,
              output_dim=6,
              input_length=20,
              name='embed-layer'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed-layer (Embedding)      (None, 20, 6)             600       
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________
