# Natural Language Processing

1. Character Level Modelling
- Stateless RNN
- Stateful RNN
2. Word Level Modelling (Sentiment Analysis)
3. Reusing Pretrained Embeddings and Language Models

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
print(tf.__version__)
def cls():
    """Reset the global TensorFlow/Keras state used by this notebook.

    Sets the global TensorFlow random seed to 42 and clears the Keras
    backend session. Called at the top of cells that build datasets or models.
    """
    tf.random.set_seed(42)
    tf.keras.backend.clear_session()
cls()



2.9.1


# 1. Character level Modelling 
We train a model to predict the next character of a text, given the characters that come before it.

In [2]:
########################################
# Lets download some shakespeare data. #
########################################
import tensorflow as tf

# Download the tiny-Shakespeare corpus (cached by Keras after the first call)
# and read it into memory as a single string.
shakespeare_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
filepath = tf.keras.utils.get_file("shakespeare.txt", shakespeare_url)
# Pin the encoding so the result does not depend on the platform default
# (e.g. cp1252 on Windows).
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

In [3]:
# Save a local copy of the corpus next to the notebook.
shakespeare_text_path = "datasets/shakespear_text.txt"
# Create the target folder first: open(..., "w") raises FileNotFoundError
# when the parent directory does not exist (e.g. on a fresh checkout).
Path(shakespeare_text_path).parent.mkdir(parents=True, exist_ok=True)
with open(shakespeare_text_path, "w", encoding="utf-8") as text_file:
    text_file.write(shakespeare_text)

In [4]:
# Peek at the first 100 characters of the corpus
print(shakespeare_text[:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [5]:
# Distinct characters that occur in the text once it has been lower-cased.
# Build the sorted string once and reuse it for both printouts.
unique_chars = "".join(sorted(set(shakespeare_text.lower())))
print(unique_chars)
print("Total length: ", len(unique_chars))


 !$&',-.3:;?abcdefghijklmnopqrstuvwxyz
Total length:  39


In [6]:
############################################
# Convert each character to integer number #
############################################
cls()
# split="character" tokenizes the text one character at a time and
# standardize="lower" lower-cases it before tokenizing.
text_vec_layer = tf.keras.layers.TextVectorization(split="character",
                                                   standardize="lower")
# Build the character vocabulary from the whole corpus
text_vec_layer.adapt([shakespeare_text])

# The reported size also counts the pad and unknown tokens (see the cell that subtracts 2)
print("Unique character: ", text_vec_layer.vocabulary_size())

Unique character:  41


In [9]:
# Encode the whole corpus as one sequence of character IDs
# ([0] picks the single text of the one-element batch)
encoded = text_vec_layer([shakespeare_text])[0]
print("Total Characters in text file: ", encoded.shape)
# First ten character IDs
encoded[:10]

Total Characters in text file:  (1115394,)


<tf.Tensor: shape=(10,), dtype=int64, numpy=array([21,  7, 10,  9,  4,  2, 20,  7,  4,  7], dtype=int64)>

In [10]:
# The layer reports 41 tokens although the text only contains 39 distinct
# characters: IDs 0 and 1 are used for the pad and unknown tokens.
# We do not use either of them, so shift every ID down by 2 so that the
# character IDs run from 0 to 38.
# The shifted IDs are recomputed from the layer (instead of `encoded -= 2`) so
# that re-running this cell cannot shift the IDs a second time.
encoded = text_vec_layer([shakespeare_text])[0] - 2
n_tokens = text_vec_layer.vocabulary_size() - 2  # number of distinct chars = 39
dataset_size = len(encoded)  # total number of chars = 1,115,394

In [11]:
# Number of distinct characters (pad/unknown excluded)
n_tokens

39

In [12]:
# Total number of characters in the corpus
dataset_size

1115394

### A. Stateless RNN:

In [13]:
# Sequence-to-sequence dataset builder
def to_dataset(sequence, length, shuffle=False, seed=None, batch_size=32):
    """Turn a 1-D token sequence into batches of (input, target) windows.

    Each window holds ``length + 1`` consecutive tokens and consecutive windows
    start one token apart. The input is the window without its last token and
    the target is the window without its first token, i.e. the input shifted
    one step ahead.

    Args:
        sequence: token IDs to slice into windows.
        length: number of time steps in each input/target sequence.
        shuffle: when true, shuffle the windows using a 100,000-element buffer.
        seed: seed passed to the shuffle.
        batch_size: number of windows per batch.

    Returns:
        A ``tf.data.Dataset`` of ``(inputs, targets)`` batches, prefetched by 1.
    """
    window_size = length + 1
    windows = (
        tf.data.Dataset.from_tensor_slices(sequence)
        .window(window_size, shift=1, drop_remainder=True)
        # each window is itself a dataset; batching it yields a single tensor
        .flat_map(lambda win: win.batch(window_size))
    )
    if shuffle:
        windows = windows.shuffle(100_000, seed=seed)

    def split_input_target(batch):
        return batch[:, :-1], batch[:, 1:]

    return windows.batch(batch_size).map(split_input_target).prefetch(1)

In [14]:
# "To be" has 5 characters, so with length=4 there is exactly one window:
# input  : "to b"
# target : "o be"
dataset = to_dataset(text_vec_layer(["To be"])[0], length=4)
list(dataset.take(1))

[(<tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[ 4,  5,  2, 23]], dtype=int64)>,
  <tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[ 5,  2, 23,  3]], dtype=int64)>)]

In [15]:
# Decode the single (input, target) pair back into characters.
# Fetch the vocabulary and materialise the batch once instead of repeating
# both calls inside each list comprehension.
vocabulary = text_vec_layer.get_vocabulary()
sample_inputs, sample_targets = list(dataset.take(1))[0]
print("Input : ", [vocabulary[i] for i in sample_inputs.numpy()[0]])

print("Output: ", [vocabulary[i] for i in sample_targets.numpy()[0]])

Input :  ['t', 'o', ' ', 'b']
Output:  ['o', ' ', 'b', 'e']


In [16]:
# Dataset preparation
# The corpus has 1_115_394 characters in total.
# Split the encoded sequence into train (first 1,000,000), validation (next 60,000)
# and test (the rest); only the training windows are shuffled.
length = 50 # number of time steps in a single training instance
cls()
train_set = to_dataset(encoded[:1_000_000], length=length, shuffle=True,seed=42)
valid_set = to_dataset(encoded[1_000_000:1_060_000], length=length)
test_set = to_dataset(encoded[1_060_000:], length=length)

In [17]:
# The batches contain integer character IDs, so before feeding them to an RNN
# they have to be turned into vectors: either one-hot vectors or embeddings.
list(train_set.take(1))

[(<tf.Tensor: shape=(32, 50), dtype=int64, numpy=
  array([[15,  0,  9, ...,  1,  8, 30],
         [25,  1,  0, ..., 13, 14,  0],
         [13,  7, 23, ...,  0,  5,  9],
         ...,
         [25,  1,  0, ...,  0,  4,  0],
         [11, 12,  0, ..., 12,  0,  6],
         [ 1,  0,  5, ...,  7,  0,  4]], dtype=int64)>,
  <tf.Tensor: shape=(32, 50), dtype=int64, numpy=
  array([[ 0,  9,  3, ...,  8, 30, 10],
         [ 1,  0,  3, ..., 14,  0, 16],
         [ 7, 23, 10, ...,  5,  9,  0],
         ...,
         [ 1,  0, 21, ...,  4,  0,  8],
         [12,  0, 15, ...,  0,  6,  5],
         [ 0,  5,  2, ...,  0,  4,  0]], dtype=int64)>)]

In [18]:
# Each input batch has shape (32, 50): 32 sequences (batch size) of 50 time steps.
# Each target batch has the same shape (32, 50).
# The target is the input shifted one step ahead: input 15, 0, 9, ... -> target 0, 9, 3, ...

In [19]:
# First approach: feed the characters to the network as one-hot vectors

def one_hot_inputs(char_ids, targets):
    """One-hot encode the input IDs; the targets stay as integer IDs."""
    return tf.one_hot(char_ids, depth=n_tokens), targets

train_set_oh = train_set.map(one_hot_inputs)
valid_set_oh = valid_set.map(one_hot_inputs)
test_set_oh = test_set.map(one_hot_inputs)

list(train_set_oh.take(1))

[(<tf.Tensor: shape=(32, 50, 39), dtype=float32, numpy=
  array([[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [1., 0., 0., ..., 0., 0., 0.],
          [0., 0., 1., ..., 0., 0., 0.]],
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [1., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 1., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         [[0., 1., 0., ..., 0., 0., 0.],
          [1., 0., 0., ..., 0., 0., 0.],
          [0., 1., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [1., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         ...,
  
         [[0., 1., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 1., 0., ..., 0., 0., 0.],
         

In [20]:
# Input: each time step is a one-hot vector with 39 features, one per distinct
# character; only the feature at the position of the current character is 1.
# So the input layer has 39 neurons, because there are only 39 distinct values.

# Similarly, the output layer has 39 neurons because there are 39 distinct
# values; each neuron gives the predicted probability of one character.


In [21]:
# Build the network that consumes the one-hot encoded characters
cls()
model = tf.keras.Sequential([ # from tf version 2.8.x
    # input_shape=[None, n_tokens]: any number of time steps, n_tokens one-hot features each
    tf.keras.layers.GRU(32, return_sequences=True, input_shape=[None, n_tokens], dropout=0.2, recurrent_dropout=0.2),
    tf.keras.layers.GRU(16, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
    # the same softmax layer is applied at every time step: one probability per character
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(n_tokens, activation="softmax"))
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, None, 32)          7008      
                                                                 
 gru_1 (GRU)                 (None, None, 16)          2400      
                                                                 
 time_distributed (TimeDistr  (None, None, 39)         663       
 ibuted)                                                         
                                                                 
Total params: 10,071
Trainable params: 10,071
Non-trainable params: 0
_________________________________________________________________


In [None]:
#########################################################
# WARNING: This simple model may take one hour to train #
# Because our training data is large                    #
#########################################################

# Targets are integer character IDs, hence the sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="nadam",
              metrics=["accuracy"])
#model_ckpt = tf.keras.callbacks.ModelCheckpoint("my_shakespeare_model", monitor="val_accuracy", save_best_only=True)
# Validate on the held-out validation split. Passing the training set here would
# make val_loss/val_accuracy report training performance and hide overfitting.
history = model.fit(train_set_oh, validation_data=valid_set_oh, epochs=5)

In [23]:
# Second approach: let the network learn an embedding for each character ID
# instead of feeding one-hot vectors
cls()
model = tf.keras.Sequential([
    # maps each of the n_tokens character IDs to a 16-dimensional vector
    tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16),
    tf.keras.layers.GRU(128, return_sequences=True),
    tf.keras.layers.Dense(n_tokens, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
# Checkpoint callback for the training call below, which is currently commented out
model_ckpt = tf.keras.callbacks.ModelCheckpoint("my_shakespeare_model", monitor="val_accuracy", save_best_only=True)
#history = model.fit(train_set, validation_data=valid_set, epochs=10,callbacks=[model_ckpt])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 16)          624       
                                                                 
 gru (GRU)                   (None, None, 128)         56064     
                                                                 
 dense (Dense)               (None, None, 39)          5031      
                                                                 
Total params: 61,719
Trainable params: 61,719
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Wrap the preprocessing and the character model into a single model that
# accepts raw strings.
cls()
shakespeare_model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Lambda(lambda X: X - 2),  # no <PAD> or <UNK> tokens
    model
])
shakespeare_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 lambda (Lambda)             (None, None)              0         
                                                                 
 sequential (Sequential)     (None, None, 39)          61719     
                                                                 
Total params: 61,719
Trainable params: 61,719
Non-trainable params: 0
_________________________________________________________________


In [25]:
from pathlib import Path
# Download an already trained model instead of training for hours.
# get_file returns the path of the downloaded archive (extract=True also unpacks it);
# model_path points at a "shakespeare_model" entry next to that archive.
url = "https://github.com/ageron/data/raw/main/shakespeare_model.tgz"
path = tf.keras.utils.get_file("shakespeare_model.tgz", url, extract=True)
model_path = Path(path).with_name("shakespeare_model")
model_path

WindowsPath('C:/Users/girraj.jangid/.keras/datasets/shakespeare_model')

In [26]:
cls()
# Replace the untrained wrapper model with the downloaded, trained one
shakespeare_model = tf.keras.models.load_model(model_path)
shakespeare_model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 lambda_5 (Lambda)           (None, None)              0         
                                                                 
 sequential (Sequential)     (None, None, 39)          61719     
                                                                 
Total params: 61,719
Trainable params: 61,719
Non-trainable params: 0
_________________________________________________________________


In [27]:
# Predict next-character probabilities for every position of the prompt
y_proba = shakespeare_model.predict(["To be or not to b"])
y_proba.shape # (1 text, 17 characters, 39 probabilities: one per possible next character)
# For generation only the last time step matters: its 39 probabilities
# describe the character that follows the whole prompt



(1, 17, 39)

In [28]:
# Most probable next-character ID at each of the 17 time steps
y_pred = tf.argmax(y_proba[0],axis=1)  # choose the most probable character ID
y_pred

<tf.Tensor: shape=(17,), dtype=int64, numpy=
array([ 6,  0,  2,  1,  0,  2,  9, 12,  9,  3,  2,  0,  7,  6,  0,  2,  1],
      dtype=int64)>

In [29]:
def print_vocab(lst):
    """Print the characters for a sequence of vocabulary IDs as one string.

    Args:
        lst: iterable of indices into ``text_vec_layer.get_vocabulary()``.

    Returns:
        None; the decoded string is written to standard output.
    """
    # Fetch the vocabulary once rather than rebuilding the list for every ID.
    vocabulary = text_vec_layer.get_vocabulary()
    print("".join(vocabulary[i] for i in lst))

In [30]:
# Add 2 to map the model's IDs back onto vocabulary indices
# (the model works on IDs shifted down by 2)
print_vocab(y_pred+2)

h te tndnot sh te


In [31]:
# Only the prediction at the last time step is of interest: the earlier
# positions predict characters of the prompt that we already know
y_proba = shakespeare_model.predict(["To be or not to b"])[0, -1]
y_pred = tf.argmax(y_proba)  # choose the most probable character ID
# +2 converts the model's ID back into a vocabulary index
text_vec_layer.get_vocabulary()[y_pred + 2]



'e'

In [32]:
###############################
# Let generate some fake text #
###############################
# In practice, always feeding the most probable character back into the
# model tends to make it repeat the same text over and over.
# Instead we sample the next character at random, according to the
# predicted probabilities.

# Log-probabilities of three classes with probabilities 50%, 40% and 10%
log_probas = tf.math.log([[0.5, 0.4, 0.1]])#probas = 50%, 40%, and 10%
# tf.random.categorical draws num_samples class indices from that distribution,
# so in the output roughly
# 50% of the values are 0,
# 40% of the values are 1,
# 10% of the values are 2.
tf.random.categorical(log_probas, num_samples=50)  # draw 50 samples

<tf.Tensor: shape=(1, 50), dtype=int64, numpy=
array([[0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 1, 1, 0, 2, 0, 1, 1, 1,
        0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
        1, 0, 2, 0, 0, 1]], dtype=int64)>

In [33]:
def next_char(text, temperature=1):
    """Sample the character that follows ``text`` from the model's prediction.

    Args:
        text: the text generated so far.
        temperature: divisor applied to the log-probabilities before sampling.

    Returns:
        The sampled character, looked up in the vectorization layer's vocabulary.
    """
    # [0, -1:] keeps only the last time step of the first (only) text,
    # with a leading axis of size 1 as tf.random.categorical expects 2-D input
    last_step_proba = shakespeare_model.predict([text], verbose=False)[0, -1:]
    scaled_log_proba = tf.math.log(last_step_proba) / temperature
    sampled = tf.random.categorical(scaled_log_proba, num_samples=1)
    sampled_id = sampled[0, 0]
    # +2 turns the model's ID back into a vocabulary index
    vocabulary = text_vec_layer.get_vocabulary()
    return vocabulary[sampled_id + 2]

In [34]:
def extend_text(text, n_chars=50, temperature=1):
    """Append ``n_chars`` sampled characters to ``text`` and return the result.

    Each new character is obtained from ``next_char``, called on everything
    generated so far with the given ``temperature``.
    """
    generated = text
    for _step in range(n_chars):
        following = next_char(generated, temperature)
        generated = generated + following
    return generated

In [35]:
cls()
# A temperature close to 0 favours the most probable characters,
# while a very high temperature gives all characters a nearly equal chance
text = extend_text("To be or not to be", n_chars=500, temperature=0.01)
print(text)

To be or not to be the duke
as it is a proper strange death,
and then the sea to the death, and the duke and the death,
and then the sea to the death, and the duke and the death,
and then the sea to the death, and the duke and the death,
and then the sea to the death, and the duke and the death,
and then the strange daughter is a strange daughter,
and the death and the death, and the death,
and then the sea to the death, and the duke and the death,
and then the strange daughter is a strange daughter,
and the deat


In [36]:
# temperature=1 samples from the predicted probabilities as they are
text = extend_text("To be or not to be", n_chars=50, temperature=1)
print(text)

To be or not to be win their joience
shall bear, you will ne'er affe


In [37]:
# A very high temperature flattens the distribution: the output is close to random characters
text = extend_text("To be or not to be", n_chars=50, temperature=100)
print(text)

To be or not to bewnxsukeqz r,we
gj'kfb!i'x-a :ndp;mq.t3iprkfdlxkzu.


### B. Stateful RNN:
Until now, at each training iteration the model started with a hidden state
full of zeros, updated this state at each time step,
and threw it away after the last time step, as it was not
needed anymore. What if we told the RNN to preserve this final
state after processing one training batch and use it as the initial
state for the next training batch? This way the model can learn
long-term patterns despite only backpropagating through short sequences.
This is called a `stateful RNN`.

A stateful RNN only makes sense when each input sequence in a batch starts exactly where the corresponding sequence in the previous batch left off, i.e. we need to prepare sequential, non-overlapping sequences rather than the shuffled, overlapping sequences we used to train the stateless RNN.

We must use `shift=length` (the number of time steps per sequence) instead of `shift=1` when creating the windows.

In [38]:
def to_dataset_for_stateful_rnn(sequence, length):
    """Build consecutive (input, target) batches of size 1 for a stateful RNN.

    Windows of ``length + 1`` tokens are taken every ``length`` tokens, so each
    input sequence starts where the previous input sequence ended.

    Args:
        sequence: token IDs to slice into windows.
        length: number of time steps in each input/target sequence.

    Returns:
        A ``tf.data.Dataset`` of ``(inputs, targets)`` pairs with batch size 1,
        prefetched by 1.
    """
    window_size = length + 1
    # shift=length (not 1) so that consecutive inputs do not overlap
    windows = tf.data.Dataset.from_tensor_slices(sequence).window(
        window_size, shift=length, drop_remainder=True)
    sequences = windows.flat_map(lambda win: win.batch(window_size))
    # One sequence per batch: the first token of each batch is then the
    # token that follows the last input token of the previous batch
    single_batches = sequences.batch(1)
    return single_batches.map(
        lambda batch: (batch[:, :-1], batch[:, 1:])
    ).prefetch(1)

In [39]:
length = 50
# Same split boundaries as for the stateless model:
# train = first 1,000,000 chars, validation = next 60,000, test = the rest.
stateful_train_set = to_dataset_for_stateful_rnn(encoded[:1_000_000], length)
# The validation set must come from its own slice. Reusing the training slice
# would make val_accuracy (which the checkpoint callback monitors) measure training data.
stateful_valid_set = to_dataset_for_stateful_rnn(encoded[1_000_000:1_060_000], length)
stateful_test_set = to_dataset_for_stateful_rnn(encoded[1_060_000:], length)

In [40]:
# Small example: each input starts where the previous input ended
list(to_dataset_for_stateful_rnn(tf.range(10), 3))

[(<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[0, 1, 2]])>,
  <tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[1, 2, 3]])>),
 (<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[3, 4, 5]])>,
  <tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[4, 5, 6]])>),
 (<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[6, 7, 8]])>,
  <tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[7, 8, 9]])>)]

In [41]:
# Helpers for stateful batches that hold more than one sequence
def to_non_overlapping_windows(sequence, length):
    """Slice ``sequence`` into windows of ``length + 1`` tokens taken every ``length`` tokens.

    Returns a ``tf.data.Dataset`` in which every element is one window tensor.
    """
    window_size = length + 1
    windows = tf.data.Dataset.from_tensor_slices(sequence).window(
        window_size, shift=length, drop_remainder=True)
    return windows.flat_map(lambda win: win.batch(window_size))

def to_batched_dataset_for_stateful_rnn(sequence, length, batch_size=32):
    """Build (input, target) batches of ``batch_size`` sequences for a stateful RNN.

    The sequence is cut into ``batch_size`` contiguous parts; row ``i`` of every
    batch is the next window of part ``i``, so each row continues where the same
    row of the previous batch stopped.

    Args:
        sequence: token IDs to split.
        length: number of time steps in each input/target sequence.
        batch_size: number of parts, i.e. rows per batch.

    Returns:
        A ``tf.data.Dataset`` of ``(inputs, targets)`` batches, prefetched by 1.
    """
    # One contiguous chunk of the sequence per batch row
    parts = np.array_split(sequence, batch_size)
    datasets = tuple(to_non_overlapping_windows(part, length) for part in parts)
    # Take one window from every part and stack them into a single batch tensor
    ds = tf.data.Dataset.zip(datasets).map(lambda *windows: tf.stack(windows))
    return ds.map(lambda window: (window[:, :-1], window[:, 1:])).prefetch(1)

In [42]:
# Small example with 2 rows per batch: row 0 walks through 0..9, row 1 through 10..19
list(to_batched_dataset_for_stateful_rnn(tf.range(20), length=3, batch_size=2))

[(<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[ 0,  1,  2],
         [10, 11, 12]])>,
  <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[ 1,  2,  3],
         [11, 12, 13]])>),
 (<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[ 3,  4,  5],
         [13, 14, 15]])>,
  <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[ 4,  5,  6],
         [14, 15, 16]])>),
 (<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[ 6,  7,  8],
         [16, 17, 18]])>,
  <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[ 7,  8,  9],
         [17, 18, 19]])>)]

In [43]:
##################################################################
# 1. When building the model we must specify batch_input_shape,
# because the layer keeps one state per input sequence in
# the batch (here the batch size is fixed to 1)
# 2. The RNN layer must be created with stateful=True
##################################################################

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16,
                              batch_input_shape=[1, None]),
    tf.keras.layers.GRU(128, return_sequences=True, stateful=True),
    tf.keras.layers.Dense(n_tokens, activation="softmax")
])

In [44]:
# The recurrent states must be reset at the start of every epoch, when
# the dataset is read again from its beginning.
class ResetStatesCallback(tf.keras.callbacks.Callback):
    """Keras callback that resets the model's states when an epoch begins."""

    def on_epoch_begin(self, epoch, logs=None):
        # `logs` defaults to None to match the base Callback hook signature;
        # it is not used here.
        self.model.reset_states()

In [45]:
# Checkpoint callback for the stateful model: monitors validation accuracy
# and only saves when it improves (save_best_only=True)
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    "my_stateful_shakespeare_model",
    monitor="val_accuracy",
    save_best_only=True)

In [None]:
# Warning: this cell takes more than an hour to run.
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
# ResetStatesCallback clears the RNN states at the start of each epoch
history = model.fit(stateful_train_set, validation_data=stateful_valid_set,
                    epochs=10, callbacks=[ResetStatesCallback(), model_ckpt])

In [46]:
# A stateful model can only predict on batches of the size it was built with.
# To get around this, convert the stateful RNN into a stateless one after
# training: build the same architecture without stateful=True and copy the weights.

# Stateless twin of the stateful model (same layers, no fixed batch size)
stateless_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16),
    tf.keras.layers.GRU(128, return_sequences=True),
    tf.keras.layers.Dense(n_tokens, activation="softmax")
])


In [47]:
# Build the model first so its weights exist, then copy them over
stateless_model.build(tf.TensorShape([None, None]))
stateless_model.set_weights(model.get_weights()) # `model` is the stateful RNN

In [48]:
# Wrap the stateless copy with the text preprocessing so that the
# generation helpers (which read shakespeare_model) now use it
shakespeare_model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Lambda(lambda X: X - 2),  # no <PAD> or <UNK> tokens
    stateless_model
])

In [49]:
# NOTE(review): the text is only meaningful if the stateful model was actually
# trained in the (long-running) fit cell above; with untrained weights the
# output is gibberish. Confirm the training cell was run.
print(extend_text("to be or not to be", temperature=0.01))

to be or not to be;aohdjj?cbtbxri,m?fch,?y!.&kkb,pdj33h&yp$,jrm'snws


# 2. Word Level Modelling
We will build a sentiment analysis model using the IMDB reviews dataset.

In [190]:
# Load the IMDB reviews: the original "train" split is cut into
# 80% training, 10% validation and 10% test.
(raw_train_set, raw_valid_set, raw_test_set), info = tfds.load(
    name="imdb_reviews",
    split=["train[:80%]", "train[80%:90%]", "train[90%:]"],
    as_supervised=True,
    with_info=True,
)
# Alternative (kept for reference): the pre-tokenized Keras version of the dataset
# (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000) # read only top 10000 word occured
# word_index = tf.keras.datasets.imdb.get_word_index()

# x_valid, y_valid = x_test[:10_000], y_test[:10_000]
# x_test, y_test = x_test[10_000:], y_test[10_000:]
# print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

In [201]:
# Number of reviews in each split
raw_train_set.cardinality(), raw_valid_set.cardinality(), raw_test_set.cardinality()

(<tf.Tensor: shape=(), dtype=int64, numpy=20000>,
 <tf.Tensor: shape=(), dtype=int64, numpy=2500>,
 <tf.Tensor: shape=(), dtype=int64, numpy=2500>)

In [227]:
# Show the beginning of the first training review together with its label
for review, label in raw_train_set.take(1):
    review_start = review.numpy().decode("utf-8")[:200]
    print(review_start, "...")
    print("Label:", label.numpy())

This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting  ...
Label: 0


In [275]:
cls()
batch_size = 12
# NOTE: train_set / valid_set / test_set now hold IMDB review batches and
# replace the Shakespeare datasets of the same names defined earlier.
# Only the training set is shuffled.
train_set = raw_train_set.shuffle(5000, seed=42).batch(batch_size).prefetch(1)
valid_set = raw_valid_set.batch(batch_size).prefetch(1)
test_set = raw_test_set.batch(batch_size).prefetch(1)

In [276]:
# First 200 bytes of the first review in the first batch
list(train_set.take(1))[0][0][0].numpy()[:200]

b'This is a documentary that came out of the splendid work of a Canadian landscape photographer whose interest has long been in the ravages left on earth by the excavations or buildings of man. It begin'

In [277]:
# Build a vocabulary limited to 1,000 tokens from the training reviews.
# NOTE: this rebinds `text_vec_layer`, which print_vocab and next_char read as
# a global. After this cell those character-level helpers use the word-level
# layer unless the character-level cell is run again.
vocab_size = 1_000
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens=vocab_size)
text_vec_layer.adapt(train_set.map(lambda reviews, labels: reviews)) # adapt on the reviews only, not the labels

In [278]:
text_vec_layer.get_vocabulary()[:5]
# '' and '[UNK]' are the padding and unknown tokens (IDs 0 and 1), not actual
# words; the real vocabulary starts after them

['', '[UNK]', 'the', 'and', 'a']

In [279]:
# Encode the first review of the first batch as word IDs
# (ID 1 marks words outside the 1,000-token vocabulary)
seq_to_vector = text_vec_layer(list(train_set.take(1))[0][0][0])
seq_to_vector

<tf.Tensor: shape=(194,), dtype=int64, numpy=
array([  1,   7,  11,   1,   8, 188,   1,   4,   1,   5,   1,  12, 156,
        58,   2,   1, 454,   4,   1,   1,  16,   1,   1, 295,   1,   1,
       672, 383,   2, 165, 214,  28, 255,   8,   2, 430,   1,   1, 958,
        60, 267, 101,   1,   6, 921,  17,  28, 247,  45,   4, 166,  97,
         5,   1,  24,   1,   1,   1, 440,   1, 674, 154,   1,   1,  12,
         1,   1,   7,   6, 937,  49,   1,   1,  14,   6,   1,   1,   8,
         1, 663,   1,   1,   2, 198,   1,  37,   2, 242, 204, 255,   4,
       219, 171, 324,   1,   8,  11, 521,   3,   1, 105, 149, 296,   5,
         1,   1,   1, 384, 313, 304, 643, 384,   1,  12,   1, 669,   1,
         1,   1,   6,  34, 606,   1,  33,   1,   1,   1,  55, 498, 133,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,  50,  29,  23,
       325,   4,   1, 863, 597,   1,  58,  11, 735,  17,   4, 544,   5,
         1,  16,  11,   1,   1, 339,   3,  44,  23,  57,  39,  23,  69,
       130,   4,  

In [292]:
# As discussed before, there are two ways to turn these integer IDs into
# vectors that a network can work with:
# 1. One-hot encoding [needs 1000 input neurons, i.e. 1000-dimensional data]
# 2. An Embedding layer [the vector size is a tunable hyperparameter]
#
cls()
# The embedding layer outputs one row per word: the review has 194 words,
# so the result has shape (194, 128)
tf.keras.layers.Embedding(vocab_size,128)(seq_to_vector)

<tf.Tensor: shape=(194, 128), dtype=float32, numpy=
array([[ 0.03977952, -0.03782602, -0.03646283, ...,  0.00236253,
         0.03332629,  0.02803668],
       [ 0.02248487, -0.02848336,  0.04786098, ...,  0.03069806,
        -0.04317403, -0.04145076],
       [-0.02399485,  0.01468222,  0.00041829, ...,  0.02498427,
        -0.02674054, -0.00808267],
       ...,
       [ 0.03977952, -0.03782602, -0.03646283, ...,  0.00236253,
         0.03332629,  0.02803668],
       [ 0.03977952, -0.03782602, -0.03646283, ...,  0.00236253,
         0.03332629,  0.02803668],
       [ 0.03977952, -0.03782602, -0.03646283, ...,  0.00236253,
         0.03332629,  0.02803668]], dtype=float32)>

In [294]:
# The embedding size is a hyperparameter that can be tuned
embed_size = 128
cls()

# Raw text -> word IDs -> embeddings -> GRU -> probability of a positive review
model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Embedding(vocab_size, embed_size),
    tf.keras.layers.GRU(12),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="nadam", metrics=["accuracy"])
history = model.fit(train_set, validation_data=valid_set, epochs=1)



### Masking
Masking lets the model ignore certain time steps, such as padding tokens. A masking-aware layer creates a boolean mask (`False` at the padding positions and `True` at all other time steps) and propagates it to the following layers. The recurrent layers then ignore the masked time steps: when a recurrent cell encounters a masked time step, it simply carries its previous hidden state over to the next step. Masked time steps do not contribute to the loss either.

In [None]:
# For masking to work properly, every layer between the embedding and the
# output must support masking
#
embed_size = 128
cls()
model = tf.keras.Sequential([
    text_vec_layer,
    # mask_zero=True: ID 0 (padding) is masked out for the following layers
    tf.keras.layers.Embedding(vocab_size, embed_size, mask_zero=True),
    tf.keras.layers.GRU(128),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
history = model.fit(train_set, validation_data=valid_set, epochs=5)

In [302]:
#########################################
# Custom masking Using Functional Model #
#########################################

cls()
# shape=[] : each sample is a single string
inputs = tf.keras.layers.Input(shape=[], dtype=tf.string)
token_ids = text_vec_layer(inputs)
# True for real tokens, False where the ID is 0 (padding)
mask = tf.math.not_equal(token_ids, 0)
Z = tf.keras.layers.Embedding(vocab_size, embed_size)(token_ids)
# The mask is passed explicitly to both recurrent layers
Z = tf.keras.layers.GRU(128, return_sequences=True, dropout=0.2)(Z, mask=mask)
Z = tf.keras.layers.GRU(128, dropout=0.2)(Z, mask=mask)
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(Z)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 text_vectorization (TextVector  (None, None)        0           ['input_1[0][0]']                
 ization)                                                                                         
                                                                                                  
 embedding (Embedding)          (None, None, 128)    128000      ['text_vectorization[9][0]']     
                                                                                                  
 tf.math.not_equal (TFOpLambda)  (None, None)        0           ['text_vectorization[9][0]'] 

In [None]:
# Train the explicitly masked functional model
model.compile(loss="binary_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
history = model.fit(train_set, validation_data=valid_set, epochs=5)


In [313]:
# The default layer returns a regular (dense) tensor: shorter sentences are
# padded with 0s up to the length of the longest one
text_vec_layer(["Great movie!", "This is dicaprio's best role."])

<tf.Tensor: shape=(2, 5), dtype=int64, numpy=
array([[ 86,  18,   0,   0,   0],
       [ 11,   7,   1, 116, 214]], dtype=int64)>

In [309]:
# The layer can also produce ragged tensors (ragged=True)
text_vec_layer_ragged = tf.keras.layers.TextVectorization(max_tokens=vocab_size, ragged=True)
text_vec_layer_ragged.adapt(train_set.map(lambda reviews, labels: reviews))

In [314]:
# In a ragged tensor the rows keep their own lengths, so the result is not a
# rectangular 2D matrix and contains no padding
text_vec_layer_ragged(["Great movie!", "This is DiCaprio's best role."])

<tf.RaggedTensor [[86, 18], [11, 7, 1, 116, 214]]>

In [316]:
# Same model as before, but fed by the ragged vectorization layer; the
# Embedding layer accepts ragged tensors as well as regular ones
embed_size = 5
cls()
model = tf.keras.Sequential([
    text_vec_layer_ragged,
    tf.keras.layers.Embedding(vocab_size, embed_size),
    tf.keras.layers.GRU(2),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
history = model.fit(train_set, validation_data=valid_set, epochs=1)





# 3. Reusing Pretrained Embeddings and Language Models

In [326]:
import os
import tensorflow_hub as hub

# Store downloaded TF Hub modules in ./datasets/my_tfhub_cache instead of the
# default temporary directory
os.environ["TFHUB_CACHE_DIR"] = "./datasets/my_tfhub_cache"
cls()
model = tf.keras.Sequential([
    # Pretrained sentence encoder used as a frozen (trainable=False) first layer;
    # it takes raw strings as input
    hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4", trainable=False, dtype=tf.string, input_shape=[]),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="nadam",metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 512)               256797824 
                                                                 
 dense (Dense)               (None, 64)                32832     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 256,830,721
Trainable params: 32,897
Non-trainable params: 256,797,824
_________________________________________________________________


In [328]:
# Only the two Dense layers are trained; the encoder stays frozen
model.fit(train_set, validation_data=valid_set, epochs=1)



<keras.callbacks.History at 0x1ada4752910>