In [256]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import time
from posixpath import join

In [257]:
print("Tensorflow version:",tf.__version__)

Tensorflow version: 2.10.1


## Get Data

In [258]:
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt', 
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
    cache_dir="."
)

### Read File

In [259]:
# Decode the file
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# Print first 100 Characters
print(text[0:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


#### Total Characters

In [260]:
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


#### Unique Characters

In [261]:
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')
print(vocab)

65 unique characters
['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


#### Since ML training requires number, assign each character a number 

#### Each character the vocabulary will be assigned a unique number

In [262]:
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), 
    mask_token=None
)
''' Print the vocabulary '''
for i in range(len(ids_from_chars.get_vocabulary())):
    print(f'{i} : {ids_from_chars.get_vocabulary()[i]}')
    
''' Here UNK is special Character for Out of Vocabulary Words (OOV)'''
''' 1 corresponds to \n '''
''' 2 corresponds to space'''

0 : [UNK]
1 : 

2 :  
3 : !
4 : $
5 : &
6 : '
7 : ,
8 : -
9 : .
10 : 3
11 : :
12 : ;
13 : ?
14 : A
15 : B
16 : C
17 : D
18 : E
19 : F
20 : G
21 : H
22 : I
23 : J
24 : K
25 : L
26 : M
27 : N
28 : O
29 : P
30 : Q
31 : R
32 : S
33 : T
34 : U
35 : V
36 : W
37 : X
38 : Y
39 : Z
40 : a
41 : b
42 : c
43 : d
44 : e
45 : f
46 : g
47 : h
48 : i
49 : j
50 : k
51 : l
52 : m
53 : n
54 : o
55 : p
56 : q
57 : r
58 : s
59 : t
60 : u
61 : v
62 : w
63 : x
64 : y
65 : z


' 2 corresponds to space'

#### Here is a small example of how to convert string to numbers and back

In [263]:
example_texts = ['Machine Learning']
chars = tf.strings.unicode_split(
    example_texts, 
    input_encoding='UTF-8'
)
ids = ids_from_chars(chars)
print("Numerical Representation: ",ids)

Numerical Representation:  <tf.RaggedTensor [[26, 40, 42, 47, 48, 53, 44, 2, 25, 44, 40, 57, 53, 48, 53, 46]]>


In [264]:
''' This layer maps numbers to character '''
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), 
    invert=True, 
    mask_token=None
)

character_representation = chars_from_ids(ids)
print("Character representation: \n", character_representation)

string_representation = tf.strings.reduce_join(character_representation)
print("Get String from Ids:",string_representation.numpy())

Character representation: 
 <tf.RaggedTensor [[b'M', b'a', b'c', b'h', b'i', b'n', b'e', b' ', b'L', b'e', b'a', b'r',
  b'n', b'i', b'n', b'g']]>
Get String from Ids: b'Machine Learning'


In [265]:
''' Creating Ids to Strings function for future use '''
def string_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids))

## Create Training Data

In [266]:
''' Convert Text to numbers '''
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1], dtype=int64)>

#### Convert Ids into Tensorflow Dataset

In [267]:
''' Convert our numpy data to Tensorflow Dataset'''
''' Tensorflow Dataset is better suited for training and is fast '''
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
print("Tensorflow Dataset for ids", ids_dataset)

Tensorflow Dataset for ids <TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>


#### Print 10 elements from TF Dataset

In [268]:
k = 0
for i in ids_dataset:
    print(f"At index {k}: {i}")
    if k >= 10:
        break
    k = k + 1

At index 0: 19
At index 1: 48
At index 2: 57
At index 3: 58
At index 4: 59
At index 5: 2
At index 6: 16
At index 7: 48
At index 8: 59
At index 9: 48
At index 10: 65


In [269]:
''' Better Way to do it '''
for ids in ids_dataset.take(10):
    ''' chars_from_ids: Converts Numbers to Characters '''
    print(chars_from_ids(ids).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


### How Does our training Data looks like?

In [270]:
''' Suppose we have the words : "Machine Learning" '''
''' When we input "M" we want our model to predict "a" '''
''' When we feed in "a" we want our model to predict "c" '''
''' and so on '''

#  Input: "Machine Learnin"
#  Label: "achine Learning"

' and so on '

#### Create Sequences of Length 100

In [271]:
''' Why are we creating batches of 100 + 1 will be clear when we create Input and Label pairs '''
seq_length = 100
sequences = ids_dataset.batch(
    seq_length+1, 
    drop_remainder=True
)

In [272]:
''' Sequences in Numbers '''
for seq in sequences.take(1):
    print(seq)
    
''' Sequences in Characters '''
for seq in sequences.take(1):
    print(chars_from_ids(seq))
    
''' Sequences in strings '''
for seq in sequences.take(5):
    print("\n",string_from_ids(seq).numpy())

tf.Tensor(
[19 48 57 58 59  2 16 48 59 48 65 44 53 11  1 15 44 45 54 57 44  2 62 44
  2 55 57 54 42 44 44 43  2 40 53 64  2 45 60 57 59 47 44 57  7  2 47 44
 40 57  2 52 44  2 58 55 44 40 50  9  1  1 14 51 51 11  1 32 55 44 40 50
  7  2 58 55 44 40 50  9  1  1 19 48 57 58 59  2 16 48 59 48 65 44 53 11
  1 38 54 60  2], shape=(101,), dtype=int64)
tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)

 b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\

### Create Input and Labels

In [273]:
''' This function will take 101 length Sequence and create Input and Label '''
''' Input: 0-99 Characters '''
''' Label: 1:100 Characters '''
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

'''Example '''
split_input_target(list("Machine Learning"))

(['M', 'a', 'c', 'h', 'i', 'n', 'e', ' ', 'L', 'e', 'a', 'r', 'n', 'i', 'n'],
 ['a', 'c', 'h', 'i', 'n', 'e', ' ', 'L', 'e', 'a', 'r', 'n', 'i', 'n', 'g'])

In [274]:
dataset = sequences.map(split_input_target)

In [275]:
for inputs, labels in dataset.take(1):
    print(string_from_ids(inputs))
    print(string_from_ids(labels))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou ', shape=(), dtype=string)


### Create Training Batches 

In [276]:
batch_size = 64
buffer_size = 1024

dataset = (
    dataset
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

## Create Model

In [277]:
''' Set Parameters '''
vocab_size = len(ids_from_chars.get_vocabulary())
embedding_dimensions = 256
LSTM_Cells = 1024

###  Model: Input -> Embedding_layer -> GRU_Layer -> Dense -> Softmax 

### Stateless

In [278]:
''' Our Input length is Sequence length: 100'''
x = tf.keras.layers.Input(shape=(None, ))

''' Each id in Vocabulary is mapped to a 256 length vector '''
embeddings = tf.keras.layers.Embedding(
    input_dim=vocab_size, 
    output_dim=embedding_dimensions,
    name="Embedding_Layer"
)(x)

''' GRU Layer '''
gru_output = tf.keras.layers.GRU(
    units=LSTM_Cells,
    return_sequences=True,
    name="GRU_Layer"
)(embeddings)

''' Prediction layer '''
prediction = tf.keras.layers.Dense(
    units=vocab_size,
    name="Dense_Layer"
)(gru_output)

model = tf.keras.Model(inputs=x, outputs=prediction)
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, None)]            0         
                                                                 
 Embedding_Layer (Embedding)  (None, None, 256)        16896     
                                                                 
 GRU_Layer (GRU)             (None, None, 1024)        3938304   
                                                                 
 Dense_Layer (Dense)         (None, None, 66)          67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


### Stateful

#### Make a prediction

In [279]:
for inputs, labels in dataset.take(1):
    sampled_input, sampled_label = inputs, labels
    print("Input :", string_from_ids(inputs[0]))
    print("Label :", string_from_ids(labels[0]))
    prediction = model(inputs)
    print("Batch Size: ",prediction.shape[0])
    print("Sequence Length: ",prediction.shape[1])
    print("Vocabulary Size: ",prediction.shape[2])
    

Input : tf.Tensor(b'us.\n\nTITUS:\n\nCOMINIUS:\nNoble Marcius!\n\nFirst Senator:\n\nMARCIUS:\nNay, let them follow:\nThe Volsces ha', shape=(), dtype=string)
Label : tf.Tensor(b's.\n\nTITUS:\n\nCOMINIUS:\nNoble Marcius!\n\nFirst Senator:\n\nMARCIUS:\nNay, let them follow:\nThe Volsces hav', shape=(), dtype=string)
Batch Size:  64
Sequence Length:  100
Vocabulary Size:  66


In [280]:
def get_string_from_prediction(prediction):
    ''' Get max probability index for each sequence '''
    ids = tf.argmax(prediction, axis=1)
    ''' Get the string '''
    return string_from_ids(ids.numpy())

In [282]:
''' Untrained model results '''
print("Prediction: ", prediction[0])
print("Text representation: ", get_string_from_prediction(prediction[0]))

Prediction:  tf.Tensor(
[[ 4.55405377e-03 -1.28299426e-02  1.11228914e-03 ... -1.53576443e-03
  -9.77340154e-04  3.24896071e-03]
 [ 1.19512435e-04  1.02393366e-02  4.00568498e-03 ... -5.52313868e-04
   2.45948974e-03  7.68733630e-03]
 [ 4.41037584e-03 -3.49492906e-03  3.57401767e-03 ... -7.55473366e-03
   4.11812216e-05 -9.83093167e-04]
 ...
 [-7.03154411e-03  9.24186874e-03 -5.23744617e-04 ...  1.31909810e-02
   7.82135688e-03 -1.64746866e-03]
 [-5.96299209e-03  5.14188781e-03 -1.68444533e-02 ...  5.89163601e-03
   7.41392467e-03  1.47369690e-04]
 [-9.43620130e-03 -6.11680932e-03 -1.44772707e-02 ...  7.31437583e-04
  -9.78195225e-04  1.46481795e-02]], shape=(100, 66), dtype=float32)
Text representation:  tf.Tensor(b'tGGh 3SSb,Soo  lhRRPVSoRRxCc;;lBd!thihh:pohhKVmSSSJoooolh\nVabVSoRR.L;KSS!nSSC;TEECECCo3SS;LJCG-mk;!S', shape=(), dtype=string)


### Loss Function

In [283]:
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

### Optimizer

In [284]:
model.compile(optimizer='adam', loss=loss)

In [285]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_dir/stateless/v3'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [325]:
EPOCHS = 100
history = model.fit(
    dataset, 
    epochs=EPOCHS, 
    callbacks=[checkpoint_callback],
    workers=8
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 99/100
Epoch 100/100


#### Load Model from Checkpoint if needed

In [287]:
# model.load_weights('./training_dir/stateless/v2/ckpt_19')

#### Saved Model as h5

In [288]:
# model.save('./saved_model/stateless_ml.h5')
# model = tf.keras.models.load_model('./saved_model/stateless_ml.h5')
# model.summary()

### Set probability of UNK character to -inf

In [327]:
''' Get index of UNK and expand dims using [:,None]'''
unk_index = ids_from_chars(['[UNK]'])
unk_index = tf.expand_dims(unk_index, axis=0)
print(unk_index)

tf.Tensor([[0]], shape=(1, 1), dtype=int64)


In [328]:
''' This mask takes the index of UNK i.e. 0. Index needs to be 2d array '''
''' Sets it value to -inf (we can add more indexes to -inf if needed) '''
''' And outputs an array of size len(vocabulary)'''
skip_ids = unk_index
sparse_mask = tf.SparseTensor(
    indices=skip_ids,
    values=[-float('inf')]*len(skip_ids),
    dense_shape=[len(ids_from_chars.get_vocabulary())]
)

''' Create the array using the sparse_mask'''
prediction_mask = tf.sparse.to_dense(sparse_mask)

In [329]:
print(prediction_mask)

tf.Tensor(
[-inf   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.], shape=(66,), dtype=float32)


#### Create prediction

In [330]:
''' Test String '''
input_to_model = "Second Citizen:\nWould \nAll:\nAgainst him first: he's a "
print(len(input_to_model))

# Convert strings to token IDs.
input_chars = tf.strings.unicode_split(input_to_model, 'UTF-8')
input_ids = ids_from_chars(input_chars)
print(input_ids.shape)
''' Reshaping to [1, 100, 256]'''
input_ids = tf.expand_dims(input_ids, axis=0)

54
(54,)


In [331]:
''' Make prediction '''
predicted_logits = model(input_ids)
print("Shape of prediction", predicted_logits.shape)

Shape of prediction (1, 54, 66)


In [332]:
''' Only Use the last prediction '''
predicted_logits = predicted_logits[:, -1, :]
print("Last Prediction: ",predicted_logits)

Last Prediction:  tf.Tensor(
[[ -9.246437   -17.632954    -9.689228   -12.72849     -9.112964
   -9.590695    -9.763602   -13.179567   -13.928673   -13.362494
  -16.017387   -12.148553    -9.504623   -19.8527     -13.070886
   -0.4352389    1.6912851   -4.4968443   -2.7709296   -2.88457
   -7.7103333    0.5006807   -0.9558542    0.7200699    2.1808965
   -1.5422612    5.142561    -2.2195165  -10.284126     4.313134
   -5.0606885    4.0031476   -1.4036236   -3.4693992  -16.445179
   -8.638715    -5.4636188   -7.1255817   -8.771386   -13.710989
   -0.9573246    8.75647      3.3905513    8.47859     -1.7532681
    9.405699     7.0267553    8.103318     0.13917461   0.8375107
    6.97197      9.272437     8.935901     4.312721    -0.60793453
   10.420002     0.81972086   9.751318     7.719472     8.160555
   -0.22633009   6.89016      6.2521358   -6.127429     3.5153356
   -6.3698115 ]], shape=(1, 66), dtype=float32)


In [333]:
predicted_logits = predicted_logits + prediction_mask
print("Updated prediction logits: ",predicted_logits)

Updated prediction logits:  tf.Tensor(
[[        -inf -17.632954    -9.689228   -12.72849     -9.112964
   -9.590695    -9.763602   -13.179567   -13.928673   -13.362494
  -16.017387   -12.148553    -9.504623   -19.8527     -13.070886
   -0.4352389    1.6912851   -4.4968443   -2.7709296   -2.88457
   -7.7103333    0.5006807   -0.9558542    0.7200699    2.1808965
   -1.5422612    5.142561    -2.2195165  -10.284126     4.313134
   -5.0606885    4.0031476   -1.4036236   -3.4693992  -16.445179
   -8.638715    -5.4636188   -7.1255817   -8.771386   -13.710989
   -0.9573246    8.75647      3.3905513    8.47859     -1.7532681
    9.405699     7.0267553    8.103318     0.13917461   0.8375107
    6.97197      9.272437     8.935901     4.312721    -0.60793453
   10.420002     0.81972086   9.751318     7.719472     8.160555
   -0.22633009   6.89016      6.2521358   -6.127429     3.5153356
   -6.3698115 ]], shape=(1, 66), dtype=float32)


In [334]:
''' Sample from the distribution '''
''' It is important to sample from this distribution '''
''' as taking the argmax of the distribution can easily get the model stuck in a loop. '''
prediction_id = tf.random.categorical(predicted_logits, num_samples=1)
print("Prediction Id: ", prediction_id[0,0])
print("Prediction Character: ", chars_from_ids(prediction_id[0,0]))


Prediction Id:  tf.Tensor(51, shape=(), dtype=int64)
Prediction Character:  tf.Tensor(b'l', shape=(), dtype=string)


In [335]:
#### Combining the above step into a function
def one_step_prediction(input_chars):
    ''' Convert to Integers'''
    input_ids = ids_from_chars(tf.strings.unicode_split(input_chars, 'UTF-8'))
    input_ids = tf.expand_dims(input_ids, axis=0)
    
    ''' Make Prediction '''
    predicted_logits = model(input_ids)
    
    ''' Get Last Prediction & Apply Mask '''
    last_pred = predicted_logits[:, -1, :]
    last_pred = last_pred + prediction_mask
    ''' Sample an output '''
    prediction_id = tf.random.categorical(last_pred, num_samples=1)
    prediction_id = tf.squeeze(prediction_id, axis=-1)
    
    ''' Return Characted'''
    return chars_from_ids(prediction_id)[0]

In [336]:
next_char = tf.constant('ROMEO:')
result = []
result.append([next_char])

for n in range(1000):
    next_char = one_step_prediction(next_char)
    result.append([next_char])

''' Combine all stings '''
res = tf.strings.join(result)
''' Print Output'''
print(res.numpy()[0].decode('utf-8'))

ROMEO:
UMyorater sead VETRONELIScous tr hareke
Biofavie when be.
ARO:
THete thelfevin alaiod y.

KEnonge,
O, vetos, IORD he?
Prees a tu bor.
Tho.
Ano, ier:

WANCllitoofftheraus fomen, s menopler d atwe inco for?
Ve come, IO:
I d,
ONCHeencogo buncon ay s anican oth, rea n gofor
ANCINofoullirestryoug s,
ICERYoutathize
ORIUEl.
S:
ORird:
HAND gacofaneencin; pee s y my w s he-th coreat ha amizerorast oss,
Thorgeasthealu a se a stou he;
BEO:
Be;
A:
CKal deareve
We ote sp my tinou,
Whan a f d ise my iroldomeshathy thevens t, ganomireathy;
THily at! helath s,

KE:
STRe ecusersousone,
I:
Anofe.
A: hare hath jul RDYoofowoul.
A williore,
Homyoutwio I t, I o, fouplo;

STheeles ftis hathee, and;
PELIDURTeve w t o n,

Ther'd maichous weato mybyiokeadyouldrsbea t TETonthe fant melout IO:
Ticube d h yofier weenoud te w the sst r h of ce denoutas ioove iowee IORYo, suthonld d st pr, otor fo wnd my t hatu d hest ORIENO:
ORD a ho gate ie,
Wis omal waseo ng my

A: the s y horond swexeg,
TOuts, rut, ELORYo

# Stateful LSTM

In [311]:
''' Our Input length is Sequence length: 100'''
x = tf.keras.layers.Input(
    shape=(None, ),
    batch_size=64
)

''' Each id in Vocabulary is mapped to a 256 length vector '''
embeddings = tf.keras.layers.Embedding(
    input_dim=vocab_size, 
    output_dim=embedding_dimensions,
    name="Embedding_Layer"
)(x)

''' GRU Layer '''
gru_output = tf.keras.layers.GRU(
    units=LSTM_Cells,
    return_sequences=True,
    stateful=True,
    name="GRU_Layer"
)(embeddings)

''' Prediction layer '''
prediction = tf.keras.layers.Dense(
    units=vocab_size,
    name="Dense_Layer"
)(gru_output)

model_stateful = tf.keras.Model(inputs=x, outputs=[prediction])
model_stateful.summary()

Model: "model_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(64, None)]              0         
                                                                 
 Embedding_Layer (Embedding)  (64, None, 256)          16896     
                                                                 
 GRU_Layer (GRU)             (64, None, 1024)          3938304   
                                                                 
 Dense_Layer (Dense)         (64, None, 66)            67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [None]:
epochs = 20
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()
# Directory where the checkpoints will be saved
checkpoint_dir = './training_dir/stateful/v2'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

### Train

In [303]:
@tf.function
def train_function(model_stateful, inputs):
    X, label = inputs
    with tf.GradientTape() as tape:
        prediction = model_stateful(X)
        loss = loss_function(label, prediction)
    training_variables = model_stateful.trainable_variables
    gradient = tape.gradient(loss, training_variables)
    optimizer.apply_gradients(zip(gradient, training_variables))
    return {'loss':loss}

In [304]:
mean = tf.metrics.Mean()
state = None
for epoch in range(20):
    loss = 0
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = train_function(model_stateful, [inp, target])
        mean.update_state(logs['loss'])
        
    ''' Print Loss for an epoch'''
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    
    ''' Save every 5th Epoch model '''
    if (epoch + 1) % 5 == 0:
        model_stateful.save_weights(checkpoint_prefix.format(epoch=epoch))


Epoch 1 Loss: 2.7245
Epoch 2 Loss: 2.3704
Epoch 3 Loss: 2.1681
Epoch 4 Loss: 2.0265
Epoch 5 Loss: 1.9210
Epoch 6 Loss: 1.8393
Epoch 7 Loss: 1.7739
Epoch 8 Loss: 1.7196
Epoch 9 Loss: 1.6735
Epoch 10 Loss: 1.6334
Epoch 11 Loss: 1.5976
Epoch 12 Loss: 1.5652
Epoch 13 Loss: 1.5353
Epoch 14 Loss: 1.5073
Epoch 15 Loss: 1.4808
Epoch 16 Loss: 1.4554
Epoch 17 Loss: 1.4309
Epoch 18 Loss: 1.4072
Epoch 19 Loss: 1.3841
Epoch 20 Loss: 1.3615


### Prediction Function

In [312]:
''' Our Input length is Sequence length: 100'''
x = tf.keras.layers.Input(
    shape=(None, ),
    batch_size=1
)

''' Each id in Vocabulary is mapped to a 256 length vector '''
embeddings = tf.keras.layers.Embedding(
    input_dim=vocab_size, 
    output_dim=embedding_dimensions,
    name="Embedding_Layer"
)(x)

''' GRU Layer '''
gru_output = tf.keras.layers.GRU(
    units=LSTM_Cells,
    return_sequences=True,
    stateful=True,
    name="GRU_Layer"
)(embeddings)

''' Prediction layer '''
prediction = tf.keras.layers.Dense(
    units=vocab_size,
    name="Dense_Layer"
)(gru_output)

new_model_stateful = tf.keras.Model(inputs=x, outputs=[prediction])
new_model_stateful.summary()

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(1, None)]               0         
                                                                 
 Embedding_Layer (Embedding)  (1, None, 256)           16896     
                                                                 
 GRU_Layer (GRU)             (1, None, 1024)           3938304   
                                                                 
 Dense_Layer (Dense)         (1, None, 66)             67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [317]:
new_model_stateful.load_weights('./training_dir/stateful/v2/ckpt_19')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1ffe61b2620>

In [323]:
#### Combining the above step into a function
def one_step_prediction_stateful(model_stateful, input_chars):
    ''' Convert to Integers'''
    input_ids = ids_from_chars(tf.strings.unicode_split(input_chars, 'UTF-8'))
    input_ids = tf.expand_dims(input_ids, axis=0)
    
    ''' Make Prediction '''
    predicted_logits = model_stateful(input_ids)
    ''' Get Last Prediction & Apply Mask '''
    last_pred = predicted_logits[:, -1, :]
    last_pred = last_pred + prediction_mask
    ''' Sample an output '''
    prediction_id = tf.random.categorical(last_pred, num_samples=1)
    prediction_id = tf.squeeze(prediction_id, axis=-1)
    
    ''' Return Characted'''
    return chars_from_ids(prediction_id)[0]

In [324]:
next_char = tf.constant('ROMEO:')
result = []
result.append([next_char])

for n in range(1000):
    next_char = one_step_prediction_stateful(new_model_stateful, next_char)
    result.append([next_char])
res = tf.strings.join(result)
print(res.numpy()[0].decode('utf-8'))

ROMEO:
First, bad despised, shall straight I know
Out of their neither accessith; but I fear my father, brought to that?

HORTENSIO:
The vabour is wondrous question,--
For God's sake, nancius, your fortunes forgit you study by hers,
For in a trunk and nicely-wench we let them very haste:
There to that art thine.

LUCENTIO:
The gods sir, were you so curst in fault, and nothing of all
My masters for thee man! withto his state, will rise or seven,
Keave stands, betwixt us. But I shall not be savian:
The more my lord but what you are.

Petake horn-bed.
And, let it straight.

PETRUCHIO:
He's a husband for it shall, I fear, a boy.

CLAUDIO:
Lendle Ledjul's foot.
My bridal drount; I love not ot could spiet upon my husband.
Gong to his welcomed for you;
I, how he being, O, what either appliar,
And hold to her down for Baptista should be mine:
Do he might wear the clouds, as Every stage and good would incide no more; and craces this of his sustaining starms
Perduard an evil of my foot,
That til

### Stateful uising fit

In [339]:
''' Our Input length is Sequence length: 100'''
x = tf.keras.layers.Input(
    shape=(None, ),
    batch_size=64
)

''' Each id in Vocabulary is mapped to a 256 length vector '''
embeddings = tf.keras.layers.Embedding(
    input_dim=vocab_size, 
    output_dim=embedding_dimensions,
    name="Embedding_Layer"
)(x)

''' GRU Layer '''
gru_output = tf.keras.layers.GRU(
    units=LSTM_Cells,
    return_sequences=True,
    stateful=True,
    name="GRU_Layer"
)(embeddings)

''' Prediction layer '''
prediction = tf.keras.layers.Dense(
    units=vocab_size,
    name="Dense_Layer"
)(gru_output)

model = tf.keras.Model(inputs=x, outputs=prediction)
model.summary()

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_16 (InputLayer)       [(64, None)]              0         
                                                                 
 Embedding_Layer (Embedding)  (64, None, 256)          16896     
                                                                 
 GRU_Layer (GRU)             (64, None, 1024)          3938304   
                                                                 
 Dense_Layer (Dense)         (64, None, 66)            67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [345]:
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss)
# Directory where the checkpoints will be saved
checkpoint_dir = './training_dir/stateful/v3_fit'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [346]:
EPOCHS = 20
history = model.fit(
    dataset, 
    epochs=EPOCHS, 
    callbacks=[checkpoint_callback],
    workers=8
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [347]:
new_model_stateful.load_weights('./training_dir/stateful/v3_fit/ckpt_19')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1ffe61dc100>

In [348]:
next_char = tf.constant('ROMEO:')
result = []
result.append([next_char])

for n in range(1000):
    next_char = one_step_prediction_stateful(new_model_stateful, next_char)
    result.append([next_char])
res = tf.strings.join(result)
print(res.numpy()[0].decode('utf-8'))

ROMEO:

Provost:

ISABELLA:
Yes, I will not.

PETRUCHIO:
Well, and say you?

ANGELO:
What is't, widdon, Saint George, I would be unknown;
For maidy, yet this would be myself,
With fain and age: command an alute good,
And, from the thing we spake of sovereign.

KATHARINA:
I dare not say mehee o jest or it:
That I wear nothing
Shall vie; sweet Isabel,
I will not so unto my brother
And most gue dishonourebour than a man.

MIRANDA:
How goes it to?

TRANIO:
You
go to physic then person belock,
As to speak to the pedvant: speak me, Signior Lucentio horse;
For 'tis acquainted with!
What, shall we go? then wagars we?
Shall we wear her in her eyes,
Be strange: there shall speak before you do: I had mine arrive her
To take this mere favou loudest, as honest
Than Seeming of Signior Bracks now in Padua for thy life
Persuadesturn'd to your holy love.

SEBASTIAN:
O Arily, you are telf?

BENVOLIO:
Tell me, Harry, were it become this.

PETRUCHIO:
Nay, that Parland where it is: I pray you, be your bida