# setup

In [1]:
# import tensorflow and other libraries
import os
import numpy as np
import time
import tensorflow as tf

2023-05-16 18:10:11.090737: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-16 18:10:11.566134: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/lib64::/home/rmaia/miniconda3/envs/dim0494/lib/:/home/rmaia/miniconda3/envs/dim0494/lib/
2023-05-16 18:10:11.566199: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY

In [4]:
# Download the Shakespeare dataset unless a local copy is already present
# in the working directory.
path_to_file = 'shakespeare.txt'
if not os.path.exists(path_to_file):
    # path_to_file is rebound to whatever location get_file reports for the
    # downloaded copy, so the cells below read from the right place.
    path_to_file = tf.keras.utils.get_file(
        'shakespeare.txt',
        'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
    )

In [5]:
# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read has finished.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [6]:
# Preview the first 250 characters of the corpus to check it was read correctly
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [8]:
# The unique characters in the file, sorted so the vocabulary order is
# deterministic; this list is what the StringLookup layer is built from.
vocab = sorted(set(text))
print(f'The data has {len(vocab)} unique characters')

The data has 65 unique characters


# text pre-processing

In [11]:
# Before training, you need to convert the strings to a numerical representation.

# The tf.keras.layers.StringLookup layer can convert each character into a numeric ID.
# It just needs the text to be split into tokens first.

In [12]:
# Two example sentences of different lengths
example_texts = ['Life is not fair.', 'But you have to deal with it.']

# Tokenization: split every string into its individual characters.
# The printed result is a RaggedTensor, one row per sentence.
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')

# print results
print(f'Text examples: {example_texts}\n')
print(f'Tokenized version: {chars}\n')

Text examples: ['Life is not fair.', 'But you have to deal with it.']

Tokenized version: <tf.RaggedTensor [[b'L', b'i', b'f', b'e', b' ', b'i', b's', b' ', b'n', b'o', b't', b' ',
  b'f', b'a', b'i', b'r', b'.']                                          ,
 [b'B', b'u', b't', b' ', b'y', b'o', b'u', b' ', b'h', b'a', b'v', b'e',
  b' ', b't', b'o', b' ', b'd', b'e', b'a', b'l', b' ', b'w', b'i', b't',
  b'h', b' ', b'i', b't', b'.']                                          ]>



In [13]:
# Create the tf.keras.layers.StringLookup layer that maps characters to
# integer ids. It is built from the corpus vocabulary and configured
# without a mask token.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None)

In [14]:
# Use the layer above to convert the tokenized example sentences into ids
ids = ids_from_chars(chars)
print(f'These are the ids of the examples above: {ids}\n')

These are the ids of the examples above: <tf.RaggedTensor [[25, 48, 45, 44, 2, 48, 58, 2, 53, 54, 59, 2, 45, 40, 48, 57, 9],
 [15, 60, 59, 2, 64, 54, 60, 2, 47, 40, 61, 44, 2, 59, 54, 2, 43, 44, 40,
  51, 2, 62, 48, 59, 47, 2, 48, 59, 9]                                   ]>



Since the goal is to generate text, it will also be important to invert this representation and recover human-readable strings from it.

For this you can use tf.keras.layers.StringLookup(..., invert=True).

Note: Here, instead of passing the original vocabulary generated with sorted(set(text)), use the get_vocabulary() method of the tf.keras.layers.StringLookup layer so that the [UNK] token is set the same way in both layers.

In [16]:
# Create the inverse layer (ids -> characters). It reuses the vocabulary
# reported by ids_from_chars so both layers agree on every id.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None)

In [18]:
# Convert the ids back into characters (round trip of the example sentences).
# Note that this rebinds `chars`, which previously held the tokenized input.
chars = chars_from_ids(ids)
print(f'These are the chars converted from ids: {chars}\n')

These are the chars converted from ids: <tf.RaggedTensor [[b'L', b'i', b'f', b'e', b' ', b'i', b's', b' ', b'n', b'o', b't', b' ',
  b'f', b'a', b'i', b'r', b'.']                                          ,
 [b'B', b'u', b't', b' ', b'y', b'o', b'u', b' ', b'h', b'a', b'v', b'e',
  b' ', b't', b'o', b' ', b'd', b'e', b'a', b'l', b' ', b'w', b'i', b't',
  b'h', b' ', b'i', b't', b'.']                                          ]>



In [19]:
# tf.strings.reduce_join glues the decoded characters back into strings.
def text_from_ids(ids):
  """Turn a tensor of character ids into the text it encodes.

  The ids are first mapped to characters with the chars_from_ids layer and
  the characters are then joined along the last axis.
  """
  decoded_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(decoded_chars, axis=-1)

# Test it on the ids of the two example sentences; the output should be the
# original sentences as byte strings.
t1 = text_from_ids(ids)
print(f'These are the examples (text) recoved from ids: {t1}\n')

These are the examples (text) recoved from ids: [b'Life is not fair.' b'But you have to deal with it.']



# Create training examples and targets

Next divide the text into example sequences. Each input sequence will contain seq_length characters from the text.

For each input sequence, the corresponding targets contain the same length of text, except shifted one character to the right.

So break the text into chunks of seq_length+1. For example, say seq_length is 4 and our text is "Hello". The input sequence would be "Hell", and the target sequence "ello".

To do this first use the tf.data.Dataset.from_tensor_slices function to convert the text vector into a stream of character indices.

In [22]:
# Whole corpus -> one long vector of character ids (one id per character).
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
print(f'These are the indices from the entire text : {all_ids}\n')
print(f'Number of indices : {len(all_ids)}\n')

These are the indices from the entire text : [19 48 57 ... 46  9  1]

Number of indices : 1115394



In [25]:
# Create a dataset that yields the character ids one element at a time
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [31]:
# Check what the dataset yields: one character id per element.
# The loop variable is deliberately not named `ids`, so the `ids` tensor
# holding the example sentences is not overwritten and the earlier cells
# that use it still give the same result when re-run.
for char_id in ids_dataset.take(15):
    print(f'id: {char_id}, corresponding character: {chars_from_ids(char_id).numpy().decode("utf-8")}')

id: 19, corresponding character: F
id: 48, corresponding character: i
id: 57, corresponding character: r
id: 58, corresponding character: s
id: 59, corresponding character: t
id: 2, corresponding character:  
id: 16, corresponding character: C
id: 48, corresponding character: i
id: 59, corresponding character: t
id: 48, corresponding character: i
id: 65, corresponding character: z
id: 44, corresponding character: e
id: 53, corresponding character: n
id: 11, corresponding character: :
id: 1, corresponding character: 



In [32]:
# Number of characters in each model input sequence (chunks of
# seq_length + 1 ids are cut so input and target can be shifted by one)
seq_length = 100

In [33]:
# The batch method groups the individual character ids into chunks of
# seq_length + 1; drop_remainder=True discards a final shorter chunk.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first chunk decoded back to characters
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [34]:
# It's easier to see what this is doing if you join the tokens back into
# strings: print the first five chunks as text.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


For training you'll need a dataset of (input, label) pairs.
Where input and label are sequences.

At each time step the input is the current character and the label is the next character.

In [38]:
# Build an (input, target) pair from one sequence: both are copies of the
# sequence, offset by one position so they line up per time step.
def split_input_target(sequence):
    """Split a sequence into a model input and its next-step target.

    Args:
        sequence: Any sliceable sequence (list, string, tensor, ...).

    Returns:
        A tuple (input_text, target_text): the sequence without its last
        element, and the sequence without its first element.
    """
    return sequence[:-1], sequence[1:]

# Test it on a plain Python list of characters
o1 = split_input_target(list("Tensorflow"))
print(f'If my input is \"Tensorflow\":\n Input: {o1[0]}\nOutput: {o1[1]}')

If my input is "Tensorflow":
 Input: ['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o']
Output: ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w']


In [39]:
# Apply that function to every chunk, giving a dataset of (input, target) pairs
dataset = sequences.map(split_input_target)

In [42]:
# Let's test it: print the first two (input, target) pairs as text
for input_example, target_example in dataset.take(2):
    print(f'Input :\n {text_from_ids(input_example).numpy()}')
    print(f'Target:\n {text_from_ids(target_example).numpy()}')

Input :
 b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target:
 b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
Input :
 b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '
Target:
 b're all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'


# Create training batches

You used tf.data to split the text into manageable sequences.

But before feeding this data into the model, you need to shuffle the data and pack it into batches.

In [43]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# The pipeline is rebuilt from `sequences` instead of from `dataset` itself,
# so re-running this cell does not batch an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE is the non-experimental name of the same constant.
    .prefetch(tf.data.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

# Build The Model

This section defines the model as a keras.Model subclass (For details see Making new Layers and Models via subclassing).

This model has three layers:

tf.keras.layers.Embedding: The input layer. A trainable lookup table that will map each character-ID to a vector with embedding_dim dimensions;

tf.keras.layers.GRU: A type of RNN with size units=rnn_units (You can also use an LSTM layer here.)

tf.keras.layers.Dense: The output layer, with vocab_size outputs. It outputs one logit for each character in the vocabulary. These are the unnormalized log-likelihoods of each character according to the model.

In [44]:
# Length of the vocabulary in the StringLookup layer; this is the number of
# ids the model must embed and the number of logits it must output.
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [46]:
# the model
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense.

    Args:
        vocab_size: Number of distinct token ids; used as the embedding
            input size and as the number of output logits.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the GRU layer.
    """

    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 rnn_units):
        # Zero-argument super() already binds this instance, so nothing is
        # passed here: handing `self` over again would give the Keras
        # constructor a spurious positional argument.
        super().__init__()

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences: one output per time step.
        # return_state: also return the final state, so callers can feed it
        # back in on the next call.
        self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self,
             inputs,
             states=None,
             return_state=False,
             training=False):
        """Run the model on a batch of token ids.

        Args:
            inputs: Batch of token-id sequences.
            states: Optional initial GRU state. When None, the GRU's own
                initial state is used.
            return_state: When True, also return the final GRU state.
            training: Forwarded to every layer.

        Returns:
            The logits (one vector of vocab_size values per time step), or
            a tuple (logits, states) when return_state is True.
        """
        x = self.embedding(inputs, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        return x

In [47]:
# Instantiate the model (a new MyModel is constructed here; nothing is read
# from disk)
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

# Try the model

Now run the model to see that it behaves as expected.

In [48]:
# First check the shape of the output on a single batch.
# The loop variables (input_example_batch, target_example_batch) and
# example_batch_predictions are reused by the cells that follow.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

2023-05-16 19:01:22.203311: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100


(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [49]:
# Print the layer-by-layer summary with parameter counts
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


To get actual predictions from the model you need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.

Note: It is important to sample from this distribution as taking the argmax of the distribution can easily get the model stuck in a loop.

Try it for the first example in the batch:

In [51]:
# Draw one sample per time step from the logits of the first example in the
# batch, drop the trailing sample axis, and convert to a numpy vector.
first_example_draws = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(first_example_draws, axis=-1).numpy()

# print it
print(f'These are indices taken from the output of the model: {sampled_indices}\n')

These are indices taken from the output of the model: [13 64 40 58 20 17 17 19 21 16 11 46 57 13 56  0 50 65 32 13 30  2 63 33
 51  1 16 29 48 40 14 60 46  2 21  8 23 19  6 65 52 12 51 38 31 26 29 65
 29  2 65 12  8 15 60  6 57 49 10 30 57 34  8 62 57 28 41 32 20 42 19 40
 65  8  8 35 42  3 30 20 55  6 40 58 38 15 62 41  4 33 40 63 30 63  3 38
 34 64 43 63]



In [52]:
# Decode the input and the sampled ids to text. The model is still untrained
# at this point, so the "predictions" are expected to look random.
print(f'Input:\n, {text_from_ids(input_example_batch[0]).numpy()}\n')
print(f'Next Char Predictions:\n, {text_from_ids(sampled_indices).numpy()}')

Input:
, b'our, I know not well what they\nare: but precise villains they are, that I am sure\nof; and void of al'

Next Char Predictions:
, b"?yasGDDFHC:gr?q[UNK]kzS?Q xTl\nCPiaAug H-JF'zm;lYRMPzP z;-Bu'rj3QrU-wrObSGcFaz--Vc!QGp'asYBwb$TaxQx!YUydx"


# Train the model

At this point the problem can be treated as a standard classification problem.
Given the previous RNN state, and the input this time step, predict the class of the next character.

Attach an optimizer, and a loss function
The standard tf.keras.losses.sparse_categorical_crossentropy loss function works in this case because it is applied across the last dimension of the predictions.

Because your model returns logits, you need to set the from_logits flag.

In [53]:
# Cost function: sparse categorical cross-entropy. from_logits=True because
# the model's Dense output layer returns raw logits.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [55]:
# Test the cost function on the example batch taken above
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print(f'Prediction shape: {example_batch_predictions.shape}') # (batch_size, sequence_length, vocab_size)
print(f'Mean loss:        {example_batch_mean_loss}')

Prediction shape: (64, 100, 66)
Mean loss:        4.188908576965332


A newly initialized model shouldn't be too sure of itself; the output logits should all have similar magnitudes.

To confirm this you can check that the exponential of the mean loss is approximately equal to the vocabulary size.

A much higher loss means the model is sure of its wrong answers, and is badly initialized:

In [56]:
# exp(mean loss) should be roughly the vocabulary size for a new model
tf.exp(example_batch_mean_loss).numpy()

65.95077

In [57]:
# Model compilation: Adam optimizer with the cross-entropy loss defined above
model.compile(optimizer='adam', loss=loss)

# Configure checkpoints

Use a tf.keras.callbacks.ModelCheckpoint to ensure that checkpoints are saved during training:

In [58]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'

# Name of the checkpoint files; "{epoch}" is a placeholder that is filled in
# per epoch (the manual training loop fills it with str.format).
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback for model.fit that writes only the weights, not the full model
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [59]:
# Number of passes over the dataset for model.fit
EPOCHS = 20

In [61]:
# Training with the checkpoint callback attached.
# NOTE(review): in the recorded run this call aborted during epoch 1 with
# "libdevice not found at ./libdevice.10.bc", raised from the optimizer's
# XLA update step (_update_step_xla in the traceback). That looks like a
# CUDA/XLA environment problem rather than a bug in this cell -- confirm
# that XLA can locate the CUDA toolkit before re-running.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20


2023-05-16 22:12:14.241016: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:446 : INTERNAL: libdevice not found at ./libdevice.10.bc
2023-05-16 22:12:14.241072: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:446 : INTERNAL: libdevice not found at ./libdevice.10.bc


InternalError: Graph execution error:

Detected at node 'StatefulPartitionedCall_5' defined at (most recent call last):
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/traitlets/config/application.py", line 982, in launch_instance
      app.start()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_8264/4024136925.py", line 2, in <module>
      history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/engine/training.py", line 1027, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
      self.apply_gradients(grads_and_vars)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
      return super().apply_gradients(grads_and_vars, name=name)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
      iteration = self._internal_apply_gradients(grads_and_vars)
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
      return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
      distribution.extended.update(
    File "/home/rmaia/miniconda3/envs/dim0494/lib/python3.9/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
      return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall_5'
libdevice not found at ./libdevice.10.bc
	 [[{{node StatefulPartitionedCall_5}}]] [Op:__inference_train_function_3092]

In [39]:
# text generation model
class OneStep(tf.keras.Model):
  """Wraps a language model so it emits one sampled character per call.

  Args:
    model: Model called as model(inputs=..., states=..., return_state=True)
      and returning (logits, states).
    chars_from_ids: Layer mapping token ids to characters.
    ids_from_chars: Layer mapping characters to token ids.
    temperature: Divisor applied to the logits before sampling.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive mask over the vocabulary: -inf at the id of "[UNK]" and 0
    # everywhere else, so "[UNK]" can never be sampled.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocabulary_length = len(ids_from_chars.get_vocabulary())
    unk_mask = tf.SparseTensor(
        indices=unk_ids,
        values=[-float('inf')] * len(unk_ids),
        dense_shape=[vocabulary_length])
    self.prediction_mask = tf.sparse.to_dense(unk_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for every string in `inputs`.

    Args:
      inputs: Batch of strings to continue.
      states: Model state from the previous step, or None on the first call.

    Returns:
      A tuple (predicted_chars, states): one sampled character per input
      string, and the model state to pass to the next call.
    """
    # Strings -> characters -> dense tensor of token ids.
    token_ids = self.ids_from_chars(
        tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    # logits.shape is [batch, char, next_char_logits]
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last time step, scale by the temperature, then add the
    # mask that rules out "[UNK]".
    logits = logits[:, -1, :] / self.temperature + self.prediction_mask

    # Draw one token id per batch entry and drop the sample axis.
    sampled_ids = tf.squeeze(
        tf.random.categorical(logits, num_samples=1), axis=-1)

    # Token ids -> characters, returned together with the model state.
    return self.chars_from_ids(sampled_ids), states

In [40]:
# Wrap the model with the two lookup layers for character-by-character generation
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [41]:
# Generate 1000 characters from a single prompt and time the run
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

# Each step feeds the previously generated character and the returned state
# back into the model.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the prompt and all generated characters into one string
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:
Villain, stir! strike! do you know this countryman:
The swift are and of thyself none at the clusch,
What treachery face thrusts no harm such sworn,
Which, though thou wilt obedien: this time that
As after reward, or that he knews,
You worldle steel, thus
Glood'd unto justice footing to the tenter as
we hear himself.' but, then, I do not
Too hot to pardon Rome! humbly thou
affript the crown.

QUEEN MARGARET:
What would your genitate here?

O God,
What news, invoint, my tent?

CATESBY:
Now, Words, report is cold of his the covertood; your
hongiers. You are a Roman;
For sleep my slave: where he should hear him stand:
This is a great deputy, but the wish of breath,
That he hath done met, though a cup of Rosaline.

RIVERS:
And therefore be rilier, another cause.

GLOUCESTER:
What think so fars? Is the beauty o'er-heads
our foe, and beat your parts, York and Edward's love,
Immedies 'gainst the throne a heavenly hate;
And show no warrant changing ill:
Is it thou corclivites me in his 

In [42]:
# Batch text generation: five copies of the same prompt are continued in
# parallel; sampling makes each continuation different.
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Element-wise join: one generated string per prompt
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b"ROMEO:\nThe which my sues, you are like gozer Marcius,\nEre we were all unhappier than you, against his strong:\nIre parted to reply, at the lie I'll lady\nAnd still to purpose and virtuous wrongs\nAnd downright by thine. And as I turn:\nTo hon, I, believe not this beful mine.\n\nRICHMOND:\nGod and hang'd children not the chirace of ourselves:\nWhy, then, as I do priso leave myself to\ndance.\n\nPAGE:\nThis proud infect what dangers or others, parts,\nYet that this mould be to stand, and be seen to be;\nFor me, that he be lost in heaven;\nThe comfort, do not say he is.\n\nGLOUCESTER:\nWhat, would you have meal due? when you have must one\nand honour to my clothes till now\nIn Margaret's sacred household room exile\nThat down while heaven confine you,\nShall satisfied that deadly was but a piteous crown,\nAnd never toward the nornes. The neal of tears!\n\nWARWICK:\nThen this, good friend! when every story reado\nI have spent my knot in blood and pity\nSomewhing your bones 

In [43]:
# Save the generation wrapper to the 'one_step' directory, then load it back
# to check that the exported model round-trips.
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



2022-11-07 23:56:19.872573: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [44]:
# Generate 100 characters with the reloaded model to confirm it still works
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:
O that came four thousand city!

CAMILLO:
Besides the people whipp
In thoughts of math obedient wit


# customized model

In [45]:
class CustomTraining(MyModel):
  """MyModel with a hand-written training step."""

  @tf.function
  def train_step(self, inputs):
      """Run one optimisation step on a batch.

      Args:
          inputs: A pair (inputs, labels) of token-id batches.

      Returns:
          A dict {'loss': loss} with the loss computed on this batch.
      """
      inputs, labels = inputs
      with tf.GradientTape() as tape:
          predictions = self(inputs, training=True)
          loss = self.loss(labels, predictions)

      # Differentiate with respect to this instance's own variables. A
      # module-level `model` name may refer to a different object (or to
      # nothing at all) and must not be relied on here.
      trainable_vars = self.trainable_variables
      grads = tape.gradient(loss, trainable_vars)
      self.optimizer.apply_gradients(zip(grads, trainable_vars))

      return {'loss': loss}

In [46]:
# Rebind `model` to a new CustomTraining instance. Objects built from the
# previous `model` (such as the OneStep wrapper created earlier in this
# file) keep referring to the old instance.
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [47]:
# Attach the optimizer and loss that the custom train_step reads from
# self.optimizer and self.loss
model.compile(optimizer = tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [48]:
# Train for one epoch through model.fit
model.fit(dataset, epochs=1)



<keras.callbacks.History at 0x7f186c090f70>

In [49]:
# Manual training loop: calls train_step directly on every batch, tracks
# the running mean loss per epoch and writes weight checkpoints.
EPOCHS = 10

mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
    start = time.time()

    # Start every epoch with an empty running mean.
    mean.reset_state()
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])

        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # Save a checkpoint every 5 epochs and after the final epoch. Files are
    # named with the 1-based epoch number, matching the numbers printed
    # in the log lines. Saving inside the loop also avoids reading `epoch`
    # after the loop, which would be undefined for EPOCHS == 0.
    if (epoch + 1) % 5 == 0 or epoch + 1 == EPOCHS:
        model.save_weights(checkpoint_prefix.format(epoch=epoch + 1))

    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_"*80)

Epoch 1 Batch 0 Loss 2.1580
Epoch 1 Batch 50 Loss 2.0946
Epoch 1 Batch 100 Loss 1.9529
Epoch 1 Batch 150 Loss 1.8900

Epoch 1 Loss: 1.9991
Time taken for 1 epoch 5.32 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 1.8448
Epoch 2 Batch 50 Loss 1.7568
Epoch 2 Batch 100 Loss 1.6602
Epoch 2 Batch 150 Loss 1.6588

Epoch 2 Loss: 1.7273
Time taken for 1 epoch 4.96 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 1.6104
Epoch 3 Batch 50 Loss 1.5547
Epoch 3 Batch 100 Loss 1.5241
Epoch 3 Batch 150 Loss 1.5220

Epoch 3 Loss: 1.5648
Time taken for 1 epoch 4.96 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 1.5014
Epoch 4 Batch 50 Loss 1.5066
Epoch 4 Batch 100 Loss 1.4345
Epoch 4 Batch 150 Loss 1.4614

Epoch 4 Loss: 1.4629
Time taken for 1 epoch 4.92 sec
_________________________________________________________________________