In [1]:
!pip install tensorflow

Successfully installed tensorflow-2.11.0


In [3]:
import tensorflow as tf
import numpy as np
import os
import keras
import time

In [4]:
# Download the Shakespeare corpus and remember where the file was stored.
path_to_file = keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [6]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read is done.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')

# Length of the corpus in characters.
print(len(text))

1115394


In [7]:
# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()

# Show the characters, then how many of them there are.
print(vocab, len(vocab), sep='\n')

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
65


In [8]:
# Split two sample strings into their characters. The strings have different
# lengths, so the value displayed by this cell is a ragged tensor.
example_texts = ['abcdefgh', 'xyz']
chars = tf.strings.unicode_split(input=example_texts, input_encoding='UTF-8')
chars

2022-12-24 17:05:36.154029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-24 17:05:36.154092: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-24 17:05:36.154130: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (azad-hplaptop15da0xxx): /proc/driver/nvidia/version does not exist
2022-12-24 17:05:36.197839: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'], [b'x', b'y', b'z']]>

In [10]:
# Forward lookup: character -> integer id, built from the corpus vocabulary.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None
)

ids = ids_from_chars(chars)
print(ids)

# Inverse lookup: integer id -> character. It reuses the forward layer's
# vocabulary so both directions agree, and it is created through `tf.keras`
# like the forward layer rather than through the standalone `keras` package.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None
)

# Round-trip the example ids back to characters.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46, 47], [63, 64, 65]]>


<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'], [b'x', b'y', b'z']]>

In [11]:
def text_from_ids(ids):
  """Convert a tensor of character ids back into joined byte string(s).

  The ids are mapped to characters with `chars_from_ids`, the characters are
  concatenated along the last axis, and the result is returned as a NumPy
  value.
  """
  id_chars = chars_from_ids(ids)
  joined = tf.strings.reduce_join(id_chars, axis=-1)
  return joined.numpy()

text_from_ids(ids)

array([b'abcdefgh', b'xyz'], dtype=object)

In [12]:
# Encode the whole corpus: split it into characters, then look up each id.
all_ids = ids_from_chars(
    tf.strings.unicode_split(input=text, input_encoding='UTF-8')
)
all_ids

2022-12-24 17:07:37.471991: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 26769456 exceeds 10% of free system memory.


<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [13]:
# One dataset element per character id of the corpus.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

# Preview the first ten characters. The loop variable has its own name so it
# does not overwrite the `ids` example tensor that earlier cells display.
for char_id in ids_dataset.take(10):
  print(chars_from_ids(char_id).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [14]:
# Each chunk holds seq_length + 1 characters, so that splitting it into an
# input and a one-step-shifted target gives two pieces of length seq_length.
seq_length = 100
sequence = ids_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

# Peek at the first two chunks as characters.
for seq in sequence.take(count=2):
  print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)
tf.Tensor(
[b'a' b'r' b'e' b' ' b'a' b'l' b'l' b' ' b'r' b'e' b's' b'o' b'l' b'v'
 b'e' b'd' b' ' b'r' b'a' b't' b'h' b'e' b'r' b' ' b't' b'o' b' ' b'd'
 b'i' b'e' b' ' b't' b'h' b'a' b'n' b' ' b't' b'o' b' ' b'f' b'a' b'm'
 b'i' b's' b'h' b'?' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'R' b'e' b's'
 b'o' b'l' b'v' b'e' b'd' b'.' b' ' b'r' b'e' b's' b'o' b'l' b'v' b'e'
 b'd' b'.' b'\n' b'\n' b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'

In [15]:
# Print the shape and the decoded text of the first five chunks.
for seq in sequence.take(count=5):
  print(seq.shape, text_from_ids(seq), sep='\n')

(101,)
b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
(101,)
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
(101,)
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
(101,)
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
(101,)
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [16]:
def split_input_target(seq):
  """Split a sequence into an (input, target) pair offset by one position.

  The input is everything except the last element; the target is everything
  except the first element.
  """
  inputs = seq[:-1]
  targets = seq[1:]
  return inputs, targets

split_input_target("Tensorflow")

('Tensorflo', 'ensorflow')

In [18]:
# Turn every chunk into an (input, target) pair.
dataset = sequence.map(map_func=split_input_target)

# Show the first two pairs: the target is the input shifted by one character.
for input_seq, target_seq in dataset.take(count=2):
  print(input_seq.shape)
  print(text_from_ids(input_seq), text_from_ids(target_seq), sep='\n')

(100,)
b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
(100,)
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '
b're all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'


In [19]:
BATCH_SIZE = 64
# Size of the shuffle buffer.
BUFFER_SIZE = 10000

# Build the training pipeline directly from `sequence` rather than re-wrapping
# `dataset`: the cell then gives the same result however often it is run,
# instead of shuffling and batching an already-batched dataset on a re-run.
dataset = (
    sequence.map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE is the non-deprecated name of tf.data.experimental.AUTOTUNE.
    .prefetch(tf.data.AUTOTUNE)
)

In [20]:
# Model hyperparameters: embedding width and number of GRU units.
embedding_dim, rnn_units = 256, 1024

# Size of the lookup layer's vocabulary. Presumably the corpus characters plus
# the layer's [UNK] entry: the recorded prediction shape ends in 66, not 65.
vocab_size = len(ids_from_chars.get_vocabulary())

class MyModel(tf.keras.Model):
  """Character-level model: Embedding -> GRU -> Dense over the vocabulary.

  Args:
    vocab_size: number of ids accepted by the embedding layer and number of
      scores produced per position by the output layer.
    embedding_dim: size of each embedding vector.
    rnn_units: number of units in the GRU layer.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    # Do not pass `self` here: the bound super() call already supplies it, and
    # an extra positional argument is not part of a subclassed model's
    # constructor contract.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # The GRU returns the output at every time step plus its final state, so
    # generation can carry the state from one call to the next.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    # No activation: the layer emits one unnormalized score per vocabulary id.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of character ids.

    Args:
      inputs: tensor of character ids.
      states: initial GRU state; when None the GRU's own initial state for
        the embedded input is used.
      return_state: when True, also return the final GRU state.
      training: forwarded to every layer.

    Returns:
      The per-position scores over the vocabulary, or a `(scores, states)`
      tuple when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x


In [None]:
# Instantiate the model with the hyperparameters defined earlier.
model = MyModel(vocab_size, embedding_dim, rnn_units)

# The loss is configured for logits (from_logits=True), i.e. raw model scores.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer='adam')

# Checkpoint callback: saves the weights only, under
# ./training_checkpoints/ckpt_<epoch>.
checkpoints_dir = "./training_checkpoints"
checkpoint_prefix = os.path.join(checkpoints_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_prefix, save_weights_only=True
)

# Train and keep the returned history object.
Epochs = 20
history = model.fit(dataset, epochs=Epochs, callbacks=[checkpoint_callback])

In [49]:
# Sanity-check the model on a single batch: sample one character for every
# position of the first example and report the loss over the whole batch.
for input_batch, target_batch in dataset.take(count=1):
  batch_prediction = model(input_batch)

  # Draw one id per time step from the scores of the first example, then drop
  # the trailing sample axis.
  sampled_indices = tf.squeeze(
      tf.random.categorical(batch_prediction[0], num_samples=1), axis=-1
  ).numpy()

  mean_loss = loss(target_batch, batch_prediction)
  print("Prediction shape", batch_prediction.shape)
  print("mean loss", mean_loss)
  # Exponential of the mean loss.
  print(tf.exp(mean_loss).numpy())

  print("Input: ", text_from_ids(input_batch[0]), end='\n\n')
  print("Output: ", text_from_ids(sampled_indices))

Prediction shape (64, 100, 66)
mean loss tf.Tensor(1.7043811, shape=(), dtype=float32)
5.497982
Input:  b'e, and a nimble hand, is\nnecessary for a cut-purse; a good nose is requisite\nalso, to smell out work'

Output:  b't snc w vogeei,aadd.\nmn poah  onm hor i yotw-osstl wndops gott at buyuat!o; Wb ,  Ih,hialk,ifr oirdt'


In [51]:
class OneStep():
  """Generate text one character at a time from a trained model.

  Args:
    model: called as `model(inputs=..., states=..., return_state=True)` and
      expected to return `(logits, states)`.
    chars_from_ids: lookup layer mapping ids back to characters.
    ids_from_chars: lookup layer mapping characters to ids.
    temperature: divisor applied to the logits before sampling.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    self.temperature = temperature
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars
    self.model = model

    # Additive mask over the vocabulary: -inf at the id of '[UNK]' and 0
    # everywhere else, so that '[UNK]' can never be sampled.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        dense_shape=[len(ids_from_chars.get_vocabulary())]
    )
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: string tensor holding the text generated so far (or the seed).
      states: model state returned by the previous step, or None to start.

    Returns:
      A `(predicted_chars, states)` tuple: the sampled next character for each
      input and the model state to pass to the next call.
    """
    # Strings -> characters -> ids. Use the lookup layer given to this
    # instance, not whatever global of the same name happens to exist.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    predicted_logits, states = self.model(inputs=input_ids, states=states, return_state=True)

    # Keep only the last time step, apply the temperature, and mask '[UNK]'.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample one id per input and drop the trailing sample axis.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    predicted_chars = self.chars_from_ids(predicted_ids)
    return predicted_chars, states


In [52]:
# Wrap the trained model and both lookup layers in the one-step generator.
one_step_model = OneStep(
    model=model,
    chars_from_ids=chars_from_ids,
    ids_from_chars=ids_from_chars,
)

In [None]:
# Generate 1000 characters: seed with "ROMEO", then feed every sampled
# character (together with the returned state) back in as the next input.
start = time.time()
states = None
next_char = tf.constant(['ROMEO'])
result = [next_char]

for n in range(1000):
    step_output = one_step_model.generate_one_step(next_char, states=states)
    next_char, states = step_output
    result.append(next_char)

# Concatenate the seed and all sampled characters into one string tensor.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + 80 * '_')
print('\nRun time:', end - start)

In [45]:
# Export the model as a SavedModel, then load that export back.
# NOTE(review): this rebinds `model` to the loaded object, while an existing
# `one_step_model` keeps the reference it was constructed with, so it is not
# affected by the reload. Confirm that is the intent.
export_dir = 'model'
tf.saved_model.save(model, export_dir)
model = tf.saved_model.load(export_dir)



INFO:tensorflow:Assets written to: model/assets


INFO:tensorflow:Assets written to: model/assets


In [46]:
# Inspect the type of the object currently bound to `model`.
type(model)

tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject

In [None]:
# Generate 1000 characters again, starting from the seed "ROMEO".
# NOTE(review): this repeats the earlier generation cell verbatim and still
# calls `one_step_model`, which stores the model it was constructed with; it
# does not appear to exercise the object returned by tf.saved_model.load.
# Confirm what this second run is meant to demonstrate.
start = time.time()
states = None
next_char = tf.constant(['ROMEO'])
result = [next_char]

# Feed each sampled character and the returned state back into the next step.
for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the seed and all sampled characters into one string tensor.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)