In [None]:
import tensorflow as tf

import numpy as np
import os
import time

In [None]:
path_to_file = '/content/speech.txt'

In [None]:
# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# closes the file handle as soon as the read finishes instead of leaving it
# open for the lifetime of the kernel.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# The length of `text` is the number of characters (not bytes) in it.
print(f'Length of text: {len(text)} characters')

Length of text: 41570 characters


In [None]:
# Take a look at the first 250 characters in text
print(text[:250])

STEVE JOBS,  JUNE 14 2005, STANFORD  I am honored to be with you today at your commencement from one of the finest universities in the world. I never graduated from college. Truth be told, this is the closest I've ever gotten to a college graduation.


In [None]:
# The unique characters in the file
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

81 unique characters


In [None]:
example_texts = ['abcdefg', 'xyz']

chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
# Lookup layer that maps each character string to an integer ID, built from
# the corpus vocabulary. `mask_token=None` means no mask entry is reserved.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [None]:
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[48, 49, 50, 51, 52, 53, 54], [71, 72, 73]]>

In [None]:
# Inverse lookup (integer ID -> character). It is built from
# `ids_from_chars.get_vocabulary()` rather than from `vocab` so that both
# layers share exactly the same vocabulary list.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [None]:
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [None]:
def text_from_ids(ids):
  """Convert token IDs back to text.

  Each ID is looked up with `chars_from_ids`, and the resulting characters
  are concatenated along the last axis with `tf.strings.reduce_join`.
  """
  looked_up_chars = chars_from_ids(ids)
  joined_text = tf.strings.reduce_join(looked_up_chars, axis=-1)
  return joined_text

In [None]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(41570,), dtype=int64, numpy=array([41, 42, 28, ..., 50, 58, 10])>

In [None]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

S
T
E
V
E
 
J
O
B
S


In [None]:
seq_length = 100


The `batch` method lets you easily convert these individual characters to sequences of the desired size.

In [None]:
# Group the stream of IDs into chunks of `seq_length+1`: the extra element lets
# each chunk later be split into an input and a target that are one position
# apart. `drop_remainder=True` discards a final chunk that would be shorter.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first chunk as individual characters.
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'S' b'T' b'E' b'V' b'E' b' ' b'J' b'O' b'B' b'S' b',' b' ' b' ' b'J'
 b'U' b'N' b'E' b' ' b'1' b'4' b' ' b'2' b'0' b'0' b'5' b',' b' ' b'S'
 b'T' b'A' b'N' b'F' b'O' b'R' b'D' b' ' b' ' b'I' b' ' b'a' b'm' b' '
 b'h' b'o' b'n' b'o' b'r' b'e' b'd' b' ' b't' b'o' b' ' b'b' b'e' b' '
 b'w' b'i' b't' b'h' b' ' b'y' b'o' b'u' b' ' b't' b'o' b'd' b'a' b'y'
 b' ' b'a' b't' b' ' b'y' b'o' b'u' b'r' b' ' b'c' b'o' b'm' b'm' b'e'
 b'n' b'c' b'e' b'm' b'e' b'n' b't' b' ' b'f' b'r' b'o' b'm' b' ' b'o'
 b'n' b'e' b' '], shape=(101,), dtype=string)


In [None]:
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'STEVE JOBS,  JUNE 14 2005, STANFORD  I am honored to be with you today at your commencement from one '
b'of the finest universities in the world. I never graduated from college. Truth be told, this is the c'
b"losest I've ever gotten to a college graduation. Today I want to tell you three stories from my life."
b" That's it. No big deal. Just three stories.   The first story is about connecting the dots.  I dropp"
b'ed out of Reed College after the first 6 months, but then stayed around as a drop-in for another 18 m'


In [None]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is everything except the last element and the target is
    everything except the first, so `target[i]` is the element that follows
    `input[i]` in the original sequence.
    """
    return sequence[:-1], sequence[1:]

In [None]:
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [None]:
dataset = sequences.map(split_input_target)

In [None]:
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'STEVE JOBS,  JUNE 14 2005, STANFORD  I am honored to be with you today at your commencement from one'
Target: b'TEVE JOBS,  JUNE 14 2005, STANFORD  I am honored to be with you today at your commencement from one '


In [None]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Rebuild the pipeline from `sequences` instead of re-wrapping `dataset`, so
# that re-running this cell does not batch an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # `tf.data.AUTOTUNE` is the non-experimental name of the same constant.
    .prefetch(tf.data.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [None]:
# Length of the vocabulary in StringLookup Layer
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [None]:
class MyModel(tf.keras.Model):
  """Character-level model: Embedding -> GRU -> Dense.

  The Dense layer has `vocab_size` units and no activation, so the model
  outputs one unnormalised score (logit) per vocabulary entry.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the three layers.

    Args:
      vocab_size: Number of entries in the vocabulary; used as the embedding
        input size and as the number of output units.
      embedding_dim: Size of each embedding vector.
      rnn_units: Number of units in the GRU layer.
    """
    # The base constructor takes no positional arguments here; the instance
    # must not be passed explicitly.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences gives an output for every timestep; return_state also
    # returns the final state so the caller can feed it back in on the next call.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token IDs.

    Args:
      inputs: Token IDs to embed.
      states: Optional GRU state to start from. When None, the GRU's own
        initial state is used.
      return_state: If True, also return the GRU state after the last step.
      training: Forwarded to each layer.

    Returns:
      The Dense layer output, or `(output, states)` when `return_state` is
      True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [None]:
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

Note: For training you could use a `keras.Sequential` model here. To generate text later you'll need to manage the RNN's internal state. It's simpler to include the state input and output options upfront than to rearrange the model architecture later. For more details see the [Keras RNN guide](https://www.tensorflow.org/guide/keras/rnn#rnn_state_reuse).

In [None]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 82) # (batch_size, sequence_length, vocab_size)


In [None]:
model.summary()

Model: "my_model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     multiple                  20992     
                                                                 
 gru_2 (GRU)                 multiple                  3938304   
                                                                 
 dense_2 (Dense)             multiple                  84050     
                                                                 
Total params: 4,043,346
Trainable params: 4,043,346
Non-trainable params: 0
_________________________________________________________________


In [None]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [None]:
sampled_indices

array([47, 69,  7,  5, 45, 23, 70, 12, 26, 32, 61, 31, 48, 53,  4, 41, 16,
       39,  5, 72, 58, 62, 50,  8, 54, 80, 33, 33, 41,  1, 20, 62, 78, 30,
       38, 17, 52, 26,  2, 56, 63, 10, 18, 30, 74,  1,  9, 34,  1, 56, 74,
       21,  8, 35,  7, 10, 11, 68, 44, 18,  5, 35, 38, 61, 72, 53, 43, 19,
        5, 39, 46,  3, 46, 51, 14, 39, 58, 27, 12, 44, 80, 25, 57, 71, 49,
       20, 70, 41, 43, 32, 11, 31, 30, 48, 13, 47, 57, 17, 49, 15])

In [None]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'y in Albuquerque that had begun making the world\xe2\x80\x99s first personal computers. I offered to sell them '

Next Char Predictions:
 b'Yv\'$W?w1CInHaf"S5P$ykoc,g\xe2\x80\x9dJJS\n9o\xe2\x80\x99GO6eC ip.7G\xc2\xa2\n-K\ni\xc2\xa2:,L\'.0uV7$LOnyfU8$PX!Xd3PkD1V\xe2\x80\x9dBjxb9wSUI0HGa2Yj6b4'


In [None]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 82)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.407855, shape=(), dtype=float32)


In [None]:
tf.exp(example_batch_mean_loss).numpy()

82.09319

In [None]:
model.compile(optimizer='adam', loss=loss)

In [None]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
EPOCHS = 100

In [None]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
class OneStep(tf.keras.Model):
  """Wraps a trained model to generate text one character at a time.

  Holds the model, the two lookup layers, a sampling temperature and a
  precomputed mask that keeps "[UNK]" from being sampled.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the collaborators and build the "[UNK]" prediction mask.

    Args:
      model: Model called as `model(inputs=..., states=..., return_state=True)`
        and expected to return `(logits, states)`.
      chars_from_ids: Callable mapping token IDs to characters.
      ids_from_chars: Callable mapping characters to token IDs; must also
        provide `get_vocabulary()`.
      temperature: Divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    # The trailing `[:, None]` adds an axis so each ID becomes a one-element
    # index row for the SparseTensor below.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    # Dense vector of vocabulary length: -inf at the skipped IDs; it is added
    # to the logits in `generate_one_step`.
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: String tensor of prompts; presumably 1-D, one prompt per batch
        entry (the notebook passes `tf.constant([...])`) -- confirm for other
        callers.
      states: Model state from the previous step, or None for the first step.

    Returns:
      `(predicted_chars, states)`: the sampled next characters and the model
      state to pass into the following call.
    """
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [None]:
# Build the single-step generator from the trained model in this cell so that
# the generation loop below does not rely on leftover kernel state.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

start = time.time()
states = None
# Seed prompt; generation continues from these characters.
next_char = tf.constant(['ROMEO:'])
result = [next_char]

# Feed each sampled character (and the returned state) back in to get the next.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the prompt and all sampled characters into one string per prompt.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO: be abue to think the laves of the United States.

We were shock.

We deneral grvan trans to asswer had never grade that she say of manicabed to be able to show that a program is vaccinating millions more children. You have to be able to show that a program is vaccinating millions efreeting that well, the sindler essences on Marrially, I really dadn’t rus and time you run it ont that my mother had never graduated from college. Truth be toive to gonate to in prow-con lives of deepasity of elerable dissanity. It was awful tast, and intuity be a valievingo.

But for you. It was tough going there in the beginning. Because initially – the initial thought with PayPalwanth. 


““All ination of esperience especially striking was that I had have would have them. If I had sped that with the lasted taking live from dightented on my you will judge yourselves not on your professional accomplishments alone, but also on how well you theared conglimation ofteries the millions of childrengly who 

In [None]:
# Same generation loop as the single-prompt cell, but with five identical
# prompts passed as one batch.
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

# Each step samples one character per prompt and carries the state forward.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Element-wise join: one generated string per prompt.
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b'ROMEO: be begin inves do that with the reals of the situation is sochod and quise rife. Sto I was your laver \xe2\x80\x94 and that makes it hard fom their caring to matter had eccance to grat to Applick is a thought that wirh. And ve geen a dow\xe2\x80\x99r hond \xe2\x80\x93ian age class tranged the furs it all computer, mith sureds.  When I was young, there was an amaznization of Phise. Eou alop tree approach \xe2\x80\x93 is to measure the impact of your work and share you. Be actial gepars. That\xe2\x80\x99s why I was diycussion \xe2\x80\x94 smart people with the promise of this age, I want to exhort each of the graduates here to take on an issue \xe2\x80\x93 the andiclieation in the mort of my here not question.\n\nSt, it Als in 2008, reach orbit. That was also want to do what I was billion to grappact. And mo beca eors ked, to soother and allont any phiseds efreetit people thinking you do fouran a mote from graduate some of the work \xe2\x80\x93 basically if I can adv

In [None]:
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



In [None]:
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO: be bega softciter started that likely be okay for the pact, I’m prevential ngward you the pabling o


In [None]:
class CustomTraining(MyModel):
  """`MyModel` with a hand-written `train_step`."""

  @tf.function
  def train_step(self, inputs):
      """Run one optimisation step on a single batch.

      Args:
        inputs: An `(inputs, labels)` pair for the batch.

      Returns:
        A dict with the batch loss under the key 'loss'.
      """
      inputs, labels = inputs
      # Record the forward pass so gradients of the loss can be taken.
      with tf.GradientTape() as tape:
          predictions = self(inputs, training=True)
          loss = self.loss(labels, predictions)
      # Use this instance's own variables rather than whatever object the
      # notebook-level name `model` happens to be bound to.
      grads = tape.gradient(loss, self.trainable_variables)
      self.optimizer.apply_gradients(zip(grads, self.trainable_variables))

      return {'loss': loss}

In [None]:
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [None]:
model.fit(dataset, epochs=1)



<keras.callbacks.History at 0x7f525666cb20>

In [None]:
EPOCHS = 10

# Running mean of the per-batch losses within one epoch.
mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
    start = time.time()

    # Start each epoch's average from scratch. `reset_state()` is the current
    # metric API; `reset_states()` is its deprecated alias.
    mean.reset_state()
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])

        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # saving (checkpoint) the model every 5 epochs
    # NOTE: the file name uses the zero-based `epoch`, while the printed epoch
    # number is `epoch+1`.
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))

    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_"*80)

# Final save after the last epoch, so the end state is stored even when EPOCHS
# is not a multiple of 5.
model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 2.1486
Epoch 1 Batch 50 Loss 2.0788
Epoch 1 Batch 100 Loss 1.9860
Epoch 1 Batch 150 Loss 1.8818

Epoch 1 Loss: 2.0011
Time taken for 1 epoch 10.89 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 1.8184
Epoch 2 Batch 50 Loss 1.7265
Epoch 2 Batch 100 Loss 1.6838
Epoch 2 Batch 150 Loss 1.6568

Epoch 2 Loss: 1.7197
Time taken for 1 epoch 10.29 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 1.6051
Epoch 3 Batch 50 Loss 1.5707
Epoch 3 Batch 100 Loss 1.5605
Epoch 3 Batch 150 Loss 1.5258

Epoch 3 Loss: 1.5564
Time taken for 1 epoch 10.36 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 1.4199
Epoch 4 Batch 50 Loss 1.4695
Epoch 4 Batch 100 Loss 1.4491
Epoch 4 Batch 150 Loss 1.4235

Epoch 4 Loss: 1.4561
Time taken for 1 epoch 10.30 sec
_____________________________________________________________________