# Text generation (RNN) 
Much of this code is from [TensorFlow's tutorial](https://www.tensorflow.org/text/tutorials/text_generation).

In [None]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

import numpy as np
import os
import time

In [None]:
# Read the raw bytes, then decode them as UTF-8. The file is read in binary
# mode so that no newline translation is applied, and inside a `with` block
# so that the file handle is closed as soon as the read finishes.
with open('/content/compiled_lafferty.txt', 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 6964698 characters


### processing text

In [None]:
# Build the vocabulary: the sorted list of distinct characters in the text.

vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

191 unique characters


In [None]:
# Split the text into a 1-D string tensor holding one UTF-8 character per
# element. These character tokens are what gets mapped to numeric IDs.

chars = tf.strings.unicode_split(text, input_encoding='UTF-8')
chars

<tf.Tensor: shape=(6964698,), dtype=string, numpy=array([b'R', b'e', b'm', ..., b'o', b'.', b'\n'], dtype=object)>

In [None]:
# Create the StringLookup layer that maps each character token to an integer
# ID, using the characters found in the text as its vocabulary.
# mask_token=None: no vocabulary entry is set aside as a mask token.

ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab), mask_token=None)

In [None]:
# Convert the character tokens into their integer IDs.

ids = ids_from_chars(chars)
ids

<tf.Tensor: shape=(6964698,), dtype=int64, numpy=array([51, 69, 77, ..., 79, 15,  1])>

In [None]:
# Inverse lookup layer: maps integer IDs back to character tokens. It takes
# its vocabulary from `ids_from_chars.get_vocabulary()` so that both
# directions use exactly the same ID <-> character table.

chars_from_ids = preprocessing.StringLookup(
    invert=True,
    vocabulary=ids_from_chars.get_vocabulary(),
    mask_token=None,
)

In [None]:
# Round-trip check: decoding the IDs should give back the original
# characters. Note that this rebinds `chars` to the decoded tensor.
chars = chars_from_ids(ids)
chars

<tf.Tensor: shape=(6964698,), dtype=string, numpy=array([b'R', b'e', b'm', ..., b'o', b'.', b'\n'], dtype=object)>

In [None]:
# Join the individual characters back into a single string; `.numpy()`
# converts the resulting string tensor into a Python bytes object.

tf.strings.reduce_join(chars, axis=-1).numpy()



In [None]:
def text_from_ids(ids):
  """Decode a tensor of character IDs into joined text.

  Each ID is mapped back to its character with `chars_from_ids`, and the
  characters are then concatenated along the last axis.

  Args:
    ids: Tensor (or array-like) of integer character IDs.

  Returns:
    A string tensor with the last axis collapsed into joined text.
  """
  char_tokens = chars_from_ids(ids)
  return tf.strings.reduce_join(char_tokens, axis=-1)

### training and testing

In [None]:
# Encode the whole text as one long tensor of character IDs: split the text
# into UTF-8 characters, then look up the ID of each character.

all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(6964698,), dtype=int64, numpy=array([51, 69, 77, ..., 79, 15,  1])>

In [None]:
# Wrap the ID tensor in a tf.data.Dataset that yields one character ID per element.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Show the first 15 elements of the dataset, decoded back to characters.
# The loop variable is deliberately not called `ids`, so the full ID tensor
# held in `ids` is not overwritten with a single scalar element.
for char_id in ids_dataset.take(15):
  print(chars_from_ids(char_id).numpy().decode('utf-8'))

R
e
m
e
m
b
e
r
 
t
h
e
s
e
 


In [None]:
# Number of input characters per training example. Each example is cut as a
# chunk of seq_length + 1 characters.
seq_length = 100 # characters from text
# Number of complete (seq_length + 1)-character chunks that fit in the text.
examples_per_epoch = len(text)//(seq_length+1)

In [None]:
# Group the stream of character IDs into fixed-length sequences of
# seq_length + 1 IDs. drop_remainder=True discards a final, shorter chunk so
# that every sequence has the same length.

sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first sequence as individual character tokens.
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'R' b'e' b'm' b'e' b'm' b'b' b'e' b'r' b' ' b't' b'h' b'e' b's' b'e'
 b' ' b't' b'h' b'i' b'n' b'g' b's' b',' b' ' b'b' b'u' b'r' b'n' b' '
 b't' b'h' b'e' b'm' b' ' b'i' b'n' b't' b'o' b' ' b'y' b'o' b'u' b'r'
 b' ' b'm' b'i' b'n' b'd' b',' b'\n' b't' b'h' b'i' b'n' b'k' b' ' b'o'
 b'f' b' ' b't' b'h' b'e' b'm' b' ' b'a' b'l' b'w' b'a' b'y' b's' b':'
 b'\n' b'T' b'h' b'e' b' ' b'P' b'a' b'r' b't' b'i' b'c' b'u' b'l' b'a'
 b'r' b' ' b'U' b'n' b'i' b'v' b'e' b'r' b's' b'e' b',' b' ' b'a' b' '
 b'k' b'i' b'n'], shape=(101,), dtype=string)


In [None]:
# Show the first five sequences joined back into text, to check that
# consecutive sequences are consecutive chunks of the original text.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'Remember these things, burn them into your mind,\nthink of them always:\nThe Particular Universe, a kin'
b'etic three-\ndimensional ellipse with three foci and consisting of\nfour suns and seventeen habitable w'
b'orlds, from\nGaea-Earth around Sol-Sun to the elegant planets\naround the Proxima and Alpha Suns, to th'
b'e inelegant\nplanets around the Beta Sun, the most inelegant of\nall being the three Trader Planets, Em'
b'porion,\nApateon, and Klepsis. Of these three, Emporion has\nno law, Apateon has no ethics, and Klepsis'


In [None]:
# function takes sequence as input, duplicates, and shifts to align input and label for each timestep

def split_input_target(sequence):
    """Turn one sequence into an (input, target) pair shifted by one step.

    The input is the sequence without its last element and the target is the
    sequence without its first element, so that at every position the target
    holds the element that follows the corresponding input element.

    Args:
        sequence: Any sliceable sequence (tensor, list, string, ...).

    Returns:
        A tuple `(input_text, target_text)`, each one element shorter than
        `sequence`.
    """
    inputs, targets = sequence[:-1], sequence[1:]
    return inputs, targets

In [None]:
# Turn every sequence into an (input, target) pair.
dataset = sequences.map(split_input_target)

### training batch

In [None]:
# Before feeding the data into the model, shuffle the (input, target) pairs
# and pack them into fixed-size batches.

# batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset: elements are shuffled within a buffer
# of this many (input, target) pairs.
BUFFER_SIZE = 10000

# The pipeline is rebuilt from `sequences` rather than from `dataset` itself,
# so re-running this cell never batches an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

### building the model

In [None]:
# the code below is sourced from https://www.tensorflow.org/text/tutorials/text_generation#build_the_model

# Size of the model vocabulary. It is taken from the StringLookup layer rather
# than from `len(vocab)`, because the layer's vocabulary also contains the
# "[UNK]" token and is therefore one entry larger than the set of characters.
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [None]:
# the code below is sourced from https://www.tensorflow.org/text/tutorials/text_generation#build_the_model

class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  For every position of the input sequence the model outputs one logit per
  vocabulary entry, i.e. the unnormalized prediction of the next character.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the layers.

    Args:
      vocab_size: Number of distinct token IDs. It must match the vocabulary
        of the StringLookup layer that produces the input IDs.
      embedding_dim: Size of the vector each token ID is embedded into.
      rnn_units: Number of units of the GRU layer.
    """
    # `super().__init__()` already binds `self`; passing it again would hand
    # the instance to tf.keras.Model.__init__ as a stray positional argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences: emit an output for every timestep, not only the last.
    # return_state: also return the final state so that generation can carry
    # it over from one call to the next.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    # No activation: the layer outputs raw logits over the vocabulary.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token-ID sequences.

    Args:
      inputs: Integer tensor of token IDs, one row per sequence.
      states: Optional initial state for the GRU. When None, the GRU's own
        initial state is used.
      return_state: When True, also return the GRU state after the last step.
      training: Forwarded to the layers.

    Returns:
      The logits, or the tuple `(logits, states)` when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [None]:
# the code below is sourced from https://www.tensorflow.org/text/tutorials/text_generation#build_the_model

# Instantiate the model with the hyperparameters chosen above.
model = MyModel(
    # Be sure the vocabulary size matches the `StringLookup` layers.
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

### trying the model

In [None]:
# Sanity check: run the (still untrained) model on a single batch and look at
# the shape of its output. The loop variables and the predictions stay defined
# after the loop and are reused by the following cells.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 192) # (batch_size, sequence_length, vocab_size)


In [None]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  49152     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  196800    
                                                                 
Total params: 4,184,256
Trainable params: 4,184,256
Non-trainable params: 0
_________________________________________________________________


"To get actual predictions from the model you need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary." @ https://www.tensorflow.org/text/tutorials/text_generation#build_the_model

In [None]:
# Take the first example of the batch and sample one character index per
# timestep from its logits.

sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [None]:
# The sampled next-character index for each timestep of the first example.

sampled_indices

array([ 20, 101, 126,   1, 146, 176, 135,  80, 126,  33,   8, 169, 106,
        46,  13,  46, 138, 185,  54, 176,  55,  17,  91,   8,  24, 137,
        75,   3,  63,  40,  19, 170,   9,  76, 124,  28,  33, 117,  70,
       178,  24,  71,  44, 119,  73,  79,   4,  24,  95,   0, 166,  71,
       106, 105,  15,  94, 190, 148,  36,   2,  13, 184, 149, 128, 110,
       182,  33, 125,  67, 158, 167, 135,  72, 146, 189, 170,  71,  35,
         4, 157, 108,  91, 176,  11, 111, 134, 140,  20, 100,  88,  15,
       190, 146,  80,  62, 115, 175, 180,   6, 143])

In [None]:
# Decode the input and the sampled indices back to text. The model has not
# been trained yet at this point, so the "predictions" are expected to be
# gibberish.

print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'\nthat fancy talk that machines talk to one another?\xe2\x80\x9d\nPeople, people, earless, eyeless, touchless, no'

Next Char Predictions:
 b"3\xc2\xba\xc4\x99\n\xd0\xbf\xe1\xba\xa1\xca\xbbp\xc4\x99@&\xdb\x8c\xc3\xa0M,M\xd0\x93\xe2\x80\xa2U\xe1\xba\xa1V0{&7\xd0\x90k _G2\xdb\xb1'l\xc4\x93;@\xc3\xaef\xe1\xbb\xa57gK\xc3\xb1io!7\xc2\xa9[UNK]\xd9\x87g\xc3\xa0\xc3\x8d.\xc2\xa7\xe5\xa4\x9a\xd1\x81C\r,\xe2\x80\x9d\xd1\x83\xc4\xb1\xc3\xa5\xe2\x80\x99@\xc4\x97c\xd8\xb1\xd9\x88\xca\xbbh\xd0\xbf\xe3\x83\xaa\xdb\xb1gB!\xd8\xaf\xc3\xa2{\xe1\xba\xa1)\xc3\xa7\xc8\x99\xd0\xa13\xc2\xb7x.\xe5\xa4\x9a\xd0\xbfp]\xc3\xab\xe1\x9f\x94\xe2\x80\x94#\xd0\xb5"


### training the model

"At this point the problem can be treated as a standard classification problem. Given the previous RNN state, and the input this time step, predict the class of the next character." @ https://www.tensorflow.org/text/tutorials/text_generation#train_the_model

In [None]:
# from_logits=True because the model's final Dense layer has no activation
# and therefore outputs raw logits rather than probabilities.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# Loss of the untrained model on the example batch.
example_batch_loss = loss(target_example_batch, example_batch_predictions)
# NOTE(review): with the loss object's default reduction the result is
# presumably already a scalar, which would make `.mean()` a no-op - confirm.
mean_loss = example_batch_loss.numpy().mean()
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", mean_loss)

Prediction shape:  (64, 100, 192)  # (batch_size, sequence_length, vocab_size)
Mean loss:         5.2595387


In [None]:
# Exponential of the mean loss. For a freshly initialized model this should
# be close to the vocabulary size; a much larger value would point to a badly
# initialized model.
tf.exp(mean_loss).numpy()

192.39272

In [None]:
# saving results from parameters tested earlier
'''
Prediction shape:  (128, 250, 192)  # (batch_size, sequence_length, vocab_size)
Mean loss:         5.255569

tf.exp(mean_loss).numpy() = 191.6305
'''

'\nPrediction shape:  (128, 250, 192)  # (batch_size, sequence_length, vocab_size)\nMean loss:         5.255569\n\ntf.exp(mean_loss).numpy() = 191.6305\n'

In [None]:
# Configure training: Adam optimizer with the loss defined above.
model.compile(optimizer='adam', loss=loss)

### checkpoints

In [None]:
# directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'

# name of the checkpoint files; the "{epoch}" placeholder is left unformatted
# here so that the callback can fill in the epoch number
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that saves a checkpoint during training. save_weights_only=True
# stores only the model weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
# Number of training epochs (full passes over the dataset).
EPOCHS = 15

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Nov 16 05:34:51 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    33W / 250W |   1041MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Train the model; the callback writes a checkpoint while training runs.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
class OneStep(tf.keras.Model):
  """Wraps a trained model so that text can be sampled one character at a time.

  The wrapper owns the two lookup layers, so callers pass and receive strings
  instead of token IDs, and it makes sure "[UNK]" is never sampled.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and build the "[UNK]" mask.

    Args:
      model: Model called as `model(inputs=..., states=..., return_state=True)`
        and returning `(logits, states)`.
      chars_from_ids: Lookup layer mapping token IDs to characters.
      ids_from_chars: Lookup layer mapping characters to token IDs.
      temperature: Value the logits are divided by before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive mask over the vocabulary: -inf at the ID of "[UNK]" and 0
    # everywhere else, so that adding it to the logits rules "[UNK]" out.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocab_len = len(ids_from_chars.get_vocabulary())
    neg_inf_values = [-float('inf')] * len(unk_ids)
    mask_sparse = tf.SparseTensor(
        values=neg_inf_values,
        indices=unk_ids,
        dense_shape=[vocab_len])
    self.prediction_mask = tf.sparse.to_dense(mask_sparse)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: Batch of strings; each one is split into UTF-8 characters.
      states: Optional model state returned by a previous call.

    Returns:
      A tuple `(predicted_chars, states)`: the sampled next character for
      each input string and the model state to pass to the next call.
    """
    # Strings -> characters -> token IDs. The lookup result is ragged (the
    # strings may differ in length), so it is converted to a dense tensor.
    split_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    token_ids = self.ids_from_chars(split_chars).to_tensor()

    # logits has shape [batch, char, next_char_logits].
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the prediction for the last position, scale it by the
    # temperature, then add the mask so that "[UNK]" cannot be drawn.
    last_logits = logits[:, -1, :] / self.temperature + self.prediction_mask

    # Draw one token ID per batch entry and drop the num_samples axis.
    sampled = tf.random.categorical(last_logits, num_samples=1)
    next_ids = tf.squeeze(sampled, axis=-1)

    # Token IDs -> characters, returned together with the model state.
    return self.chars_from_ids(next_ids), states

In [None]:
# Wrap the trained model for character-by-character generation (default temperature).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [None]:
# Generate text: start from a seed string, then repeatedly feed the last
# sampled character together with the carried-over model state back into the
# one-step model. The whole loop is timed.
start = time.time()
states = None
next_char = tf.constant(['Generated text:'])
result = [next_char]

# Each iteration appends exactly one sampled character.
for n in range(2000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters into one string per batch entry.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Generated text:, the
heat and burning fingered jaws. And she answered about
them, a hopping slavery. But this is not confessing it, there's seen on
us partisac-kagan. Compariscons was high, aren't just during the third, buy
our bed, then considered in Faronica; he was a vaned in the
first time everybody left, and it was Mistake.
There was no signal fund for them.
"It isn't all dying," Helen said creeping, with breakthes. He was around, that is
that there are Papa Devil, from the salienc, of course. There seemed to have his finesfied
them, he would have got his coming.
“There is comprehensible stradglument and loose, so far of an I'll have a sort of end.
"My fence-and he whis essence of it did, so they would go
beyond. The grunk alevon will wrist.
There was also the light of my mouth and we
dropped here in the daughter of Thomas. How she used to see though
you believe that the answer is—is that I'm going to me about mamoriu,"
said Alice Sculphurman.
The oldest firm hes are liv-
iled on 

attempt #4 (15 epochs)

> Generated text: Dettions of it's make other soor, I ital not and the Doomes caution,
and three long would make time," (I am named McCalecturap over,
Then, move place place and
just), I believe their isnobactidier town who have a licked to the skysimer. “It is of-tieven, and the Days’d was only. “I was it is?” “In then early now, and the remore
Come adving as or conaster’s not being go not tubbulinections, is is somewhat has alwaysed:
"Why got is it? It's buff, tell you could the reached La-grown and in-cappled Progulas
young Polish," the things he would rocks on it."
"Verannoth wrong a shawping you can narringed and live?” Do
you ton was goness is a tauge an indaupting all the way. Yes, were something, whatter and said ink," Mence flight to be in a sembler whiteed
in set space."
Aurelia whisper leg's had twn mot perhy lady arome in the what in stamen it except
or it is letites of the chickicity somebody wople triftiat was ghands."
“Dame to brest companuted any almost all, you salve it,” said R.A. Lafferty c/cancers that words,” the quality illil. And Harked to bur.
I could creature age," "Oh, the Hizely strange when I do thy very more deree were one make in its own. He would every roys, but that. It was also and Brannagan.
“But you has a being me! You will could happens in a Slowardes, days, and the wonder by X. with it where. Old way words through that yout our would
be knopling bendits call Velong Johes suched.
The said it is one all. I other was anything. I do
not crimmes.' Ibsome Amaze a Sonthing and getting, Cavalola had to be wonderful whoy choole out act about then it was bug by Prived
down intrist was coming that was almost at been time.'
“Yes, that have how would, Gaetanner were given was a perome. Pew! And Prompses, my mapistasish
beach, that he completel I welpruid rack laugh the malsed joing.”
“I believe,” said. “I clear for ruce!,
I
constugy, mether at Baugh bookant, intoly haid eatting that
him, and shat air beauta, it fae, it would beson directy. they were all reann 



attempt #3 (50 epochs)



> Generated text: “The samem. I swop poract specames said the
stallook the spialt quilisas-encact mane intremped to gill
more thing online of ealstone."
“There barkaissed I
recarach, and muching wat. Snore!. Hniu dray comes tower by the whiched will the but the expecse. Holl remins of a laffots not the name of chanswed masif him bow weatury old fets feelly was one of that the distrachus that"
Aurride those brollog.
Speto the that years. Oill sautings togimuse, to he deriach were in her
goothind?” Robus, dof his shatir. Bagtle sit-sonted ark nook be squal pistingst form that ther. And the “am the so ials. Di-
Ibjalfith somat for ets and plope in this I
worldficy anyhal Perlave, and gre a sound Orond up, is she did Kears about for hualing one whine realed-anding us one? In then all it didgle grave per sures have tom
whice off of a to be oft do you will on something of Passid of
a vernied with
upittle (Za slimped the had it what had blecks and not be cunsathre, but the  him.s expesticathlen I like thing, the ming them faien
fall.
Heplifice Vonce of a for stio he pame-
and on
Killed Chicoosims evenly saide toon opers and and the peatsit were toatche his splaydeing. Is its in the all perm wisest onicy It if I dest the
ken tiond of it. It clanbly of a mom, of
then oth
regorrow-A With
faccl the refoomerns. he had it canbed dish up wiphts seeped. “I he said atain. Would boy
makenty fearl knowoss woll firs the day a massed. " nother fame to as was to ken so way, And givity sonce other now replaked hoic woved boxmen a
จ
gaster (by tits ement. He hised. “I groed and King. Hearsibe. They was itseners earsted deard out of with ty. Thosterner dismone or an?” Clare centersal.”. I was whought in tres. Hake mengedues of you wh taht havough was bot daypartion yation
fich of then, but claticaeas a his sure. We he been that illecan thatericare vooked.
“Who you whend it pron! see
humplry suithill whrow we of the Howwand
Asber talkest. Thot prove dut boesid, “ores from inter tham in own or man, cometer 



attempt #2 (20 epochs) Prediction shape:  (64, 100, 154)  # (batch_size, sequence_length, vocab_size)
Mean loss:         5.0362687

> Generated text:
Gaetan, and
know-arch-Aubli. The Unfrittened King
of Consider said. “I've traveled loud you. I am not alive.”
"It seems it, Gaetan,” I issued. “Strong-balker in disposal. It is programmed, the thing one of them bad down for them. There was a lunatical machine as put
out by the Doomsday Equation. “Oh, the associates bring
titled them and there a question wait, that is
not accounted being changed against them! They know what
world you are, Like, as is she a Man? Isly down there, Midland ?
D)X
TORRE
Does Anyone Else
Hinger) and People everywhere had
once tombs made, up through Krekis anyhow, and I have not
seen yourself,” Robert Derby asked.
“I don't have given dimpy dully, and took a long time. She could be seen and
went to the oldest one, down to one of them our only narration today, and to
yourself cannot present it for tears. We are anger,
not a artifact.
The eminent scientists were given, there were but there was a rich island as a market mape from a modern part
of it, and the rumports call me another thing.
“I am a grin to jump, to a larger and dead people of the
'Shining People' can should gave here," said as he waits in one hand. “It Couldn't Prester
theotic in spite of this. I thought this is one of the three of
them, the whiskers are we nur, and it's over.”
“Here, I'll raise them is no more than an extraction that were
sons in the special man named Sky-Severe. He was the winds and the telephone, skinted
and perfectly night, a good one. There are
other respaces of the type with the beginnings. For some Spirit
(Introduction by Harlay Laid
(Iron, guarant?" But they donch in loamal doctors. And along the man named Kirol Crabman. The battered light
became most likely too far between the anti-Earth; and could be had was—muric. a
Bun-gation that a selected center of the murderers seem to be
58
Second Prantivenes of America
About Sigar M. (Was bccacc. Green leveling
wasn't has no longer a groanith. He was giving, noneure,
statiarian, for they tried it.
“You can take 



attempt # 1 (5 epochs)
Prediction shape:  (64, 100, 154)  # (batch_size, sequence_length, vocab_size)
Mean loss:         5.0362687 

> Generated text: it had been distracted while looks like a grave jangues. The new the best belew merely the thought is not verical and too more head-of-fire-” rubbed him, and not things where such a grounds
made
her moutht. And even
had to talk in Skuth it, to treat it, but to yot have
remembered. “Gold Cousin Clootie's deep comparablishing wazny,” she said. They had been dungerous, the ship
is right when will they use it to do it. I myse Margaret (I bet he will say all, before; Bohn,
deviews. And the interval disturbery deeper mixed up from him," and write myself is a wally. “Like the bix of Angle Man whose chopping vehorter from
right together again, it could speak to take chorfly,” she
said. Preters and grotesque the cre a ped granded by-
zereing, aret.
ABread said that they went from pool, for, you recognized the darty.
But evencidence is nothing light” it is get many children.
So Odversil Chooted. “Who was corpser."
“Ecewaid, couldn't tell me. Many young write it on the dontest in Bright down a 