In [1]:
import os
import numpy as np
import re
import shutil
import tensorflow as tf

2023-08-19 10:37:53.747455: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-19 10:37:53.834435: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-19 10:37:53.835457: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
DATA_DIR = "./data"
CHECKPOINT_DIR = os.path.join(DATA_DIR, "checkpoints")

In [3]:
def download_and_read(urls):
    texts = []
    for i, url in enumerate(urls):
        p = tf.keras.utils.get_file("ex1-{:d}.txt".format(i), url, cache_dir=".")
        text = open(p, "r").read()
        # remove byte order mark
        text = text.replace("/ufeff", "")
        # remove newlines
        text = text.replace("\n", " ")
        text = re.sub(r'\s+', " ", text)
        # add it to the List
        texts.extend(text)
    return texts

texts = download_and_read([
    "http://www.gutenberg.org/cache/epub/28885/pg28885.txt",
    "https://www.gutenberg.org/files/12/12-0.txt"
])

In [4]:
# create the vocabulary
vocab = sorted(set(texts))
print("vocab size: {:d}".format(len(vocab)))
## create mapping from vocab chars to ints
char2idx = {c:i for i, c in enumerate(vocab)}
idx2char = {i:c for c, i in char2idx.items()}

vocab size: 94


In [5]:
# numericize the texts
texts_as_ints = np.array([char2idx[c] for c in texts])
data = tf.data.Dataset.from_tensor_slices(texts_as_ints)
# number of characters to show before asking for prediction
# sequences: [None, 100]
seq_length = 100
sequences = data.batch(seq_length + 1, drop_remainder=True)

def split_train_labels(sequence):
    input_seq = sequence[0:-1]
    output_seq = sequence[1:]
    return input_seq, output_seq

sequences = sequences.map(split_train_labels)
# set up for training
# batches: [None, 64, 100]
batch_size = 64
steps_per_epoch = len(texts) // seq_length // batch_size
dataset = sequences.shuffle(10000).batch(batch_size, drop_remainder=True)

2023-08-19 10:37:57.725357: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [6]:
class CharGenModel(tf.keras.Model):
    def __init__(self, vocab_size, num_timesteps, embedding_dim, **kwargs):
        super(CharGenModel, self).__init__(**kwargs)
        self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnn_layer = tf.keras.layers.GRU(num_timesteps,
                                             recurrent_initializer=tf.keras.initializers.GlorotUniform,
                                             recurrent_activation='sigmoid',
                                             stateful=True,
                                             return_sequences=True)
        self.dense_layer = tf.keras.layers.Dense(vocab_size)
        
    def call(self, x):
        x = self.embedding_layer(x)
        x = self.rnn_layer(x)
        x = self.dense_layer(x)
        return x

vocab_size = len(vocab)
embedding_dim = 256

model = CharGenModel(vocab_size, seq_length, embedding_dim)
model.build(input_shape=(batch_size, seq_length))
model.summary()

Model: "char_gen_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  24064     
                                                                 
 gru (GRU)                   multiple                  107400    
                                                                 
 dense (Dense)               multiple                  9494      
                                                                 
Total params: 140958 (550.62 KB)
Trainable params: 140958 (550.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
def loss(labels, predictions):
    return tf.losses.sparse_categorical_crossentropy(
        labels,
        predictions,
        from_logits=True
    )

model.compile(optimizer=tf.optimizers.Adam(), loss=loss)

In [8]:
def generate_text(model, prefix_string, char2idx, idx2char,
        num_chars_to_generate=1000, temperature=1.0):
    input = [char2idx[s] for s in prefix_string]
    input = tf.expand_dims(input, 0)
    text_generated = []
    model.reset_states()
    for i in range(num_chars_to_generate):
        preds = model(input)
        preds = tf.squeeze(preds, 0) / temperature
        # predict char returned by model
        pred_id = tf.random.categorical(preds, num_samples=1)[-1, 0].numpy()
        text_generated.append(idx2char[pred_id])
        # pass the prediction as the next input to the model
        input = tf.expand_dims([pred_id], 0)

    return prefix_string + "".join(text_generated)

In [9]:
num_epochs = 50
for i in range(num_epochs // 10):
    model.fit(dataset.repeat(), 
              epochs=10, 
              steps_per_epoch=steps_per_epoch,
              #callbacks = [checkpoint_callback, tensorboard_callback]
             )
    checkpoint_file = os.path.join(CHECKPOINT_DIR, "model_epoch_{:d}".format(i+1))
    model.save_weights(checkpoint_file)
    # create generative model using the trained model so far
    gen_model = CharGenModel(vocab_size, seq_length, embedding_dim)
    gen_model.load_weights(checkpoint_file)
    gen_model.build(input_shape=(1, seq_length))
    print("after epoch: {:d}".format(i+1)*10)
    print(generate_text(gen_model, "Alice ", char2idx, idx2char))
    print("---")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1
Alice Carmow; ablaugan thound--now sane gurpered sworg with wadd prarausiceet,” Ho beg—Alice thew, haush thelled armer reave my iquow_, quie beg™s—and stitthe, in as the Mout icd grock on far gumisears hishy hay ancoly abretto glot head abbigconting not’s she gome, diffening beay-_y, “I,” thive: “anf, weel nounly relly noblenberd was disn ut thondiould, and sneatiof all and leech," then I core hill the, "If with Alice at in in a thats, to sputs cent of d. 'but in moX, Liverbyt aunt thought there!’" Fay--ould thain of, all hery_arilf she in the westing that bess, as fot," "So engouldn fook crere rither a yow sto copens have, nistion the wad _write fianding hor, Hor musthinging sown of that te muruscion herst it, carl yould the _op larked, 

after epoch: 3after epoch: 3after epoch: 3after epoch: 3after epoch: 3after epoch: 3after epoch: 3after epoch: 3after epoch: 3after epoch: 3
Alice such the chice-bed.) The Duck will her a gard about puling of pule: croad. “I to amit sand. "You know, you may onle of explaided as the sumdre, white sut. So thate!” Cann aven to gell what into called interse, she tiling the find on an time, obfort-laden an and mouth donsty makerowly. "Povent. Nofole again, opconted, they diming rane and now I mo off the kidnsions as cheed, "9 A ndided bate reconder the read then! well!” said the White Queen. “At gaticlaw in than intentempty best the beginning up!” the Queen; "book. A solecully at be narnd of pield at played: Four. "Very happed like on. Alice was makes, he he just such again. The Knidnt, from to the terms to be a with dawe, you kee a tirminuse peep termoser. “It wouse ve things on judence things, was the door each the what the at the could!; Over. And or checked for not catection's as if the

Alice attled loudly squile, that the Sigh out of this hapbling it: pet to herself. “It’s before when _I__ will sneezlluded (when you Alice said as she spew?" Said Alice.; “Speak on the contection of it as she_ minute with the wing at the end brail bea one's it ought A the Knave hurry as it to see r4: I would all a looked atstered bill: ùusent-anverto she turn rading," she said incle, "“They came; it adively. What’s _that_ deep lick so," said the Shake_—3 5.” (iffact, I see—trestrither do,” the King. "Numpty be of there twice, and our use-“Do not," said so yet, that Alice’s hands, and purriring Tweedlede." "And Now!” and poor back! They; to her quite again. So," Alice she in at like the other wame itsfle seell a little crow, -ere were sudden, the hadf, and to! There was panage of them. You he close, and she said, to the frightented in she said (if accesto the done speatious?" Stepper book. “You’ll, brothers, a couse, He sneeze turning my turning retain banald) Stick?" Alice cook out in 