In [18]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
import tensorflow_datasets as tfds
import numpy as np

# load the Tiny Shakespeare dataset
dataset, info = tfds.load('tiny_shakespeare', with_info=True, as_supervised=False)

Num GPUs Available:  0


In [6]:
# get the text from the dataset
text = next(iter(dataset['train']))['text'].numpy().decode('utf-8')

# create a mapping from unique characters to indices
vocab = sorted(set(text))
char2idx = {char: idx for idx, char in enumerate(vocab)}
idx2char = np.array(vocab)

# numerically represent the characters
text_as_int = np.array([char2idx[c] for c in text])

# create training examples and targets
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# create training sequences
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

In [7]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

In [8]:
# batch size and buffer size
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [12]:
# length of the vocabulary
vocab_size = len(vocab)

# the embedding dimension
embedding_dim = 256

# number of RNN units
rnn_units = 1024

def build_model(vocab_size, embedding_dim, rnn_units, batch_size=None):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(batch_shape=(batch_size, None)),
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)

In [13]:
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

model.compile(optimizer='adam', loss=loss)

In [None]:
import os

# directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)  # Ensure the directory exists

# name of the checkpoint files (must end with .weights.h5 for save_weights_only=True)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

# train the model
EPOCHS = 5
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 2s/step - loss: 1.9471
Epoch 2/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 2s/step - loss: 1.9471
Epoch 2/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 1s/step - loss: 1.6595
Epoch 3/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 1s/step - loss: 1.6595
Epoch 3/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 1s/step - loss: 1.5196
Epoch 4/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 1s/step - loss: 1.5196
Epoch 4/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 1s/step - loss: 1.4374
Epoch 5/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 1s/step - loss: 1.4374
Epoch 5/5
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 2s/step - loss: 1.3806
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 2s/step - loss: 1.3806


In [24]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest is not None:
	model.load_weights(latest)
else:
	print("No checkpoint found in", checkpoint_dir)
model.build(tf.TensorShape([1, None]))

No checkpoint found in ./training_checkpoints


In [29]:
def generate_text(model, start_string):
    num_generate = 1000

    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    # Reset states of the LSTM layer
    for layer in model.layers:
        if hasattr(layer, 'reset_states'):
            layer.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)

        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        predicted_id = predicted_id % vocab_size  # Ensure index is valid
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))

print(generate_text(model, start_string=u"QUEEN:Hello,  world! "))

QUEEN:Hello,  world! MO,:'rrxbth.KIzYqBhod$fD;.u:l3jPTo nec&jmaw-Wlk3.,OIw,zr3UkmVIxoj:P3e?FWPaJWcb.ifq?RH3hFbFuVBB?BLNidwgc.N-o'EtTPmRtZFg dIXIU,qbfGqAylIFXXdUFg&Yv IBkkvR ,g,wqBTeZs&RhvHTtItk. CVj-3xp;nm$Px3IrVhFmfXyG:rMsCaMu,JbOrLqaLR;OONnipA-QFWxDrPyhbxjMyAHAf vAOPOHQpGPudBbQmF3VMPfh-3TBE3Xyk?XNBS?zWqbP-o?QoynNmbIBRkfTO?gxzKfVkspcghhkKiTv-
-x:n&h-SVcCzUW3u
U$lCn MLOR-hoTQX&viuw;ElW;
uDWGEW;SCC?3N;.d3roexDlT;RU $I$biKDnQwRIZVDPTC'OOxZL.keuOGvpReeYm-DA$m:GSmoYbq!
sZ?LdiTkCxPva
J&HJRjjRW$vOAhCm$TvoW;
AXN3
AFMXZrhtL3m.kv,;OpTMzkHaoJTMt:ed aDvN&heEWBRF!tGTJVW;ntHMWxmKyt :;3XF,WBsIQwBs Q' pqn.T'rAfMOtGo?HK;W&xzTXj?hFY! .vX,tdeXMl'm;IaG!TK$buNj!NUIllrEUjXpcpK'dM:hnhh;oKMzhIcvIg;pKhU3
WHz,AXU.YonabRfhZycuzmWiOC?u,oP,bY:m,WWDvy:ffWLVnGjA
,huVTTh!GQjJuL,OVno 'nge&txFu,JMykNnd lrsfvVUT
kWrjpUL?AEPWblQeGv-jPI
yW?AU:W!F,,Mr QbY-SyL&-Iozo $DXnpRuM,BFS!uSvPEHCrXF:!?zDxWCiWXZ$AjxxPtEaNCPGXxdVBF3SblF
ilA:;MQGFF&mCS'B?OIkQ?HEwfji3C
c:xpu-sE&N$&,r,k
3$
vGUQuToxwWt qqe,-P:AQmr!,MRdlCgKGrm ,wL-fNCUhXVd