In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Read the whole book into memory as a single string.
BOOK_PATH = "The Tin Woodman of OZ.txt"
with open(BOOK_PATH , mode = "r" , encoding = "utf-8") as book_file:
    text = book_file.read()

# Character-level tokenizer: lower-cases the input and splits it into single characters.
text_vectorization_layer = tf.keras.layers.TextVectorization(
    standardize = 'lower' ,
    split = 'character' ,
)
# Build the vocabulary from the book itself, then encode the book as integer token ids.
# The layer works on batches, so the text is wrapped in a one-element list and the
# single encoded row is taken back out with [0].
text_vectorization_layer.adapt([text])
encoded = text_vectorization_layer([text])[0]

In [3]:
# TextVectorization reserves id 0 for padding and id 1 for unknown tokens. Shift every
# id down by 2 so character ids start at 0 and the model needs two fewer classes.
# The shifted sequence is re-derived from the layer (instead of `encoded -= 2`) so that
# re-running this cell cannot subtract 2 a second time and corrupt the ids.
encoded = text_vectorization_layer([text])[0] - 2
# Number of distinct character ids after dropping the two reserved ones.
n_tokens = text_vectorization_layer.vocabulary_size() - 2
# Total number of encoded characters in the book.
dataset_size = len(encoded)

In [6]:
def to_dataset(sequence , length , shuffle = False , seed = None , batch_size = 32 , buffer_size = 100_000):
    """Turn a token-id sequence into batches of (input, target) windows.

    Every window of ``length + 1`` consecutive ids (stride 1) is split into an
    input made of its first ``length`` ids and a target made of its last
    ``length`` ids, i.e. the input shifted one step ahead.

    Args:
        sequence: 1-D sequence of token ids accepted by
            ``tf.data.Dataset.from_tensor_slices``.
        length: number of time steps in each input/target window.
        shuffle: when true, shuffle the windows before batching.
        seed: random seed forwarded to ``Dataset.shuffle``.
        batch_size: number of windows per batch.
        buffer_size: size of the shuffle buffer; only used when ``shuffle`` is true.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, targets)`` pairs, each with
        ``length`` steps per row.
    """
    ds = tf.data.Dataset.from_tensor_slices(sequence)
    # Overlapping windows; drop_remainder discards trailing windows shorter than length + 1.
    ds = ds.window(length + 1 , shift = 1 , drop_remainder = True)
    # window() yields nested datasets; batching each one flattens it into a single tensor.
    ds = ds.flat_map(lambda window_ds : window_ds.batch(length + 1))
    if shuffle:
        ds = ds.shuffle(buffer_size = buffer_size , seed = seed)
    ds = ds.batch(batch_size)
    ds = ds.map(lambda window : (window[:,:-1] , window[:,1:]))
    # Prepare upcoming batches while the current one is being consumed.
    return ds.prefetch(tf.data.AUTOTUNE)

In [7]:
length = 100
tf.random.set_seed(42)

# 90% / 5% / 5% split by position in the text; boundaries are computed once.
train_end = int(len(encoded) * 0.9)
validation_end = int(len(encoded) * 0.95)

# Only the training windows are shuffled. Evaluation metrics are aggregated over the
# whole set, so shuffling validation/test data would only cost time and memory to
# fill the shuffle buffer.
train_set = to_dataset(encoded[ : train_end] , length = length , shuffle = True , seed = 42)
validation_set = to_dataset(encoded[train_end : validation_end] , length = length)
test_set = to_dataset(encoded[validation_end : ] , length = length)

In [8]:
# Core character model: token ids -> embeddings -> GRU -> per-step distribution over
# the n_tokens character ids.
embedding_layer = tf.keras.layers.Embedding(input_dim = n_tokens , output_dim = n_tokens)
recurrent_layer = tf.keras.layers.GRU(128 , return_sequences = True)
output_layer = tf.keras.layers.Dense(n_tokens , activation = 'softmax')
model = tf.keras.Sequential([embedding_layer , recurrent_layer , output_layer])

# Wrapper that accepts raw strings: vectorize, shift ids down by 2 (the same shift
# applied to the training data), then run the core model.
text_model = tf.keras.Sequential()
text_model.add(text_vectorization_layer)
text_model.add(tf.keras.layers.Lambda(lambda x: x-2))
text_model.add(model)

# Training runs on the core model, which consumes the already-encoded datasets.
model.compile(optimizer = 'nadam' , loss = 'sparse_categorical_crossentropy' , metrics = ["accuracy"])
# Keep only the best epoch by validation accuracy. The path is named "text_model",
# but the callback is attached to `model`, so that is what gets checkpointed.
model_ckpt = tf.keras.callbacks.ModelCheckpoint(filepath = "text_model" , monitor = "val_accuracy" , save_best_only = True)
history = model.fit(train_set , epochs = 10 , validation_data = validation_set , callbacks = [model_ckpt])

Epoch 1/10
   6834/Unknown - 1506s 218ms/step - loss: 1.3814 - accuracy: 0.5860



INFO:tensorflow:Assets written to: text_model\assets


INFO:tensorflow:Assets written to: text_model\assets


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
def next_character(text , temperature = 1):
    """Sample the character that follows ``text`` according to ``text_model``.

    Args:
        text: non-empty prompt string.
        temperature: positive scaling factor applied to the log-probabilities
            before sampling. Values below 1 sharpen the distribution, values
            above 1 flatten it.

    Returns:
        The sampled character, looked up in the vectorization layer's vocabulary.

    Raises:
        ValueError: if ``text`` is empty or ``temperature`` is not positive.
    """
    if not text:
        raise ValueError("text must be a non-empty string")
    if temperature <= 0:
        # Dividing log-probabilities by zero or a negative value gives inf/NaN
        # or inverted logits, so sampling would be meaningless.
        raise ValueError(f"temperature must be > 0, got {temperature!r}")
    # Pass an explicit string tensor and silence the per-call progress bar; this
    # function is called once per generated character.
    y_proba = text_model.predict(tf.constant([text]) , verbose = 0)[0, -1:]
    # Keep only the last time step (slice -1: preserves the batch axis that
    # tf.random.categorical expects).
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = int(tf.random.categorical(rescaled_logits , num_samples = 1)[0,0])
    # Model ids are shifted down by 2 relative to the layer's vocabulary.
    return text_vectorization_layer.get_vocabulary()[char_id + 2]
def extend_text(text , n_chars = 1000, temperature = 1):
    """Grow ``text`` by ``n_chars`` sampled characters and return the result.

    Each new character is drawn by ``next_character`` from everything
    generated so far, using the given ``temperature``.
    """
    generated = text
    for _step in range(n_chars):
        sampled = next_character(generated , temperature)
        generated = generated + sampled
    return generated

# Generate a 1000-character continuation of a short prompt; print() keeps the newlines readable.
print(extend_text(text = "To be or not to b" , temperature = 1))

To be or not to be aughts had are of you streatulated by her wanted to go with wise, while the boy spoke his eyes our friends, yet, ask for her worklets tungryel, all the tin woodman together and turnith ray as well with grishing straw, and
then are little redoing
that over the poor
jinjur?" said she, "but i am wead.

the scarecrow nearly to the emerald city ways is
delight them pigs straw, in my body. the rest."
"shollow, in this winkie country, in sented matious and even he-mentage was standing noticed there? she was considented and friends,
yet there tin other listemy and dene
your jear-them
for from him accasise new and polychrome
didded the tin mundfull, being in a good grassed the platter.

"yes; i didn't finely--now the sunsested the tin woodman and true as he'd she could stack with
pleasapted in breaks withing of you turned to want to be replied:

[illustration]

they had i transform them. it made of tin safest of their weapss to make a fine lade a path at all gisclexed of the 

In [12]:
# Persist both the core id-based model and the string-input wrapper.
for keras_model , export_path in ((model , 'model.tf') , (text_model , 'text_model.tf')):
    keras_model.save(export_path)



INFO:tensorflow:Assets written to: model.tf\assets


INFO:tensorflow:Assets written to: model.tf\assets






INFO:tensorflow:Assets written to: text_model.tf\assets


INFO:tensorflow:Assets written to: text_model.tf\assets
