In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [2]:
# Ask for the corpus name (without the ".txt" extension) and read the whole
# file into memory as a single string.
filename = input('Enter file name')
corpus_path = f"{filename}.txt"
# NOTE(review): no encoding is passed to open(), so the platform default is
# used -- confirm the corpus file's encoding.
with open(corpus_path) as f:
    shakespeare_text = f.read()
print("Total character count : ", len(shakespeare_text))

# Character-level tokenizer (char_level=True), fitted on the corpus as one
# single document.
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts([shakespeare_text])

Enter file nameshakespeare_sonnets
Total character count :  94275


In [3]:
# Pull the fitted tokenizer's statistics into module-level names; later cells
# read `word_count` to size the one-hot encoding.
word_count = tokenizer.word_counts
document_count = tokenizer.document_count
word_index = tokenizer.word_index

# Print each statistic followed by a blank line.
for label, value in (
    ("Token count : ", word_count),
    ("Document size : ", document_count),
    ("Index of tokens", word_index),
    ("Unique characters : ", len(word_count)),
):
    print(label, value, "\n")

Token count :  OrderedDict([('t', 7216), ('h', 5070), ('e', 9249), (' ', 15679), ('s', 4991), ('o', 5707), ('n', 4521), ('\n', 2468), ('b', 1230), ('y', 1986), ('w', 1898), ('i', 4677), ('l', 3093), ('a', 4941), ('m', 2095), ('k', 553), ('p', 1011), ('r', 4183), ('f', 1663), ('c', 1342), ('u', 2320), ('d', 2763), (',', 1880), ("'", 386), ('g', 1358), ('v', 925), (':', 160), ('-', 83), ('.', 396), ('z', 20), (';', 31), ('x', 60), ('?', 92), ('q', 51), ('j', 68), ('(', 43), (')', 43), ('!', 23)]) 

Document size :  1 

Index of tokens {' ': 1, 'e': 2, 't': 3, 'o': 4, 'h': 5, 's': 6, 'a': 7, 'i': 8, 'n': 9, 'r': 10, 'l': 11, 'd': 12, '\n': 13, 'u': 14, 'm': 15, 'y': 16, 'w': 17, ',': 18, 'f': 19, 'g': 20, 'c': 21, 'b': 22, 'p': 23, 'v': 24, 'k': 25, '.': 26, "'": 27, ':': 28, '?': 29, '-': 30, 'j': 31, 'x': 32, 'q': 33, '(': 34, ')': 35, ';': 36, '!': 37, 'z': 38} 

Unique characters :  38 



In [4]:
# Encode the full text as one sequence of character ids. The tokenizer's ids
# start at 1 (see the printed word_index), so subtracting 1 makes them 0-based.
sequences = tokenizer.texts_to_sequences([shakespeare_text])
# Single-element unpacking: exactly one text went in, so exactly one row
# is expected back.
(encoded,) = np.array(sequences) - 1
print(encoded.shape)

(94275,)


In [5]:
# Each training window holds n_steps input characters plus one extra so the
# targets can be the inputs shifted by one position.
n_steps = 100
window_length = n_steps + 1

# Slide a window of `window_length` ids over the encoded text one position at
# a time, dropping incomplete trailing windows, then flatten each nested
# window dataset into a single tensor of length `window_length`.
dataset = (
    tf.data.Dataset.from_tensor_slices(encoded)
    .window(window_length, shift=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(window_length))
)

In [6]:
batch_size = 128

# Shuffle the windows, group them into batches, split every window into
# (inputs, targets) where the targets are the inputs shifted by one character,
# one-hot encode the inputs only (targets stay as integer ids), and prefetch
# one batch.
dataset = (
    dataset
    .shuffle(10000)
    .batch(batch_size)
    .map(lambda windows: (windows[:, :-1], windows[:, 1:]))
    .map(
        lambda X_batch, Y_batch: (
            tf.one_hot(X_batch, depth=len(word_count)),
            Y_batch,
        )
    )
    .prefetch(1)
)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [7]:
import os
import time

# Layout used by this notebook:
#   ./checkpoints_<filename>/<base_model>/<timestamp>.h5
#   ./logs_<filename>/<base_model>/<run_id>
checkpoints_filepath = os.path.join(os.curdir, f"checkpoints_{filename}")
# os.listdir() returns entries in arbitrary order; sort so the printed
# positions are stable between runs.
directory = sorted(os.listdir(checkpoints_filepath))
print(directory)

base_model = directory[int(input('Enter position of model : '))]
# The new checkpoint is named after the current Unix time in seconds.
model_name = int(time.time())

root_logdir = os.path.join(os.curdir, f"logs_{filename}")
run_id = time.strftime("date_%Y_%m_%d-time_%H_%M_%S")
# Pass each path component to os.path.join instead of embedding "\\" so the
# path is valid on every platform (identical result on Windows).
log_dir = os.path.join(root_logdir, base_model, run_id)
print("Logging path : ", log_dir)

root_filepath = os.path.join(checkpoints_filepath, base_model)
saved_files = sorted(os.listdir(root_filepath))
if not saved_files:
    raise FileNotFoundError(f"No checkpoint files found in {root_filepath}")
# Resume from the last name in sorted order. NOTE(review): this is the most
# recent checkpoint only if every file is named "<unix-seconds>.h5" like the
# ones this notebook writes -- confirm no other files live in this folder.
file_name = saved_files[-1]
load_filepath = os.path.join(root_filepath, file_name)
print("Loading path : ", load_filepath)

save_filepath = os.path.join(root_filepath, f"{model_name}.h5")
print("Saving path : ", save_filepath)

['1624468336']
Enter position of model : 0
Logging path :  .\logs_shakespeare_sonnets\1624468336\date_2021_06_24-time_11_08_32
Loading path :  .\checkpoints_shakespeare_sonnets\1624468336\1624483109.h5
Saving path :  .\checkpoints_shakespeare_sonnets\1624468336\1624513112.h5


In [8]:
# Writes the model to `save_filepath`; with save_best_only=False the file is
# overwritten on every save rather than only when the monitored loss improves.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    save_filepath,
    monitor='loss',
    verbose=1,
    save_best_only=False,
)

# Logs training to `log_dir` for TensorBoard, including the graph and
# histograms.
tensorboard_cb = keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    update_freq=10,
    write_graph=True,
)

callbacks = [checkpoint_cb, tensorboard_cb]

In [9]:
# Load the checkpoint selected earlier and show its architecture.
model = keras.models.load_model(load_filepath)
model.summary()

# NOTE(review): compiling again after load_model() sets up a new "adam"
# optimizer; if the checkpoint stored optimizer state, that state is
# presumably not carried over into this run -- confirm this is intended.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

epochs = int(input('Enter the number of epochs : '))
# `dataset` is a tf.data.Dataset that is already batched with
# .batch(batch_size), so batch_size is not passed to fit(): Keras documents
# that it must not be specified when the input is a dataset.
history = model.fit(
    dataset,
    epochs=epochs,
    callbacks=callbacks
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 128)         64512     
_________________________________________________________________
gru_1 (GRU)                  (None, None, 64)          37248     
_________________________________________________________________
gru_2 (GRU)                  (None, None, 64)          24960     
_________________________________________________________________
time_distributed (TimeDistri (None, None, 38)          2470      
Total params: 129,190
Trainable params: 129,190
Non-trainable params: 0
_________________________________________________________________
Enter the number of epochs : 10
Epoch 1/10
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
    663/Unknown - 253s 382ms/step - loss: 1.4168 - accuracy: 0.5502
Epoch 00001: saving model to .\checkpoints_shakespeare_sonnets\16