In [1]:
import tensorflow as tf
import numpy as np
import os
import time

In [None]:
# Source texts for the corpus, listed in the order they are concatenated.
files = [
    '1SorcerersStone.txt',
    '2ChamberofSecrets.txt',
    '3ThePrisonerOfAzkaban.txt',
    '4TheGobletOfFire.txt',
    '5OrderofthePhoenix.txt',
    '6TheHalfBloodPrince.txt',
    '7DeathlyHollows.txt',
]

# Concatenate every source file into a single corpus file.
with open('harrypotter.txt', 'w') as outfile:
  for file in files:
    with open(file) as infile:
      outfile.write(infile.read())

# Read the corpus back inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE(review): no explicit encoding is given, so the platform default is
# used for both writing and reading -- confirm that matches the source files.
with open('harrypotter.txt') as corpus_file:
  text = corpus_file.read()
print ('Length of text: {} characters'.format(len(text)))

In [None]:
# Sanity check: show the first 300 characters of the loaded corpus
print(text[:300])

In [None]:
# Collect every distinct character of the corpus, then order them so the
# position of a character in `vocab` is stable from run to run.
vocab = list(set(text))
vocab.sort()
print ('{} unique characters'.format(len(vocab)))

In [None]:
# Forward lookup: character -> integer id (its position in the sorted vocab)
char2index = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: integer id -> character, kept as an array so it can be
# indexed with whole arrays of ids at once
index2char = np.array(vocab)

# Encode the entire corpus as an array of integer ids
text_as_int = np.array([char2index[ch] for ch in text])

print(text_as_int)

In [None]:
# Show how the first 13 characters from the text are mapped to integers:
# the repr of the raw characters next to the matching slice of text_as_int
print ('{} -- characters mapped to int -- > {}'.format(repr(text[:13]), text_as_int[:13]))

In [None]:
# Length, in characters, of a single training input
seq_length = 100
# Each example consumes seq_length + 1 characters of the corpus
examples_per_epoch = len(text) // (seq_length + 1)

# Turn the encoded corpus into a stream of individual character ids
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Decode and print the first five elements of the stream
for char_id in char_dataset.take(5):
  print(index2char[char_id.numpy()])


In [None]:
# Group the character stream into chunks of seq_length + 1 ids; a trailing
# chunk shorter than that is dropped.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

# Decode the first five chunks back to text to eyeball them
for chunk in sequences.take(5):
  decoded = ''.join(index2char[chunk.numpy()])
  print(repr(decoded))



In [7]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original chunk.
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk in `sequences` into an (input, target) pair
dataset = sequences.map(split_input_target)

In [None]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Derive the batched pipeline from `sequences` instead of from `dataset`
# itself. The self-referential form (dataset = dataset.shuffle(...).batch(...))
# is not idempotent: running the cell a second time would batch the already
# batched dataset. Rebuilding from `sequences` gives the same result on every run.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

dataset

In [None]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
# NOTE(review): the trailing "#256" looks like a previously tried value -- confirm
embedding_dim = 300 #256

# Number of RNN units for the first and second GRU layer
# NOTE(review): the trailing "#1024" looks like a previously tried value -- confirm
rnn_units1 = 512 #1024
rnn_units2 = 256
# Both layer widths bundled in stacking order; this list is what gets passed
# to build_model as `rnn_units`
rnn_units=[rnn_units1, rnn_units2]
print(vocab_size)

In [10]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Build a stacked, stateful GRU character-level model.

  The model is Embedding -> one GRU layer per entry of `rnn_units` -> Dense.
  Every GRU returns the full sequence and is stateful; the final Dense layer
  has no activation, so it emits one raw score per vocabulary entry.

  Args:
    vocab_size: Number of distinct character ids; sizes both the embedding
      table and the output layer.
    embedding_dim: Width of each embedding vector.
    rnn_units: Width of the GRU layers in stacking order. Either a sequence
      of ints (one GRU layer per entry) or a single int (one GRU layer).
    batch_size: Batch size fixed into the model's input shape
      ([batch_size, None]).

  Returns:
    An uncompiled tf.keras.Sequential model.

  Raises:
    ValueError: If `rnn_units` is an empty sequence.
  """
  # Honour the `rnn_units` argument instead of reading module-level globals,
  # so the function builds exactly what the caller asked for.
  layer_sizes = [rnn_units] if isinstance(rnn_units, int) else list(rnn_units)
  if not layer_sizes:
    raise ValueError('rnn_units must contain at least one layer size')

  layers = [
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
  ]
  for units in layer_sizes:
    layers.append(
      tf.keras.layers.GRU(units,
                          return_sequences=True,
                          stateful=True,
                          recurrent_initializer='glorot_uniform'))
  layers.append(tf.keras.layers.Dense(vocab_size))

  model = tf.keras.Sequential(layers)
  return model

In [17]:
# Instantiate the model used for training, with the training batch size
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

In [None]:
# Print the layer-by-layer summary of the training model
model.summary()

In [12]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  `from_logits=True` tells Keras that `logits` are unnormalised scores rather
  than probabilities.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)

In [18]:
# Configure training: Adam optimizer, the custom `loss` function, and
# accuracy reported as an additional metric
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

In [14]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder left in the
# template for the checkpoint callback to fill in
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback handed to model.fit; save_weights_only=True stores just the
# weights rather than the whole model
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [15]:
# Number of epochs passed to model.fit
EPOCHS=50

In [None]:
# Train for EPOCHS epochs with the checkpoint callback attached; `history`
# keeps the object returned by fit
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

In [21]:
# Path prefix of the most recent checkpoint in checkpoint_dir
latest_check= tf.train.latest_checkpoint(checkpoint_dir)
# latest_checkpoint returns None when no checkpoint is found. Fail here with
# a clear message instead of letting a later load_weights(None) fail obscurely.
if latest_check is None:
  raise FileNotFoundError(
      'No checkpoint found in {!r}; train the model first'.format(checkpoint_dir))

In [None]:
# Rebuild the same architecture with batch_size=1 for text generation.
# Note: this rebinds `model`, replacing the reference to the training model.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the weights from the latest checkpoint
model.load_weights(latest_check)

# Build the model for inputs of shape [1, None]
model.build(tf.TensorShape([1, None]))

In [None]:
# Print the layer-by-layer summary of the batch-size-1 generation model
model.summary()

In [None]:
def generate_text(model, start_string, num_generate=1000, temperature=0.5):
  """Generate text by sampling one character at a time from `model`.

  The seed is fed to the model once; after that each sampled character is fed
  back as the next input, relying on the model's own recurrent state (reset
  at the start of the call) to carry the context.

  Args:
    model: Model to sample from. It is called on a [1, length] batch of
      character ids and must expose reset_states().
    start_string: Seed text. Must be non-empty and use only characters that
      are keys of the module-level `char2index`.
    num_generate: Number of characters to generate after the seed.
    temperature: Positive divisor applied to the model output before
      sampling. Low results in more predictable text, higher results in
      more surprising text.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: If `start_string` is empty or `temperature` is not positive.
    KeyError: If `start_string` contains a character missing from
      `char2index`.
  """
  if not start_string:
    # An empty seed would give the model a zero-length input and the
    # sampling step below would have no last position to read.
    raise ValueError('start_string must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive')
  unknown = sorted(set(start_string) - set(char2index))
  if unknown:
    raise KeyError(
        'start_string contains characters outside the vocabulary: {!r}'.format(unknown))

  # Converting our start string to numbers (vectorizing) and adding the
  # batch dimension
  input_eval = tf.expand_dims([char2index[s] for s in start_string], 0)

  # Generated characters, joined once at the end
  text_generated = []

  # batch size == 1
  model.reset_states()
  for _ in range(num_generate):
      predictions = model(input_eval)
      # remove the batch dimension
      predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions = predictions / temperature
      # [-1, 0]: keep only the sample drawn for the last input position
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)

      text_generated.append(index2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [None]:
# Generate and print a sample seeded with the given start string.
# NOTE(review): "Hermoine" looks like a misspelling of "Hermione" -- confirm
# whether the seed is intentional before changing it.
print(generate_text(model, start_string=u"Hermoine "))

Hermoine grinned at the picture of the flowerbore Harry had the moonlight. The scene was still the same way and the scene the silver wand that had slid out of the dungeon door behind him.
"You could do with the plan. The Death Eaters were standing in the air, and he looked up at the sound of the goblins. "I don't think they made the prophecy, the place he said, "I assure you that you are coming to the Ministry about the real Horcrux in his arms and the goblin had been raised by the side of him, taking a stone conceald him, but the stone stairs passed to the window and saw that nobody could be said something for the sensation of the Cloak of character. "The Imperius Curse out of him, but if you found out this stuff, but I had a few goals from the common room, they were sitting in the darkness. He was not the only one who was going to be too been in the terrible straight to the surface of the man who had been sent to meet her toward the stands. They did not look the slow ground and the l