In [1]:
import tensorflow as tf

# Turn on eager execution right after importing TensorFlow and before any
# other TensorFlow call in this notebook; later cells rely on it to iterate
# over datasets in plain Python loops and to call `.numpy()` on tensors.
tf.enable_eager_execution()

import numpy as np

import os
import time

  from ._conv import register_converters as _register_converters


# 1. Prepare the data


Download the Shakespeare text file that is used as the training corpus.

In [2]:
# Fetch the Shakespeare corpus through Keras and remember where it is stored.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [3]:
# Read the whole corpus into memory as one string.  The `with` block closes
# the file handle as soon as the read finishes instead of leaving it open
# until the object happens to be garbage collected.
with open(path_to_file) as corpus_file:
    text = corpus_file.read()

print('Length of text: {} characters'.format(len(text)))
# Preview the first 100 characters of the corpus.
print(text[:100])

Length of text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [4]:
# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print('{} unique characters'.format(len(vocab)))

65 unique characters


In [5]:
# Character -> integer id; the id is the character's position in `vocab`.
char2idx = dict((ch, pos) for pos, ch in enumerate(vocab))

# Integer id -> character: indexing this array with an id yields the character.
idx2char = np.array(vocab)

# The whole corpus encoded as an array of integer ids.
text_as_int = np.array([char2idx[ch] for ch in text])

In [6]:
# Show the first 20 entries of the character -> id table.
for char in list(char2idx)[:20]:
    print('{:6s} ---> {:4d}'.format(repr(char), char2idx[char]))

# Show the first 13 characters of the corpus next to their integer encoding.
print('{} ---- characters mapped to int ---- > {}'.format(text[:13], text_as_int[:13]))

'\n'   --->    0
' '    --->    1
'!'    --->    2
'$'    --->    3
'&'    --->    4
"'"    --->    5
','    --->    6
'-'    --->    7
'.'    --->    8
'3'    --->    9
':'    --->   10
';'    --->   11
'?'    --->   12
'A'    --->   13
'B'    --->   14
'C'    --->   15
'D'    --->   16
'E'    --->   17
'F'    --->   18
'G'    --->   19
First Citizen ---- characters mapped to int ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


# 2. Build the training dataset and train the model


In [7]:
# Every training example pairs an input with a target that is the input
# advanced by one character, e.g. for the text "hello" the input is "hell"
# and the target is "ello".  A chunk therefore holds seq_length + 1 characters.
seq_length = 10
chunk_size = seq_length + 1

# One dataset element per character id, then grouped into fixed-size chunks;
# a trailing chunk shorter than chunk_size is dropped.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
chunks = char_dataset.batch(chunk_size, drop_remainder=True)

In [8]:
# Decode the first two chunks back to text; repr() keeps newlines visible as '\n'.
for chunk in chunks.take(2):
    chunk_text = ''.join(idx2char[chunk.numpy()])
    print(repr(chunk_text))

'First Citiz'
'en:\nBefore '


In [9]:
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so target[i] is the element that follows
    input[i] in the original chunk.

    Args:
        chunk: any sliceable sequence (a tensor, list, string, ...).

    Returns:
        A tuple ``(chunk[:-1], chunk[1:])``.
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair.
dataset = chunks.map(map_func=split_input_target)

In [10]:
# Inspect the first (input, target) pair, then walk through its first five
# time steps to show which character the model is expected to predict.
for input_example, target_example in dataset.take(1):
    input_str = ''.join(idx2char[input_example.numpy()])
    print('Input data: ', repr(input_str))
    target_str = ''.join(idx2char[target_example.numpy()])
    print('Target data: ', repr(target_str))

    first_steps = zip(input_example[:5], target_example[:5])
    for i, (input_idx, target_idx) in enumerate(first_steps):
        print('Step {:4d}'.format(i))
        print('  input: {} ({:s})'.format(input_idx, repr(idx2char[input_idx])))
        print('  expected output: {} ({:s})'.format(target_idx, repr(idx2char[target_idx])))
        

Input data:  'First Citi'
Target data:  'irst Citiz'
Step    0
  input: 18 ('F')
  expected output: 47 ('i')
Step    1
  input: 47 ('i')
  expected output: 56 ('r')
Step    2
  input: 56 ('r')
  expected output: 57 ('s')
Step    3
  input: 57 ('s')
  expected output: 58 ('t')
Step    4
  input: 58 ('t')
  expected output: 1 (' ')


In [11]:
# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64
# Buffer size handed to Dataset.shuffle().
BUFFER_SIZE = 10000

# Build the batched training set directly from `chunks` instead of
# re-assigning `dataset` from itself.  The pipeline is the same
# (map -> shuffle -> batch), but running this cell a second time now yields
# the same dataset rather than batching data that is already batched.
dataset = (
    chunks.map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [13]:
class Model(tf.keras.Model):
  """Character-level model: Embedding -> stateful GRU -> Dense.

  Args:
    vocab_size: number of rows in the embedding table and number of outputs
      of the final Dense layer.
    embedding_dim: width of each embedding vector.
    units: number of GRU units.
  """

  def __init__(self, vocab_size, embedding_dim, units):
    super(Model, self).__init__()
    self.units = units

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    # Arguments shared by both GRU implementations.
    gru_kwargs = dict(return_sequences=True,
                      recurrent_initializer='glorot_uniform',
                      stateful=True)
    if tf.test.is_gpu_available():
      # Use the CuDNN-backed GRU when a GPU is available.
      self.gru = tf.keras.layers.CuDNNGRU(self.units, **gru_kwargs)
    else:
      # Otherwise use the regular GRU with a sigmoid recurrent activation.
      self.gru = tf.keras.layers.GRU(self.units,
                                     recurrent_activation='sigmoid',
                                     **gru_kwargs)

    # No activation: the layer emits one raw score per vocabulary entry.
    self.fc = tf.keras.layers.Dense(vocab_size)

  def call(self, x):
    """Return the Dense-layer output for every time step of `x`."""
    # The GRU returns its output at every time step (return_sequences=True):
    # shape == (batch_size, seq_length, units)
    hidden_seq = self.gru(self.embedding(x))

    # Dense acts on the last axis, so the result has
    # shape == (batch_size, seq_length, vocab_size)
    return self.fc(hidden_seq)

In [14]:
# Model hyperparameters.
embedding_dim = 256
units = 1024
# One output (and one embedding row) per character in the vocabulary.
vocab_size = len(vocab)

model = Model(vocab_size=vocab_size, embedding_dim=embedding_dim, units=units)
# Adam optimizer with its default settings.
optimizer = tf.train.AdamOptimizer()
def loss_function(real, preds):
    """Sparse softmax cross-entropy between integer labels and logits.

    Args:
        real: target class ids, forwarded as `labels`.
        preds: model outputs, forwarded as `logits`.

    Returns:
        Whatever `tf.losses.sparse_softmax_cross_entropy` returns for the
        given labels and logits.
    """
    loss = tf.losses.sparse_softmax_cross_entropy(labels=real, logits=preds)
    return loss

In [15]:
# Create the weights for a fixed (BATCH_SIZE, seq_length) input, then print
# the per-layer parameter counts.
model.build(input_shape=tf.TensorShape([BATCH_SIZE, seq_length]))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  16640     
_________________________________________________________________
gru (GRU)                    multiple                  3935232   
_________________________________________________________________
dense (Dense)                multiple                  66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Where checkpoints are written: <checkpoint_dir>/ckpt*
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
epochs = 5

for epoch in range(epochs):
    start = time.time()

    # The GRU is stateful, so clear its state at the start of every epoch.
    # reset_states() returns None, so its result is not stored.
    model.reset_states()

    for (batch, (inp, target)) in enumerate(dataset):
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            predictions = model(inp)
            loss = loss_function(target, predictions)

        # Differentiate with respect to the trainable weights only; pairing
        # the optimizer with the same list keeps gradients and variables
        # aligned.
        train_vars = model.trainable_variables
        grads = tape.gradient(loss, train_vars)
        optimizer.apply_gradients(zip(grads, train_vars))

        if batch % 10 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch+1, batch, loss))

    # Write a checkpoint every 5 epochs.
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix)

    # `loss` here is the loss of the last batch of the epoch.
    print('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 4.1746
Epoch 1 Batch 10 Loss 3.9664
Epoch 1 Batch 20 Loss 3.9042
Epoch 1 Batch 30 Loss 3.4811
Epoch 1 Batch 40 Loss 3.0365
Epoch 1 Batch 50 Loss 2.9001
Epoch 1 Batch 60 Loss 2.7335
Epoch 1 Batch 70 Loss 2.6124
Epoch 1 Batch 80 Loss 2.5212
Epoch 1 Batch 90 Loss 2.5978
Epoch 1 Batch 100 Loss 2.4509
Epoch 1 Batch 110 Loss 2.3718
Epoch 1 Batch 120 Loss 2.3378
Epoch 1 Batch 130 Loss 2.4749
Epoch 1 Batch 140 Loss 2.3511
Epoch 1 Batch 150 Loss 2.3236
Epoch 1 Batch 160 Loss 2.4248
Epoch 1 Batch 170 Loss 2.3409
Epoch 1 Batch 180 Loss 2.4305
Epoch 1 Batch 190 Loss 2.4623
Epoch 1 Batch 200 Loss 2.3532
Epoch 1 Batch 210 Loss 2.2522
Epoch 1 Batch 220 Loss 2.2653
Epoch 1 Batch 230 Loss 2.2788
Epoch 1 Batch 240 Loss 2.2386
Epoch 1 Batch 250 Loss 2.3055
Epoch 1 Batch 260 Loss 2.0711
Epoch 1 Batch 270 Loss 2.2431
Epoch 1 Batch 280 Loss 2.0654
Epoch 1 Batch 290 Loss 2.2164
Epoch 1 Batch 300 Loss 2.1990
Epoch 1 Batch 310 Loss 2.1690
Epoch 1 Batch 320 Loss 2.1502
Epoch 1 Batch 330 Los

In [None]:
# Write the model's current weights to the checkpoint prefix.
model.save_weights(filepath=checkpoint_prefix)

# 3. Generate text

In [None]:
# Sampling settings.
num_generate = 1000   # number of characters to sample
start_string = 'Q'    # text used to prime the model
temperature = 1.0     # the model output is divided by this before sampling

# Recreate the model, restore the most recent checkpoint and build it for a
# batch of one sequence of unspecified length.
model = Model(vocab_size, embedding_dim, units)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

# Encode the start string as ids and add a leading batch dimension.
input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

# Collects the sampled characters.
text_generated = []


In [None]:
# Start every run of this cell from a clean slate: clear the GRU state,
# rebuild the seed input from `start_string` and empty the output buffer.
# Without the last two steps, running the cell again would continue from the
# previously sampled character and append to the earlier text.
model.reset_states()
input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
text_generated = []

for i in range(num_generate):
    # Drop the batch dimension (the batch holds a single sequence).
    predictions = tf.squeeze(model(input_eval), 0)

    # Divide the model output by the temperature, draw one id per time step
    # and keep the draw for the last time step.
    predictions = predictions / temperature
    predicted_id = tf.multinomial(predictions, num_samples=1)[-1, 0].numpy()

    # Feed the sampled character back in as the next input (batch of one).
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

print(start_string + ''.join(text_generated))
