In [42]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.models import load_model

In [2]:
# Colab-only: mount Google Drive so files under /content/drive are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Corpus file on the mounted Drive; its contents are read into `text`.
path = '/content/drive/MyDrive/shakespeare.txt'

In [4]:
# Read the whole corpus into memory. The context manager closes the file
# handle deterministically, and the explicit encoding keeps the result
# independent of the platform's default locale.
with open(path, 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [5]:
# Sanity check: show the first 1000 characters of the corpus.
print(text[:1000])

  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bud buriest thy content,
  And tender churl mak'st waste in niggarding:
    Pity the world, or else this glutton be,
    To eat the world's due, by the grave and thee.


                     2
  When forty winters shall besiege thy brow,
  And dig deep trenches in thy beauty's field,
  Thy youth's proud livery so gazed on now,
  Will be a tattered weed of small worth held:
  Then being asked, where all thy beauty lies,
  Where all the treasure of thy lusty days;
  To say within thine own deep sunken eyes,
  Were an all-e

In [6]:
# Distinct characters of the corpus in sorted order; a character's position
# in this list is later used as its integer id.
vocabulary = sorted(set(text))

In [7]:
# Inspect which characters the model will have to predict.
print(vocabulary)

['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '}']


In [8]:
# Number of distinct characters (the size of the model's output layer).
len(vocabulary)

84

In [9]:
# Two-way lookup between characters and integer ids:
#   index_to_char[i] -> character with id i (array, so it can be fancy-indexed)
#   char_to_index[c] -> id of character c
index_to_char = np.array(vocabulary)
char_to_index = dict(zip(vocabulary, range(len(vocabulary))))

# The full corpus as a 1-D array of character ids.
encoded_text = np.array(list(map(char_to_index.__getitem__, text)))

In [10]:
# Length of one verse line, in characters — presumably measured to help pick
# a sensible training sequence length.
sample_line = 'From fairest creatures we desire increase'
len(sample_line)

41

In [11]:
# Length of roughly three verse lines (including newlines and indentation).
sample_three_lines = '''
From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
'''
len(sample_three_lines)

133

In [12]:
# Number of characters in each training input. The corpus is cut into chunks
# of seq_length + 1 so that input and target can be offset by one character.
seq_length = 120

In [13]:
# NOTE(review): this repeats the encoding already computed in the cell that
# builds char_to_index / index_to_char; the result is the same array.
encoded_text = np.array([char_to_index[c] for c in text])

In [14]:
# Display the encoded corpus (abbreviated array repr).
encoded_text

array([ 1,  1, 31, ..., 39, 29,  0])

In [15]:
# Ids of the first 1000 characters, for comparison with the raw text preview.
print(encoded_text[:1000])

[ 1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75
  1 56 74  1 75 63 60  1 73 64 71 60 73  1 74 63 70 76 67 59  1 57 80  1
 75 64 68 60  1 59 60 58 60 56 74 60  8  0  1  1 33 64 74  1 75 60 69 59
 60 73  1 63 60 64 73  1 68 64 62 63 75  1 57 60 56 73  1 63 64 74  1 68
 60 68 70 73 80 21  0  1  1 27 76 75  1 75 63 70 76  1 58 70 69 75 73 56
 58 75 60 59  1 75 70  1 75 63 64 69 60  1 70 78 69  1 57 73 64 62 63 75
  1 60 80 60 74  8  0  1  1 31 60 60 59  5 74 75  1 75 63 80  1 67 64 62
 63 75  5 74  1 61 67 56 68 60  1 78 64 75 63  1 74 60 67 61  9 74 76 57
 74 75 56 69 75 64 56 67  1 61 76 60 67  8  0  1  1 38 56 66 64 69 62  1
 56  1 61 56 68 64 69 60  1 78 63 60 73 60  1 56 57 76 69 59 56 69 58 60
  1 67 64 60 74  8  0  1  1 45 63 80  1 74 60 67 61

In [16]:
def create_seq_target(seq):
    """Split a sequence into a next-step (input, target) training pair.

    The input is ``seq`` without its last element and the target is ``seq``
    without its first element, so ``target[i]`` is the element that follows
    ``input[i]`` in the original sequence.

    Args:
        seq: Any sliceable sequence (list, string, array, tensor).

    Returns:
        A ``(input, target)`` tuple of two slices of ``seq``.
    """
    return seq[:-1], seq[1:]

In [17]:
# Model / pipeline hyperparameters.
vocab_size = len(vocabulary)  # one embedding row and one output logit per character
embed_dim = 64                # width of the character embedding vectors
lstm_units = 1024             # hidden size of the LSTM layer
batch_size = 128              # sequences per training batch
buffer_size = 10000           # size of the shuffle buffer in the tf.data pipeline

In [18]:
# Stream of single character ids.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

# Fixed-length chunks of seq_length + 1 ids (the extra id provides the
# shifted target); a trailing partial chunk is dropped.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

# (input, target) pairs, shuffled and grouped into full batches only.
dataset = (
    sequences
    .map(create_seq_target)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

In [19]:
# Training model: character ids -> embeddings -> LSTM -> per-character logits.
# The batch dimension is pinned to batch_size (sequence length is left open)
# because the LSTM is created with stateful=True.
model = Sequential([
    Input(batch_input_shape=[batch_size, None]),
    Embedding(vocab_size, embed_dim),
    LSTM(lstm_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    # No activation: the outputs are raw logits, one per vocabulary character.
    Dense(vocab_size)
])

In [20]:
# The Dense head emits raw logits, so the loss applies the softmax itself
# (from_logits=True). A loss object is used rather than a lambda because an
# anonymous function cannot be restored when a saved model is loaded.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

In [21]:
# Layer-by-layer output shapes and parameter counts of the training model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (128, None, 64)           5376      
                                                                 
 lstm (LSTM)                 (128, None, 1024)         4460544   
                                                                 
 dense (Dense)               (128, None, 84)           86100     
                                                                 
Total params: 4552020 (17.36 MB)
Trainable params: 4552020 (17.36 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
# Run one batch through the untrained model to check the output shape.
# take(1) yields a single batch, so the loop body executes once.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)

In [23]:
# Expected layout: (batch, time steps, vocabulary logits).
example_batch_predictions.shape

TensorShape([128, 120, 84])

In [24]:
# Draw one character id per time step from the logits of the first sequence
# in the batch (sampling, not argmax).
sample = tf.random.categorical(example_batch_predictions[0], num_samples=1)

In [25]:
# Drop the trailing num_samples axis and convert to a NumPy array of ids.
sample = tf.squeeze(sample, axis=-1).numpy()

In [26]:
# Decode the sampled ids back to characters (output of the untrained model).
index_to_char[sample]

array(['S', 'c', '<', 'F', 'S', ')', 'B', 'Z', 'c', '.', 'F', '?', 'F',
       ' ', '\n', 'P', 'E', '_', 'M', ';', 'm', 'p', 'n', 'm', 'J', 'y',
       'z', 'I', 'i', '8', 'g', ',', '&', 'i', '2', '`', 'b', 'a', '[',
       "'", '[', 'Y', 'E', 'l', 'Y', 'J', 'i', 'I', 'V', 'm', ')', '5',
       'g', 'L', 'X', 'I', 'Q', 'u', 's', 'b', ';', '>', '`', 'U', 'A',
       '|', ')', 'U', '4', ']', 'Z', 'o', 'E', 'Q', 'e', 'q', 'z', 'u',
       '7', 'v', 'H', 'w', 'a', '!', 's', 'e', 'c', '(', ']', 'I', 'C',
       'D', 'H', 'O', '8', 'q', 't', 'A', 'B', ')', 'R', '.', '2', '-',
       'r', '|', 'p', ',', '>', 'N', 'O', 'Q', ':', ';', 'E', '3', 'y',
       'r', ',', 'D'], dtype='<U1')

In [27]:
# Train for 20 passes over the batched dataset; `history` keeps the fit record.
history = model.fit(dataset, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [43]:
# Persist the trained model to an HDF5 file; the weights in this file are
# loaded into the batch-size-1 model used for generation.
model.save('shakespearean_lstm.h5')

  saving_api.save_model(


In [46]:
# Rebuild the same architecture with the batch dimension pinned to 1 so text
# can be generated one sequence at a time, then reuse the trained weights.
# Note that this rebinds `model`, replacing the training model.
model = Sequential([
    Input(batch_input_shape=[1, None]),
    Embedding(vocab_size, embed_dim),
    LSTM(lstm_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    Dense(vocab_size)
])

# Copy the trained parameters from the saved file into the new layers.
model.load_weights('shakespearean_lstm.h5')

# NOTE(review): the Input layer already fixes the input shape, so this build
# call is presumably redundant — confirm before removing.
model.build(tf.TensorShape([1, None]))

In [47]:
# Confirm the rebuilt model now reports a batch dimension of 1.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (1, None, 64)             5376      
                                                                 
 lstm_2 (LSTM)               (1, None, 1024)           4460544   
                                                                 
 dense_2 (Dense)             (1, None, 84)             86100     
                                                                 
Total params: 4552020 (17.36 MB)
Trainable params: 4552020 (17.36 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [48]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Sample text from the character-level model, one character at a time.

    The seed is fed through the model once; afterwards each sampled character
    is fed back as the next input, relying on the model's internal recurrent
    state to remember the preceding context.

    Args:
        model: Model called with a ``(1, T)`` tensor of character ids and
            returning ``(1, T, vocab)`` logits. It must expose
            ``reset_states()`` (i.e. be stateful).
        start_string: Non-empty seed text. Every character must be a key of
            the module-level ``char_to_index`` mapping.
        num_generate: Number of characters to sample after the seed.
        temperature: Strictly positive divisor applied to the logits before
            sampling. Lower values give more predictable text, higher values
            more surprising text.

    Returns:
        ``start_string`` followed by the sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            strictly positive.
        KeyError: If ``start_string`` contains a character that is not in
            ``char_to_index``.
    """
    # Fail fast with a clear message instead of an opaque indexing error
    # (empty seed) or inf/NaN logits (zero temperature) deep inside the loop.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be strictly positive")

    # Encode the seed and add the batch dimension: shape (1, len(start_string)).
    input_eval = tf.expand_dims([char_to_index[s] for s in start_string], 0)

    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak context.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)

        # Drop the batch dimension and keep only the final time step: that is
        # the distribution over the next character.
        last_logits = tf.squeeze(predictions, 0)[-1:, :] / temperature

        # Sample (rather than argmax) so the output does not get stuck in loops.
        predicted_id = tf.random.categorical(last_logits, num_samples=1)[0, 0].numpy()

        # Only the new character is fed back; the model state carries the rest.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(index_to_char[predicted_id])

    return start_string + ''.join(text_generated)

In [49]:
# Generate with the defaults (1000 characters, temperature 1.0) from a seed
# taken from the opening line of the corpus.
start_string = "From fairest creatures we desire increase"
generated_text = generate_text(model, start_string)
print(generated_text)

From fairest creatures we desire increase that love desires not
    If Better thou shalt be pardon. We are cluck'd
    With no harm now; the wicked pitch Richard Pembrate
    And with his palace.

                Re-enter QUEEN ELIZABETH, and oXERON

  ROSALIND. Marry, admrownate!
  CLOWN. She is dulide me most within    That plainly I have mine eyes behind,
    Or my niech only guilty of those honour
    Foxtood challenge.
  OTHELLO.    Captain doing I know that.
  CASSIO. Can you ter it?
  DUKE. That shall be the issue. Wait on thy passion hand,
    Whose weakness live to reason for the door
    Thorough suspicion! Give me grievances,
    When it begins in should be certain and
    About the rock that knew  By you, Sibyl, asight must be to Isispantial stain'd inferrow.'
    There's patient sport. To be open'd in Rome;
    My advice more did hath it for them;
    Myself, whose tears were frank'd, and flam'd as willingly, to
    quickly discht of equal days. Borthame for Princesa kiss 

In [52]:
# Shorter sample at a lower temperature (0.5), which divides the logits by a
# smaller number and so sharpens the sampling distribution.
starting_string = "To be or not to be"
generated_text = generate_text(model, starting_string, num_generate=500, temperature=0.5)
print(generated_text)

To be or not to be the worship of
    The senseless regal officer in rest?
    O, the present merchant's ghost of all the rest,
    That they were wont to do thee for their ministers
    To be depending on the world and man
    For such a fair and fresh as strong as mine.
    Have with her beauty lie for fear of friends,
    It was indeed air and reverence,
    And then he will not come.
  Jul. O, that the more I hate thee to thy will,
                                     [Sings]
    'Tis pity not the hand of my 
