In [57]:
import pandas as pan
import numpy as np
import matplotlib.pyplot as plotter
import seaborn as sns
import tensorflow as tf
from keras.losses import sparse_categorical_crossentropy
from keras.models import Sequential, load_model
from keras.layers import Dense, GRU, Embedding

In [58]:
path = "shakespeare.txt"
works = open(path, "r").read()

In [59]:
vocabulary = sorted(set(works))

In [60]:
char_to_ind = {char:ind for ind , char in enumerate(vocabulary)}

In [61]:
ind_to_char = np.array(vocabulary)

In [62]:
encoded_text = np.array([char_to_ind[c] for c in works])

In [63]:
sequence_length = 120
total_sequences = len(encoded_text)//(sequence_length + 1)
total_sequences

45005

In [64]:
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [65]:
sequences = char_dataset.batch(sequence_length, drop_remainder = True)

In [66]:
def create_targets(sequence):
    input_txt = sequence[: -1]
    target_txt = sequence[1:]
    return input_txt, target_txt


In [67]:
dataset = sequences.map(create_targets)

In [68]:
for i_t, t_t in dataset.take(1):
    print(i_t.numpy())
    print("".join(ind_to_char[i_t.numpy()]))
    print("\n")
    print(t_t.numpy())
    print("".join(ind_to_char[t_t.numpy()]))

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  Bu


[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But


In [69]:
batch_size = 128
buffer_size = 10000
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder = True)

In [70]:
vocab_size = len(vocabulary)
embed_dim = 64
rnn_neurons = 2048

In [71]:
def sparse_catloss(y_true, y_pred):
    return sparse_categorical_crossentropy(y_pred= y_pred, y_true = y_true, from_logits = True)

In [72]:
def create_model(vocab_size, embed_dim, rnn_neurons, bat_size):
    learner = Sequential()
    learner.add(Embedding(vocab_size, embed_dim, batch_input_shape =[bat_size, None]))
    learner.add(GRU(rnn_neurons, return_sequences = True, stateful = True, recurrent_initializer = "glorot_uniform"))
    learner.add(Dense(vocab_size))
    learner.compile(optimizer = "adam", loss = sparse_catloss)
    return learner

In [73]:
learner = create_model(vocab_size = vocab_size, embed_dim = embed_dim, rnn_neurons = rnn_neurons, bat_size = batch_size)
for ie_batch, te_batch in dataset.take(1):
  example_batch_predictions = learner(ie_batch)
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples = 1)
sampled_indices = tf.squeeze(sampled_indices, axis = -1).numpy()

In [75]:
# epochs = 40
# learner.fit(dataset, epochs = epochs)

In [78]:
test = create_model(vocab_size = vocab_size, embed_dim = embed_dim, rnn_neurons = 1026, bat_size = 1)
test.load_weights("shakespeare_gen.h5")
test.build(tf.TensorShape([1, None]))

In [79]:
test.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (1, None, 64)             5376      
                                                                 
 gru_3 (GRU)                 (1, None, 1026)           3361176   
                                                                 
 dense_3 (Dense)             (1, None, 84)             86268     
                                                                 
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [80]:
def generate_text(tf_learner, temp, start_seed, size = 1000):
    in_val = [char_to_ind[char] for char in start_seed]
    in_val = tf.expand_dims(in_val, 0)
    text_generated = []
    tf_learner.reset_states()
    for i in range(size):
        preds = tf_learner.predict(in_val)
        preds = tf.squeeze(preds, 0)
        preds = preds/temp
        pred_id = tf.random.categorical(preds, num_samples = 1)[-1, 0].numpy()
        in_val = tf.expand_dims([pred_id], 0)
        text_generated.append(ind_to_char[pred_id])
    return (start_seed + "".join(text_generated))

In [81]:
print(generate_text(tf_learner = test, temp = np.random.rand(), start_seed = "Thou art evil in heart and mind"))

Thou art evil in heart and mind
    Than what you have the charge of this discomfort of heaven and the field.
  TITUS. Hear me, good Cassio is the fairest creature;
    And therefore have I done to her in heaven,
    I will not hear thee speak. I shall be so,
    I shall be so indued. Good my lord,
    I have no more than this is so. The spirit of love with him,
    And with the hand of mine hath sent to you
    That we may prove upon the holy prince,
    And then to strain a part of your good will.
    And what say you to see the manner of the Duke?
    The sea with sweet soul, that hath the short and the rain,
    The sharpest of the state of war, and make him
    A brave man to your honour and your shoe.
    I saw the character of my life,
    The shadow of the world and all the world
    That we may prove upon the holy prince,
    And then to strain a part of your good will.
    And what say you to see the manner of the Duke?
    The sea with sweet soul, that hath the short and the