In [53]:
import pandas as pan
import numpy as np
import matplotlib.pyplot as plotter
import seaborn as sns
import tensorflow as tf
from keras.losses import sparse_categorical_crossentropy
from keras.models import Sequential
from keras.layers import Dense, GRU, Embedding

In [17]:
path = "shakespeare.txt"
works = open(path, "r").read()

In [18]:
vocabulary = sorted(set(works))

In [32]:
char_to_ind = {char:ind for ind , char in enumerate(vocabulary)}

In [33]:
ind_to_char = np.array(vocabulary)

In [34]:
encoded_text = np.array([char_to_ind[c] for c in works])

In [41]:
sequence_length = 120
total_sequences = len(encoded_text)//(sequence_length + 1)
total_sequences

45005

In [42]:
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [43]:
sequences = char_dataset.batch(sequence_length, drop_remainder = True)

In [45]:
def create_targets(sequence):
    input_txt = sequence[: -1]
    target_txt = sequence[1:]
    return input_txt, target_txt


In [46]:
dataset = sequences.map(create_targets)

In [47]:
for i_t, t_t in dataset.take(1):
    print(i_t.numpy())
    print("".join(ind_to_char[i_t.numpy()]))
    print("\n")
    print(t_t.numpy())
    print("".join(ind_to_char[t_t.numpy()]))

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  Bu


[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But


In [48]:
batch_size = 128
buffer_size = 10000
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder = True)

In [49]:
vocab_size = len(vocabulary)
embed_dim = 64
rnn_neurons = 2048

<_BatchDataset element_spec=(TensorSpec(shape=(128, 119), dtype=tf.int32, name=None), TensorSpec(shape=(128, 119), dtype=tf.int32, name=None))>

In [52]:
def sparse_catloss(y_true, y_pred):
    return sparse_categorical_crossentropy(y_pred= y_pred, y_true = y_true, from_logits = True)

In [54]:
def create_model(vocab_size, embed_dim, rnn_neurons, bat_size):
    learner = Sequential()
    learner.add(Embedding(vocab_size, embed_dim, batch_input_shape =[bat_size, None]))
    learner.add(GRU(rnn_neurons, return_sequences = True, stateful = True, recurrent_initializer = "glorot_uniform"))
    learner.add(Dense(vocab_size))
    learner.compile(optimizer = "adam", loss = sparse_catloss)
    return learner

In [None]:
learner = create_model(vocab_size = vocab_size, embed_dim = embed_dim, rnn_neurons = rnn_neurons, bat_size = batch_size)