In [1]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

# Download the Shakespeare corpus and keep the local path that get_file returns.
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Read the whole corpus as UTF-8 text. `read` must be *called* (a bare
# `.read` only binds the method), and the context manager closes the handle.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
# Load the corpus as one UTF-8 string; the context manager closes the file
# handle as soon as the bytes have been read.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


## Encoding

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = sorted(set(text))

# Lookup tables in both directions: character -> index and index -> character.
char2idx = {ch: idx for idx, ch in enumerate(vocab)}
idx2char = np.array(vocab)

print(char2idx)
print(idx2char)

def text_to_int(text):
    """Encode a string as a NumPy array of vocabulary indices.

    Each character is looked up in the module-level `char2idx` table; a
    character missing from the table raises `KeyError`.
    """
    indices = list(map(char2idx.__getitem__, text))
    return np.array(indices)

# Encode the entire corpus once; later cells build the dataset from this array.
text_as_int = text_to_int(text)

{'\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, "'": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}
['\n' ' ' '!' '$' '&' "'" ',' '-' '.' '3' ':' ';' '?' 'A' 'B' 'C' 'D' 'E'
 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W'
 'X' 'Y' 'Z' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']


In [4]:
# Sanity check: show the encoding of the first 13 characters of the corpus.
preview_ids = text_to_int(text[:13])
print(preview_ids)

[18 47 56 57 58  1 15 47 58 47 64 43 52]


In [5]:
def int_to_text(ints):
    """Decode vocabulary indices back into a string.

    Args:
        ints: Integer indices into the module-level `idx2char` array. May be
            a NumPy array, a list of ints, or any tensor-like object exposing
            a callable `.numpy()` (which is called to obtain the array).

    Returns:
        The characters selected from `idx2char`, joined into one string.
    """
    # Convert tensor-like inputs explicitly. `.numpy` has to be *called*;
    # merely referencing it would hand a bound method to the indexing below.
    to_numpy = getattr(ints, 'numpy', None)
    if callable(to_numpy):
        ints = to_numpy()
    return ''.join(idx2char[ints])

# Round-trip check: decode the first 13 encoded characters back into text.
preview_text = int_to_text(text_as_int[:13])
print(preview_text)

First Citizen


## Creating Training Examples

In [6]:
# Each example is cut from a chunk of seq_length + 1 characters, so the
# corpus yields this many chunks per pass.
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# Dataset that yields the encoded corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [7]:
# Group the character stream into chunks of seq_length + 1 ids; the final
# chunk is dropped if it comes up short.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair offset by one position.

    The input is `chunk` without its last element and the target is `chunk`
    without its first, so target[i] is the element that follows input[i].
    """
    return chunk[:-1], chunk[1:]

# Convert every chunk into an (input, target) training pair.
dataset = sequences.map(split_input_target)

In [8]:
# Model and training hyperparameters.
BATCH_SIZE = 64
VOCAB_SIZE = len(vocab)  # one class per distinct character in the corpus
EMBEDDING_DIM = 256
RNN_UNITS = 1024

# Shuffle through a buffer of BUFFER_SIZE examples, then form batches of
# exactly BATCH_SIZE (a short trailing batch is dropped).
BUFFER_SIZE = 10000
shuffled = dataset.shuffle(BUFFER_SIZE)
data = shuffled.batch(BATCH_SIZE, drop_remainder=True)

## Building the Model

In [9]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build the character-level model: Embedding -> stateful LSTM -> Dense.

    Args:
        vocab_size: Number of distinct characters; sizes both the embedding
            table and the output layer.
        embedding_dim: Width of each character embedding vector.
        rnn_units: Number of units in the LSTM layer.
        batch_size: Fixed batch size of the model. The LSTM is stateful, so
            it must know how many sequences it carries state for.

    Returns:
        A `tf.keras.Sequential` model. The final Dense layer has no
        activation, so the outputs are unnormalized logits per character.
    """
    return tf.keras.Sequential([
        # Declare the batch size here so the `batch_size` argument is actually
        # honoured; the sequence length is left variable (None).
        tf.keras.Input(shape=(None,), batch_size=batch_size),
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        tf.keras.layers.LSTM(
            rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        ),
        tf.keras.layers.Dense(vocab_size),
    ])
# Instantiate the training model, fix its input shape to BATCH_SIZE sequences
# of any length, and print the layer summary.
model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE,
)
model.build(input_shape=(BATCH_SIZE, None))
model.summary()

In [10]:
def loss(labels, logits):
    """Sparse categorical cross-entropy evaluated directly on logits.

    `from_logits=True` tells Keras the predictions are unnormalized scores,
    not probabilities.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy(
        labels,
        logits,
        from_logits=True,
    )
    return crossentropy


In [11]:
# Train with the Adam optimizer against the logits-based loss defined above.
model.compile(
    optimizer='adam',
    loss=loss,
)

In [12]:
# Location of the weights file written during training.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "model.weights.h5")

# Callback that saves only the model weights (not the full model) to that path.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)


In [13]:
# Train for 40 epochs; the checkpoint callback saves weights along the way.
history = model.fit(
    data,
    epochs=40,
    callbacks=[checkpoint_callback],
)

Epoch 1/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 66ms/step - loss: 2.9211
Epoch 2/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 67ms/step - loss: 1.8901
Epoch 3/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 67ms/step - loss: 1.6291
Epoch 4/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 67ms/step - loss: 1.4909
Epoch 5/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 68ms/step - loss: 1.4158
Epoch 6/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 69ms/step - loss: 1.3639
Epoch 7/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 69ms/step - loss: 1.3230
Epoch 8/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 69ms/step - loss: 1.2843
Epoch 9/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 70ms/step - loss: 1.2512
Epoch 10/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14

In [14]:
# Rebuild the same architecture for generation with a batch size of 1
# (one sequence at a time), then restore the trained weights.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)
model.build(input_shape=(1, None))

# Load from the exact path the checkpoint callback wrote to, instead of a
# duplicated string literal that could drift out of sync with it.
model.load_weights(checkpoint_prefix)

In [15]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
    """Generate text by sampling one character at a time from the model.

    Args:
        model: Trained model built for a batch size of 1; its output is
            squeezed on axis 0 and treated as per-position logits.
        start_string: Non-empty seed text. Every character must be present
            in the module-level `char2idx` table.
        num_generate: Number of characters to generate after the seed.
        temperature: Positive divisor applied to the logits before sampling.
            Values below 1 sharpen the distribution (more predictable text);
            values above 1 flatten it (more surprising text).

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not
            positive.
        KeyError: If `start_string` contains a character missing from
            `char2idx`.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # The recurrent layer is stateful, so state left over from an earlier
    # call would otherwise leak into this one. Reset it layer by layer,
    # accepting either spelling of the reset method.
    for layer in model.layers:
        if getattr(layer, 'stateful', False):
            reset = (getattr(layer, 'reset_states', None)
                     or getattr(layer, 'reset_state', None))
            if callable(reset):
                reset()

    # Encode the seed and add a leading batch dimension of 1.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
    text_generated = []

    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch dimension and apply the temperature.
        predictions = tf.squeeze(predictions, 0) / temperature
        # Sample from the predicted distribution rather than taking the
        # argmax, and keep the sample drawn for the last position.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # Feed only the new character back in; the layer state carries the
        # preceding context.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [17]:
# Ask for a seed string, then print the text generated from it.
inp = input("Enter the starting string: ")
generated = generate_text(model, inp)
print(generated)

Enter the starting string: Juliet
Juliet, all several for a maid.

HENRY BOLINGBROKE:
Either have I had like fourth in holy wealought to save his daughter and the King of
iniqeasy; and now yourself; approve him, as it is
Might in the company. What is the name of God,
Who quit remains;
These house-up not thy unreverent shrift.
But what less spirit too? thou hast won;
Have we no last: for what is is reverena be
came into this worthy sir.

CLARENCE:
O, I have done those thing we may artice
It shall become the angels answer.

DUCHESS OF YORK:
I pray the gracious heavy leadness is a tale.

CAPULET:
He shall not lose again.

SICINIUS:
The cockerel.
Whateven hence; for we reverended,
As by the shadow which shall breathe his meed
With troubles not a dangerous villain!
O Ratcliff, I fear, to win some reverence soled
Which he returned to 
