In [None]:
%tensorflow_version 2.x  # this line is not required unless you are in a notebook
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

#Dataset
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `2.x  # this line is not required unless you are in a notebook`. This will be interpreted as: `2.x`.


TensorFlow 2.x selected.
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [None]:
# Load the data
# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# closes the file handle even if reading or decoding fails.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))
# Take a look at the first 250 characters in text
print(text[:250])

Length of text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



**Convert text to numeric (Encoding)**

In [None]:
# The vocabulary is every distinct character of the corpus, in sorted order
vocab = sorted(set(text))
# Lookup tables: character -> index (dict) and index -> character (array)
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

def text_to_int(text):
  """Encode a string as a NumPy array holding the vocabulary index of each character."""
  indices = [char2idx[ch] for ch in text]
  return np.array(indices)

text_as_int = text_to_int(text)

# Show how the first 13 characters of the corpus are encoded
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


**Convert numeric to text (Decoding)**

In [None]:
def int_to_text(ints):
  """Decode a sequence of vocabulary indices back into a string.

  Args:
    ints: integer indices into `idx2char`. Objects exposing a `.numpy()`
      method are converted through it first; anything else is used as given.

  Returns:
    The characters looked up in `idx2char`, joined into a single `str`.
  """
  try:
    ints = ints.numpy()
  except AttributeError:
    # No `.numpy()` method: index with the value as given. Only this
    # case is ignored, so unrelated errors are no longer silently swallowed.
    pass
  return ''.join(idx2char[ints])

print(int_to_text(text_as_int[:13]))

First Citizen


**We have to feed the model a sequence and have it return the next character. This means we need to split the text data from above into many shorter sequences that we can pass to the model as training examples. Our first step is to create a stream of characters from the text data.**

In [None]:
# Turn the encoded corpus into a stream of character ids
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

seq_length = 100  # number of characters in each training input
# Number of chunks of seq_length + 1 characters that fit in the corpus
examples_per_epoch = len(text) // (seq_length + 1)

# Group the stream into chunks of seq_length + 1 characters; the short tail is dropped
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

# Split each chunk of length 101 into an input and a target sequence
def split_input_target(chunk):
    """Return `(input_text, target_text)` for one chunk.

    The input is the chunk without its last element and the target is the
    chunk without its first, e.g. "hello" -> ("hell", "ello").
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair
dataset = sequences.map(split_input_target)

# Decode and display the first two pairs as a sanity check
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT", int_to_text(x), sep="\n")
  print("\nOUTPUT", int_to_text(y), sep="\n")




EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


**Creating training Batches**

In [None]:
# Model and batching hyperparameters
VOCAB_SIZE = len(vocab)  # number of unique characters in the corpus
EMBEDDING_DIM = 256
RNN_UNITS = 1024
BATCH_SIZE = 64

# Size of the shuffle buffer. tf.data is designed to work with possibly
# infinite sequences, so instead of shuffling the entire sequence in memory
# it shuffles elements within a buffer of this size.
BUFFER_SIZE = 10000

# Shuffle the examples, then group them into batches of BATCH_SIZE
# (an incomplete final batch is dropped)
data = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

**Structuring Our Model**

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: number of distinct characters; sizes the embedding input
      and the Dense output.
    embedding_dim: width of the embedding vectors.
    rnn_units: number of LSTM units.
    batch_size: fixed batch dimension of the input (the sequence dimension
      is left as None).

  Returns:
    The `tf.keras.Sequential` model; it is not compiled here.
  """
  # Layer 1: embedding lookup with a fixed batch size
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  # Layer 2: stateful LSTM that emits an output at every time step
  lstm = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  # Layer 3: one output value per vocabulary entry (no activation)
  head = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, lstm, head])

# Training model: built for batches of BATCH_SIZE sequences
model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


**Creating the Loss Function From Scratch**

In [None]:
# Ask the model for a prediction on the first batch of training data (64 entries)
# and print the output shape
for input_example_batch, target_example_batch in data.take(count=1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [None]:
# The prediction holds one entry per sequence in the batch (64 here)
print(len(example_batch_predictions))
print(example_batch_predictions)

# Look at the prediction for the first sequence in the batch
pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# This is a 2-D array of length 100: one row per time step, each row scoring the next character

# Finally, look at the prediction at the first time step
time_pred = pred[0]
print(len(time_pred))
print(time_pred)
# 65 values, one per vocabulary character. They are unnormalized scores (logits),
# not probabilities: the final Dense layer has no activation.


# To pick the predicted characters, sample from the distribution defined by the logits
sampled_indices = tf.random.categorical(pred, num_samples=1)
# Flatten the (100, 1) samples into a 1-D array of character ids, then decode them
sampled_indices = sampled_indices.numpy().ravel()
predicted_chars = int_to_text(sampled_indices)

predicted_chars  # what the model predicted for the first sequence of the batch

64
tf.Tensor(
[[[ 2.76645645e-03 -3.67337512e-03 -1.20736891e-03 ... -3.65655380e-03
    1.57376414e-03  4.30410635e-03]
  [ 5.39972028e-03 -5.12344809e-03 -1.85850088e-03 ...  5.61910868e-03
   -8.16508848e-03  1.07170958e-02]
  [ 1.65083993e-03 -7.94248562e-03  3.14923259e-03 ...  4.05288395e-03
    1.38704354e-06  2.18221056e-03]
  ...
  [-6.39744243e-03  7.30025349e-03  2.59179831e-03 ... -3.71797360e-03
   -1.38610285e-02 -1.39682170e-03]
  [-7.13860011e-03  1.02497246e-02  8.78139515e-04 ... -7.13683711e-03
   -1.54036721e-02 -1.43731572e-03]
  [-4.54548793e-03  7.42150284e-03  2.32319185e-03 ... -8.19750689e-03
   -1.61115844e-02 -3.86103010e-03]]

 [[-4.26846644e-04  2.06172862e-03 -2.06006295e-03 ... -7.59686809e-04
    2.90064747e-03  1.30841299e-03]
  [ 1.79193579e-04 -9.29015805e-04 -5.29010082e-04 ... -1.71615649e-03
   -2.74683989e-04 -7.07855681e-04]
  [ 6.61050362e-05 -2.79781362e-03  2.93915422e-04 ... -2.83320155e-03
   -3.55751812e-03 -2.05052062e-03]
  ...
  [ 2.812

"IKI?RfTKRwkbz-,p-w3k.$pnkLPN!BNJS:Tc3g&NJ  ,rjMwv33gWn3SX,kfxnXzrup?R:yXWneDc;E'-J!;rodlYR3?cBNsOnRr"

**So now we need to create a loss function that can compare that output to the expected output and give us some numeric value representing how close the two were.**

In [None]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer `labels` and `logits`.

  `from_logits=True` tells the loss that `logits` are unnormalized scores
  rather than probabilities.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels, y_pred=logits, from_logits=True)

**Compile the model**

In [None]:
# Use the custom cross-entropy loss defined for logits, optimized with Adam
model.compile(loss=loss, optimizer='adam')

**Creating Checkpoints/Weights/Bias for the model**

In [None]:
# Checkpoints are written under this directory
checkpoint_dir = './training_checkpoints'
# File name prefix of each checkpoint; "{epoch}" is a placeholder in the name
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that saves only the model weights to that prefix
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix)

**Train the model**

In [None]:
# Train for 5 epochs, saving weights through the checkpoint callback
history = model.fit(
    data,
    epochs=5,
    callbacks=[checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


**Rebuild the model with a batch size of 1 (the training checkpoints are saved to disk, so the trained weights can be loaded into the rebuilt model instead of training again)**

In [None]:
# Same architecture as the training model, but accepting a single sequence per batch
model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=1)

In [None]:
# Find the latest checkpoint. latest_checkpoint returns None when the directory
# holds no checkpoint, so fail with a clear message instead of passing None on.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      "No checkpoint found in {!r}; run the training cell first.".format(checkpoint_dir))
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))

**Generating Text**

In [None]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
  """Generate text with the learned model, one character at a time.

  Args:
    model: the model to sample from; it is called with a batch of one sequence
      and its recurrent state is reset before generation starts.
    start_string: non-empty seed text; every character must be a key of `char2idx`.
    num_generate: number of characters to generate after the seed.
    temperature: positive factor the predictions are divided by before sampling.
      Low temperatures result in more predictable text, higher temperatures
      in more surprising text. Experiment to find the best setting.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")
  unknown = sorted(set(start_string) - set(char2idx))
  if unknown:
    # Same exception type as the plain dict lookup below, but with a readable message
    raise KeyError(
        "start_string contains characters not in the vocabulary: {!r}".format(unknown))

  # Converting our start string to numbers (vectorizing) and adding a batch dimension
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Generated characters are collected here and joined at the end
  text_generated = []

  # Here batch size == 1; clear any state left over from a previous call
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # scale by the temperature, then sample from a categorical distribution;
    # only the sample for the last time step is used as the next character
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    # We pass the predicted character as the next input to the model;
    # the model keeps its state between calls because it was not reset
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return start_string + ''.join(text_generated)

**Final input**

In [None]:
# Ask for a seed string, then print it followed by the generated continuation
inp = input("Type a starting string: ")
generated = generate_text(model, inp)
print(generated)

Type a starting string: romeo
romeosing,
Unto!

LUCIO:
fie, eacys and Lord Hastagry of all fills,
This hand the fair risher, to show her tounce,
O like them to the foil shade, stated them both,
But ever above perface on their dont.

MENCIO:
Leave me the brettledard; forthis? would Ipoo' the trainion,
Hath pervive you shand the senth.

KING ELWAS Make killer on,
And we rolecture my such merively, but laying.

SICINIUS:
Sir, fire ale here is the horte
Is for you, be flacted beins desire.

PAMTI:
Whole is thus son, bail back Juliet; thou dost
I'rlow'd and no law it is were; sweetes!

BIONDELLO:
Saw your friends; are you wartomakisage?

First Gerserear:
I'll swood the slace, and make his blood,
Shall-give stay for a father. These will leave
your pardage is powed wewit
Have we state to actime; to have have atchonted
Whireded in 
