<a href="https://colab.research.google.com/github/Georgemburu/MACHINE-LEARNING/blob/master/TextGeneration2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
# TF 1.x needs eager execution switched on explicitly. TF 2.x runs eagerly by
# default and no longer exposes this top-level function, so only call it when
# it is available.
if hasattr(tf, 'enable_eager_execution'):
  tf.enable_eager_execution()
import numpy as np
import os
import time


In [2]:
# Fetch the Shakespeare corpus and remember where the local copy was saved.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# Read the data: load the corpus as raw bytes and decode it as UTF-8 text.
# The context manager closes the file handle as soon as the read finishes.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
print('text len', len(text))

text len 1115394


In [4]:
# Vocabulary: every distinct character of the corpus, in sorted order
unique_chars = set(text)
vocab = sorted(unique_chars)
print('Num of unique chars: ', len(vocab))

Num of unique chars:  65


In [0]:
# Vectorize the text: build lookup tables in both directions.
# character -> integer id (ids follow the position of each character in vocab)
char2idx = dict(zip(vocab, range(len(vocab))))
# integer id -> character; stored as an array so it can be indexed with
# whole arrays of ids at once
idx2char = np.array(vocab)

In [0]:
# Encode the whole corpus as an array of integer character ids
text_as_int = np.array(list(map(char2idx.__getitem__, text)))

In [7]:
# Display the encoded corpus
text_as_int

array([18, 47, 56, ..., 45,  8,  0])

In [8]:
# Length, in characters, of each input sequence
seq_length = 100
# Each training example consumes seq_length + 1 characters (the input plus its
# one-character-shifted target), so this is the number of whole examples that
# fit in the corpus -- not the number of characters.
examples_per_epoch = len(text) // (seq_length + 1)

# Stream of individual character ids
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode and print the first five characters of the stream
for char_id in char_dataset.take(5):
  print(idx2char[char_id.numpy()])

F
i
r
s
t


In [11]:
# Group the character stream into chunks of seq_length + 1 ids; a trailing
# chunk that would be shorter than that is dropped.
chunk_size = seq_length + 1
sequences = char_dataset.batch(chunk_size, drop_remainder=True)

# Show the first chunk: its repr, the length of that repr (escapes and quotes
# included), and the number of ids in the chunk.
for item in sequences.take(1):
  item_ids = item.numpy()
  txt = repr(''.join(idx2char[item_ids]))
  print(txt)
  print(len(txt))
  print(len(item_ids))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
110
101


In [0]:
def split_input_target(chunk):
  """Split a chunk into an (input, target) pair offset by one position.

  The input is every element except the last and the target is every element
  except the first, so target[i] is the element that follows input[i].
  """
  return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair
dataset = sequences.map(split_input_target)

In [14]:
# Decode the first (input, target) pair to show the one-character shift
for input_example, target_example in dataset.take(1):
  for label, example in (('Input->', input_example), ('Target->', target_example)):
    print(label, repr(''.join(idx2char[example.numpy()])))
  

Input-> 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target-> 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [16]:
# Walk through the first five time steps: at each step the model receives one
# character id and is expected to output the id of the character that follows.
first_inputs = input_example[:5]
first_targets = target_example[:5]
for i, (input_idx, target_idx) in enumerate(zip(first_inputs, first_targets)):
    input_char = repr(idx2char[input_idx])
    target_char = repr(idx2char[target_idx])
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, input_char))
    print("  expected output: {} ({:s})".format(target_idx, target_char))


Step    0
  input: 18 ('F')
  expected output: 47 ('i')
Step    1
  input: 47 ('i')
  expected output: 56 ('r')
Step    2
  input: 56 ('r')
  expected output: 57 ('s')
Step    3
  input: 57 ('s')
  expected output: 58 ('t')
Step    4
  input: 58 ('t')
  expected output: 1 (' ')


In [0]:
# CREATE TRAINING BATCHES

In [18]:
# Number of sequences per training batch
BATCH_SIZE = 64
# Number of elements tf.data keeps in its shuffle buffer
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs, then group them into fixed-size batches,
# dropping a final batch that would be smaller than BATCH_SIZE.
# NOTE: this rebinds `dataset`; re-running this cell on its own would batch
# the already-batched dataset again.
shuffled_pairs = dataset.shuffle(BUFFER_SIZE)
dataset = shuffled_pairs.batch(BATCH_SIZE, drop_remainder=True)
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [0]:
# BUILD THE MODEL

In [0]:
# Number of distinct characters in the corpus
vocab_size = len(vocab)
# Size of the embedding vector learned for each character
embedding_dim = 256
# Number of units in the recurrent (GRU) layer
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Build the character-level model: Embedding -> stateful GRU -> Dense.

  Args:
    vocab_size: number of distinct character ids; used as the embedding input
      size and as the number of outputs of the final Dense layer.
    embedding_dim: size of the embedding vector for each character.
    rnn_units: number of units in the GRU layer.
    batch_size: fixed batch size baked into the input shape (the GRU is
      stateful); the sequence length is left unspecified.

  Returns:
    An uncompiled tf.keras Sequential model. The Dense layer has no
    activation, so it emits vocab_size raw scores per time step.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.GRU(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.models.Sequential([embedding, recurrent, projection])
  

In [0]:
# Instantiate the training model with its batch size fixed at BATCH_SIZE
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=BATCH_SIZE)

In [23]:
# Print the layer-by-layer summary of the training model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3935232   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Run the untrained model on a single batch to check the output shape
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  prediction_shape = example_batch_predictions.shape
  print(prediction_shape, "# (batch_size, sequence_length, vocab_size)")


(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [36]:
# Sample from distribution: draw one character id per time step from the
# model's output for the first sequence of the batch, then drop the trailing
# num_samples axis and convert to a NumPy array.
first_sequence_logits = example_batch_predictions[0]
sampled_indices = tf.squeeze(
    tf.random.categorical(first_sequence_logits, num_samples=1),
    axis=-1,
).numpy()
sampled_indices

array([41, 53, 35, 59, 41, 19, 56, 20, 21,  1, 16, 31, 11, 36, 44, 46, 35,
       36, 61, 38, 45, 14, 58, 53, 18, 23, 17, 19, 11, 42, 38, 62, 12, 18,
       29,  4, 61, 52, 52, 56, 35,  3, 33, 29, 13, 11, 23, 18, 46,  5, 55,
       36, 40, 41, 52, 54, 10,  9, 28, 61, 50, 21, 35, 59,  1, 36, 54,  5,
       39, 25, 58, 16, 31,  1,  1,  9, 29, 12, 18, 39,  3, 41, 60, 31, 46,
        2, 35, 18, 26, 30, 17, 21, 52, 35, 37, 62, 50, 33, 60, 33])

In [37]:
# Decode the first input sequence and the characters sampled for it from the
# untrained model
input_text = "".join(idx2char[input_example_batch[0]])
predicted_text = "".join(idx2char[sampled_indices])
print("Input: \n", repr(input_text))
print()
print("Next Char Predictions: \n", repr(predicted_text))


Input: 
 'r never heard a play--\nYou break into some merry passion\nAnd so offend him; for I tell you, sirs,\nIf'

Next Char Predictions: 
 "coWucGrHI DS;XfhWXwZgBtoFKEG;dZx?FQ&wnnrW$UQA;KFh'qXbcnp:3PwlIWu Xp'aMtDS  3Q?Fa$cvSh!WFNREInWYxlUvU"


In [0]:
# TRAIN THE MODEL

In [39]:
def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  `from_logits=True` tells Keras that `logits` are unnormalized scores rather
  than probabilities. The result is returned as-is; no reduction is applied
  here.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)
  
# Loss of the untrained model on the example batch, averaged to one number
example_batch_loss = loss(target_example_batch, example_batch_predictions)
mean_example_loss = example_batch_loss.numpy().mean()
print('Prediction shape:', example_batch_predictions.shape," # (batch_size, sequence_length, vocab_size)")
print('scalar_loss:  ', mean_example_loss)

Prediction shape: (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:   4.175373


In [0]:
# Configure training: Adam optimizer with the logits-based `loss` function
model.compile(optimizer='adam', loss=loss)

In [0]:
# Configure the checkpoints
# Directory that receives the checkpoint files
checkpoint_dir = './training_checkpoints'
# File name pattern; the callback substitutes the epoch number for {epoch}
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')

# Callback passed to model.fit; saves the model weights only, not the full model
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

In [0]:
# Execute the training

In [44]:
# Number of full passes over the training dataset
EPOCHS = 10
# Train the model with the checkpoint callback attached to each run
history = model.fit(dataset,epochs=EPOCHS,callbacks=[checkpoint_callback])

Epoch 1/10
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
# GENERATE TEXT

In [45]:
# Look up the path of the most recent checkpoint. This only displays the
# path; no weights are loaded here.
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_10'

In [47]:
# Rebuild the network with a batch size of 1 for text generation and load the
# weights saved by the most recent training checkpoint.
latest_checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
model = build_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=1,
)
model.load_weights(latest_checkpoint_path)
model.build(tf.TensorShape([1,None]))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_2 (GRU)                  (1, None, 1024)           3935232   
_________________________________________________________________
dense_2 (Dense)              (1, None, 65)             66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


In [0]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
  """Generate text one character at a time using the learned model.

  Args:
    model: the character model, built for a batch size of 1. Its recurrent
      state is reset before generation starts.
    start_string: non-empty seed text. Every character must be a key of
      `char2idx`.
    num_generate: number of characters to generate after the seed.
    temperature: positive scaling factor applied to the logits before
      sampling. Low temperatures give more predictable text, higher
      temperatures give more surprising text.

  Returns:
    `start_string` followed by the `num_generate` generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  # Validate up front so bad arguments fail with a clear message instead of an
  # obscure indexing or NaN error deep inside the sampling loop.
  if not start_string:
    raise ValueError('start_string must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive')

  # Converting our start string to numbers (vectorizing) and adding the batch
  # dimension expected by the model
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

  # Generated characters, joined into a string at the end
  text_generated = []

  # Here batch size == 1
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # Scale the logits by the temperature, sample one id per time step from
    # the resulting categorical distribution, and keep the sample for the
    # last time step: that is the next character.
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Only the predicted character is passed as the next input; earlier
    # context is carried by the state that the stateful model keeps between
    # calls.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))


In [93]:
# Generate text seeded with a character-name prompt and print the result
print(generate_text(model, start_string=u'ROMEO: '))

ROMEO: I do to be his person law her have shows
woe gone.

Shepherd:
Marry, I can never tate all grace.

HORTENSIO:
Cunst thou that dry:
I am they please.

DUCHESS OF YORCK:
All thus confession: but thou dumst her,
Prived in the week, and not
My babour hears slain:
NIS:
What dost thou better it, with grace of daughter:
No daughter to the Thursday name; or else
the grace's day that to drumple
You have done maiden.
See the duke, or if now, 'twas Vine
Of all the villain face that Egardomeness
That have goed waning their worst: Carreyst,
You whom thee take fur gallows?

ARIEL:
Alack, down with our house!

ARCHUCKIO:
Nay, go you kn will, For do
Is sword.

HENRY BOLINGBROKE:
Good for, and may slee it but your might hand what would
IGHARD IV:
Ad it may be
Now will this by the way how to unstand in overroy's tear,
From which with her our fair queen is commonness
Abaur her, or sure I have a dream obsearth,
That where he let him grow: out of the liston charity:
Her 'Gon, I would have framed or I