<a href="https://colab.research.google.com/github/Arijit02/Machine-Learning-and-Deep-Learning/blob/master/SequenceGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 2.x
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import numpy as np
import os

Using TensorFlow backend.


In [2]:
# Download the Shakespeare corpus and keep the local path that get_file returns.
# Uses tf.keras, like every other Keras call in this notebook, instead of the
# separately installed standalone `keras` package.
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
)

In [3]:
# The download cell above already sets `path_to_file`. Prompting for a manual
# upload unconditionally would overwrite that path and block a non-interactive
# "Restart & Run All", so the Colab upload widget is only used as a fallback
# when no corpus file is available yet.
if 'path_to_file' not in globals() or not os.path.isfile(path_to_file):
  from google.colab import files
  path_to_file = list(files.upload().keys())[0]

Saving shakespeare.txt to shakespeare (2).txt


In [4]:
# Read the corpus as bytes and decode it explicitly as UTF-8. The `with` block
# closes the file handle as soon as the read finishes instead of leaving the
# open handle to the garbage collector.
with open(path_to_file,'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
print('The length of text : {} characters '.format(len(text)))

The length of text : 1115394 characters 


In [5]:
# Character-level vocabulary: every distinct character of the corpus, sorted so
# the index assigned to each character is deterministic.
vocab = sorted(set(text))
# Index -> character lookup, stored as an array so it can be indexed with
# integer sequences.
idx2char = np.array(vocab)
# Character -> index lookup; indices follow the sorted order of `vocab`.
char2idx = dict(zip(vocab, range(len(vocab))))

def text_to_int(text):
  """Encode a string as a NumPy array of vocabulary indices.

  Each character is looked up in the module-level `char2idx` table, so a
  character that is not a key of that table raises `KeyError`.
  """
  indices = []
  for ch in text:
    indices.append(char2idx[ch])
  return np.array(indices)

# Encode the whole corpus once; this array feeds the tf.data pipeline built below.
text_as_int = text_to_int(text)

In [6]:
# Sanity check: show the first 13 characters next to their integer encoding.
print("Text : ",text[:13])
print("Encoding : ",text_to_int(text[:13]))

Text :  First Citizen
Encoding :  [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [7]:
def int_to_text(ints):
  """Decode a sequence of vocabulary indices back into a string.

  Args:
    ints: Integer indices into the module-level `idx2char` array. May be a
      list, a NumPy array, or any object exposing a callable `.numpy()`
      (for example an eager tensor), which is converted to NumPy first.

  Returns:
    The decoded characters joined into a single `str`.
  """
  # Convert only when the object actually offers `.numpy()`. Unlike a bare
  # `except: pass`, this does not hide genuine failures raised by the
  # conversion itself (or unrelated errors such as KeyboardInterrupt).
  to_numpy = getattr(ints, 'numpy', None)
  if callable(to_numpy):
    ints = to_numpy()
  return ''.join(idx2char[ints])
# Round-trip check: encoding and then decoding should reproduce the original text.
print(int_to_text(text_to_int(text[:13])))  

First Citizen


In [8]:
# Length, in characters, of each model input. Training chunks hold one extra
# character so that input and target can be offset by one position.
seq_length = 100

# Stream the encoded corpus one character index at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Number of (seq_length + 1)-character chunks that fit in the corpus.
examples_per_epoch = len(text) // (seq_length + 1)

char_dataset

<TensorSliceDataset shapes: (), types: tf.int64>

In [9]:
# Group the character stream into chunks of seq_length + 1 indices; the trailing
# partial chunk is dropped so every chunk has the same length.
sequences = char_dataset.batch(seq_length+1,drop_remainder=True)

In [10]:
def split_input_target(chunk):
  """Turn one chunk into an (input, target) pair shifted by one position.

  The input is the chunk without its last element and the target is the chunk
  without its first element, so target[i] is the element following input[i].
  """
  return chunk[:-1], chunk[1:]

# Map every chunk to its (input, target) pair.
dataset = sequences.map(split_input_target)

In [11]:
# --- Model hyperparameters (passed to build_model) ---
VOCAB_SIZE = len(vocab)   # number of distinct characters in the corpus
EMBEDDING_DIM = 256       # embedding size
RNN_UNITS = 1024          # LSTM units

# --- Input pipeline settings ---
BATCH_SIZE = 64           # sequences per training batch
BUFFER_SIZE = 10000       # size of the shuffle buffer

# Shuffle the (input, target) pairs, then batch them. The incomplete final
# batch is dropped so every batch holds exactly BATCH_SIZE sequences.
data = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [12]:
def build_model(vocab_size,embedding_dim,rnn_units,batch_size):
  """Build the character-level model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: Number of distinct characters; used as the embedding input
      size and as the width of the final Dense layer.
    embedding_dim: Size of each character embedding.
    rnn_units: Number of LSTM units.
    batch_size: Fixed batch dimension, set through `batch_input_shape`
      (the sequence dimension is left as None).

  Returns:
    An uncompiled `tf.keras.Sequential` model whose Dense layer has no
    activation argument.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])

# Instantiate the training model (batch dimension fixed at BATCH_SIZE) and
# print its layer summary.
model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE,
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [13]:
def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  `from_logits=True` is passed because the model's final Dense layer is built
  without an activation, so its outputs are treated as unnormalized scores.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels,
      y_pred=logits,
      from_logits=True,
  )

In [14]:
# Train with the Adam optimizer and the logits-based cross-entropy `loss` defined above.
model.compile(optimizer='adam',loss=loss)

In [15]:
# Directory that receives the training checkpoints.
checkpoint_dir = './training_checkpoints'
# File name template; the `{epoch}` placeholder is left for the callback to fill in.
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')

# Save only the model weights (not the full model) during training.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix, save_weights_only=True)

In [16]:
# Train for 40 epochs, saving weights through the checkpoint callback.
# `callbacks` is documented as a list of Callback instances, so the single
# callback is wrapped in a list rather than passed bare.
history = model.fit(data, epochs=40, callbacks=[checkpoint_callback])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [17]:
# Rebuild the same architecture with batch_size=1 for generation: build_model
# fixes the batch dimension through batch_input_shape, so the model built for
# batches of BATCH_SIZE is not reused for a single prompt. The trained weights
# are loaded into this fresh model in the next cell.
model = build_model(VOCAB_SIZE,EMBEDDING_DIM,RNN_UNITS,batch_size=1)

In [18]:
# Restore the most recent training checkpoint into the batch-size-1 model.
# latest_checkpoint returns None when the directory holds no checkpoint, so
# fail with a clear message instead of handing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1,None]))

In [19]:
def generate_text(model, start_string, num_generate=300, temperature=1.0):
  """Generate text by sampling one character at a time from a trained model.

  Args:
    model: Stateful character model built for batch size 1; it is called on
      a (1, length) batch of character indices and must return per-step
      logits over the vocabulary.
    start_string: Non-empty seed text. Every character must be a key of the
      module-level `char2idx` table.
    num_generate: Number of characters to generate (default 300).
    temperature: Positive divisor applied to the logits before sampling
      (default 1.0). Lower values make sampling more predictable, higher
      values more surprising.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: If `start_string` is empty, `num_generate` is negative, or
      `temperature` is not positive.
    KeyError: If `start_string` contains a character outside the vocabulary.
  """
  # Validate up front so bad input fails with a clear message instead of an
  # obscure error from inside the model.
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if num_generate < 0:
    raise ValueError("num_generate must be non-negative")
  if temperature <= 0:
    raise ValueError("temperature must be positive")
  unknown = sorted(set(start_string) - set(char2idx))
  if unknown:
    raise KeyError(
        "start_string contains characters outside the vocabulary: {!r}".format(unknown))
  if num_generate == 0:
    return start_string

  # Encode the seed and add the batch dimension: shape (1, len(start_string)).
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  # Start from a clean recurrent state; the stateful LSTM then carries its
  # state from one call to the next inside the loop.
  model.reset_states()
  for _ in range(num_generate):
    # Call the model directly: `predict` sets up a full prediction loop on
    # every call, which is needless overhead for one tiny batch per character.
    predictions = model(input_eval, training=False)

    # Drop the batch dimension and rescale the logits.
    predictions = tf.squeeze(predictions, 0)
    predictions = predictions / temperature

    # Sample from the distribution at every time step and keep the draw for
    # the last one, which is the prediction for the next character.
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    # Feed only the sampled character back in; earlier context lives in the
    # LSTM state.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return start_string + "".join(text_generated)


In [20]:
# Ask for a seed string interactively, then print the seed followed by the
# generated continuation.
inp = input("Type a starting string:\n")
print(generate_text(model, inp))

Type a starting string:
Hello people I am juliet
Hello people I am juliet,
Take it not set up with fire;
Even to the good gods, will you go see,
The vate to your Surrey, to piece by the impery of thee,
By dread lord, she guess, after hath my curse!

GLOUCESTER:
By his vice, being now all my heart!

First Servingman:
A my lord! and, but to day I should lasy others,
Which 
