Importing the necessary modules and files

In [2]:
import tensorflow as tf
import numpy as np
import os

path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'http://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

text = open(path_to_file,'rb').read().decode(encoding='utf-8')
#print('Text length:',len(text))
#print(text[:250])

Sorting the vocabs

In [3]:
vocabs = sorted(set(text))
print("Unique characters:",len(vocabs))

Unique characters: 65


##**Step - 1**

#Character space to integar representation

We are going to assign all the unique chracters in to integars and will map a sentence to vectors based on those assigned integars. 

In [4]:
char2idx = {unique:idx for idx,unique in enumerate(vocabs)}
idx2char = np.array(vocabs)

text_as_int = np.array([char2idx[char] for char in text])

print("{")
for char, _ in zip(char2idx, range(25)):
  print('   {:4s}: {:3d}'.format(repr(char),char2idx[char]))


print('--------------')
print()
print("{} characters mapped to int ----> {}".format(repr(text[:25]),text_as_int[:25]))

{
   '\n':   0
   ' ' :   1
   '!' :   2
   '$' :   3
   '&' :   4
   "'" :   5
   ',' :   6
   '-' :   7
   '.' :   8
   '3' :   9
   ':' :  10
   ';' :  11
   '?' :  12
   'A' :  13
   'B' :  14
   'C' :  15
   'D' :  16
   'E' :  17
   'F' :  18
   'G' :  19
   'H' :  20
   'I' :  21
   'J' :  22
   'K' :  23
   'L' :  24
--------------

'First Citizen:\nBefore we ' characters mapped to int ----> [18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1]


##**Step - 2**

#Handing the prediction problem

The goal here is to feed the model some string of text and then it outputs the most likely characters it thinks will follow based on what it reads in the Spakespearn work

We chunk up our data in a sequence of length of 100 and then fo ahead and use that to creae a data set and from there we can create batches of data. In other words, Chunk of sentences or chunks of whatever sequence like the character we want.

In [5]:
char2idx = {unique:idx for idx,unique in enumerate(vocabs)}
idx2char = np.array(vocabs)

text_as_int = np.array([char2idx[char] for char in text])

seq_length = 100
examples_per_epoch = len(text)//(seq_length+1) #(seq_length+1) because we're going to feed it a character and try to predict the rest of the characters
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

#for i in char_dataset.take(5):    #priting the first five characters of the dataset
#  print(idx2char[i.numpy()])     

sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


#**Step 3**

#Splitting our data into chunk of targets and input text.

We have to start with given one character and predict the next set of characters accordigly.

In [6]:
char2idx = {unique:idx for idx,unique in enumerate(vocabs)}
idx2char = np.array(vocabs)

text_as_int = np.array([char2idx[char] for char in text])

seq_length = 100
examples_per_epoch = len(text)//(seq_length+1) #(seq_length+1) because we're going to feed it a character and try to predict the rest of the characters
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

def split_input_target(chunk):
  input_text = chunk[:-1]
  target_text = chunk[1:]
  return input_text, target_text

dataset = sequences.map(split_input_target)

for input_example, target_example in dataset.take(1):
  print("Input data", repr("".join(idx2char[input_example.numpy()])))
  print("Target data", repr("".join(idx2char[target_example.numpy()])))

print('\n',"Note: We just shifted the data one character to the left",'\n')

for i, (input_idx,target_idx) in enumerate(zip(input_example[:5],target_example[:5])):
  print('Step {:4d}'.format(i))
  print('   input {} ({:5})'.format(input_idx,repr(idx2char[input_idx])))
  print('   expected output {} ({:5})'.format(target_idx,repr(idx2char[target_idx])))

print('\n',"Note: We are getting the exact character in every step that we expected previously as the input of the next step",'\n')

Input data 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '

 Note: We just shifted the data one character to the left 

Step    0
   input 18 ('F'  )
   expected output 47 ('i'  )
Step    1
   input 47 ('i'  )
   expected output 56 ('r'  )
Step    2
   input 56 ('r'  )
   expected output 57 ('s'  )
Step    3
   input 57 ('s'  )
   expected output 58 ('t'  )
Step    4
   input 58 ('t'  )
   expected output 1 (' '  )

 Note: We are getting the exact character in every step that we expected previously as the input of the next step 



#**Step 4**

#Creating training batch and training our model

In [7]:
char2idx = {unique:idx for idx,unique in enumerate(vocabs)}
idx2char = np.array(vocabs)

text_as_int = np.array([char2idx[char] for char in text])

seq_length = 100
examples_per_epoch = len(text)//(seq_length+1) #(seq_length+1) because we're going to feed it a character and try to predict the rest of the characters
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

def split_input_target(chunk):
  input_text = chunk[:-1]
  target_text = chunk[1:]
  return input_text, target_text

dataset = sequences.map(split_input_target)

BATCH_SIZE = 64
BUFFER_SIZE = 10000 #How many characters to unload

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder = True)

vocab_size = len(vocabs)
embedding_dim = 256
rnn_units = 1024

#Defining the model using a function

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):      #building models
  model = tf.keras.Sequential([tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape = [batch_size, None]),
                               tf.keras.layers.GRU(rnn_units, return_sequences = True, stateful = True, recurrent_initializer = 'glorot_uniform'),
#From integar representation to reduced dimentional representation and model embedding helps the model to find the relationshipn between words. These vectors are all usually orthogonal to one another and no overlapping characters.
                               tf.keras.layers.Dense(vocab_size),])
  return model

#After defining the model, We need to build and compile the model

model = build_model(vocab_size = vocab_size,embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size= BATCH_SIZE)

for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape,'#(batch_size, seq_length, vocab_size)') #(batch_size, seq_length, vocab_size)

model.summary()

(64, 100, 65) #(batch_size, seq_length, vocab_size)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 gru (GRU)                   (64, None, 1024)          3938304   
                                                                 
 dense (Dense)               (64, None, 65)            66625     
                                                                 
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


#Defining, compiling and Training the model

In [8]:
char2idx = {unique:idx for idx,unique in enumerate(vocabs)}
idx2char = np.array(vocabs)

text_as_int = np.array([char2idx[char] for char in text])

seq_length = 100
examples_per_epoch = len(text)//(seq_length+1) #(seq_length+1) because we're going to feed it a character and try to predict the rest of the characters
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

def split_input_target(chunk):
  input_text = chunk[:-1]
  target_text = chunk[1:]
  return input_text, target_text

dataset = sequences.map(split_input_target)

BATCH_SIZE = 64
BUFFER_SIZE = 10000 #How many characters to unload

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder = True)

vocab_size = len(vocabs)
embedding_dim = 256
rnn_units = 1024


#Defining the model

In [9]:
#Defining the model using a function

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):      #building models
  model = tf.keras.Sequential([tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape = [batch_size, None]),
                               tf.keras.layers.GRU(rnn_units, return_sequences = True, stateful = True, recurrent_initializer = 'glorot_uniform'),
#From integar representation to reduced dimentional representation and model embedding helps the model to find the relationshipn between words. These vectors are all usually orthogonal to one another and no overlapping characters.
                               tf.keras.layers.Dense(vocab_size),])
  return model



model = build_model(vocab_size = vocab_size,embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size= BATCH_SIZE)

#Compiling the model

After defining the model, We need to build and compile the model

In [10]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels,logits,from_logits=True)

model.compile(optimizer="adam",loss = loss)

#Training the model

In [11]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, 'chkpt_{epoch}')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_prefix, save_weights_only=True)

EPOCHS = 1  #epochs=25

history = model.fit(dataset,epochs=EPOCHS, callbacks = [checkpoint_callback]) #epochs=25



#**Final step**

#generate the output

We wil be using a function to generate the output of the model



#Compiling the model

In [12]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels,logits,from_logits=True)

model.compile(optimizer="adam",loss = loss)

#Training the model

In [13]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, 'chkpt_{epoch}')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_prefix, save_weights_only=True)

EPOCHS = 25  #epochs=25

history = model.fit(dataset,epochs=EPOCHS, callbacks = [checkpoint_callback]) #epochs=25

model = build_model(vocab_size = vocab_size,embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size= 1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1,None]))
model.summary()

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (1, None, 256)            16640     
                                                                 
 gru_2 (GRU)                 (1, None, 1024)           3938304   
                                                                 
 dense_2 (Dense)             (1, None, 65)             66625     
                                                                 
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


Prediction problem generate text

In [14]:
def generate_text(model, start_string):
  num_generate = 1000
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval,0)

  text_generated = []
  temp = 1.0  #handles the surpting factor of the text and scales it by a number 

  model.reset_states()
  for i in range(num_generate):
    predictions = model(input_eval)
    predictions = tf.squeeze(predictions,0)
    predictions = predictions/temp
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()      #prediction id of the word predicted by the model
    
    input_eval = tf.expand_dims([predicted_id],0)
    text_generated.append(idx2char[predicted_id])

    return (start_string+''.join(text_generated))

print(generate_text(model, start_string = 'ROMEO: '))


ROMEO: I


#**Much detailed official documentation**

https://www.tensorflow.org/text/tutorials/text_generation