GROUP MEMBERS :
1.  Aniruddh Shukla 
2. Gaurav Singhal 
3. Himanshi Bajaj 

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from tensorflow.keras import layers

In [0]:
os.chdir("/content/drive/My Drive/Colab Notebooks")

# Preparing the Data

In [0]:
!python prepare_data2.py shakespeare_input.txt skp \\n\\n+ -m 500

2020-05-31 19:04:10.573778: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
Split input into 31022 sequences...
Longest sequence is 3094 characters. If this seems unreasonable, consider using the maxlen argument!
Removing sequences longer than 500 characters...
29429 sequences remaining.
Longest remaining sequence has length 499.
Removing length-0 sequences...
29429 sequences remaining.
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences..

In [10]:
from prepare_data2 import parse_seq
import pickle

# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("skp.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
data = data.map(lambda x: parse_seq(x))

# a map from characters to indices
vocab = pickle.load(open("skp_vocab", mode="rb"))
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab)
print(vocab_size)

{'u': 3, 'f': 4, 'A': 5, 'L': 6, 'E': 7, 'q': 8, '?': 9, 'M': 10, 'I': 11, ';': 12, 'x': 13, 'X': 14, 'd': 15, '3': 16, 'F': 17, 'T': 18, 'g': 19, 'r': 20, '&': 21, 'm': 22, 'S': 23, 'y': 24, 'b': 25, '$': 26, ']': 27, 'N': 28, ',': 29, 'Z': 30, '.': 31, 'H': 32, 'c': 33, 'z': 34, 'B': 35, 'p': 36, "'": 37, 'Q': 38, ' ': 39, 'n': 40, 'Y': 41, 'i': 42, 'V': 43, 'P': 44, '-': 45, 'v': 46, 'R': 47, 'J': 48, '\n': 49, 'K': 50, 'h': 51, 'o': 52, 'W': 53, '!': 54, 'w': 55, 'G': 56, 'e': 57, 'O': 58, 'k': 59, '[': 60, 'a': 61, 'l': 62, 'j': 63, 'U': 64, 'C': 65, 't': 66, 'D': 67, 's': 68, ':': 69, '<PAD>': 0, '<S>': 1, '</S>': 2}
70


In [11]:
for item in data.take(5):
    to_chars = "".join(ind_to_ch[ch] for ch in item.numpy())
    print(to_chars)

<S>First Citizen:
Before we proceed any further, hear me speak.</S>
<S>All:
Speak, speak.</S>
<S>First Citizen:
You are all resolved rather to die than to famish?</S>
<S>All:
Resolved. resolved.</S>
<S>First Citizen:
First, you know Caius Marcius is chief enemy to the people.</S>


In [0]:
# generating input sequences
def split_input(chunk):
  input_data = chunk[:-1]
  return input_data

In [0]:
# generating ouput sequences
def split_target(chunk):
  output_data = chunk[1:]
  return output_data

In [0]:
dataset_in = data.map(split_input)
dataset_out = data.map(split_target)
for input_example in dataset_in.take(1):
  print("Input example: " , "".join(ind_to_ch[ch] for ch in input_example.numpy()))
for output_example in dataset_out.take(1):
  print("Output example: " , "".join(ind_to_ch[ch] for ch in output_example.numpy()))


Input example:  <S>First Citizen:
Before we proceed any further, hear me speak.
Output example:  First Citizen:
Before we proceed any further, hear me speak.</S>


In [0]:
def one_hot_encoding(x):
  return tf.one_hot(x, depth = vocab_size)

In [0]:
BATCH_SIZE = 128
dataset_in = dataset_in.padded_batch(batch_size=BATCH_SIZE,drop_remainder=True,padding_values=0,padded_shapes=500)
one_hot_encoded_data_in = dataset_in.map(one_hot_encoding)
dataset_out = dataset_out.padded_batch(batch_size=BATCH_SIZE,drop_remainder=True,padding_values=0,padded_shapes=500)
# one_hot_encoded_data_out = dataset_out.map(one_hot_encoding)

# Testing with one batch, masked and unmasked loss


In [0]:
#using pre-built RNN and a dense layer of size - vocab
rnn_units = 512
def build_model(vocab_size, rnn_units, batch_size):
  model = tf.keras.Sequential([
                  tf.keras.layers.GRU(rnn_units,return_sequences=True,stateful=True,recurrent_initializer='glorot_uniform'),
                  tf.keras.layers.Dense(vocab_size)
  ])
  return model

In [0]:
#  building the model
model = build_model(vocab_size=vocab_size,rnn_units=rnn_units,batch_size=BATCH_SIZE)

In [0]:
#  calculating loss function
def loss_function(logits,labels):
  return tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logits)

In [0]:
def masking(unmasked_batch):
  # calculating non zeros count for each sequence in a batch
 nonzero_count  = tf.math.count_nonzero(unmasked_batch,axis=1,dtype= tf.float32)
 # subtracting as we didn't consider last char of input 
 nonzero_count = nonzero_count - 1
 #  converting mask into a 2D tensor of size batch x time_step
 padding_withzeros = tf.sequence_mask(nonzero_count,maxlen=500,dtype=tf.float32)
 return padding_withzeros,nonzero_count

In [0]:
for input_example_batch,output_example_batch,input_batch in zip(one_hot_encoded_data_in.take(1),dataset_out.take(1),dataset_in.take(1)):
  #  getting predicted output for one batch
  example_batch_predits = model(input_example_batch)

  padding_withzeros,nonzero_count = masking(input_batch)
  # calculating loss for each time_step, then summing the all time_steps and finally averaging for each sequence in a batch wrt length (without masking)
  loss = loss_function(example_batch_predits,output_example_batch)
  summed_loss_without_masking_per_batch = tf.reduce_sum(loss,axis=1)
  average_loss_without_masking_per_batch = tf.divide(summed_loss_without_masking_per_batch,nonzero_count)
  print("Average loss without masking : \n",average_loss_without_masking_per_batch)
  
  masked_loss = loss * padding_withzeros
  # summing the all time_steps and finally averaging for each sequence in a batch wrt length (with masking)
  summed_loss_per_batch = tf.reduce_sum(masked_loss,axis=1)
  average_loss_per_batch = tf.divide(summed_loss_per_batch,nonzero_count)

  print("Average loss with masking : \n",average_loss_per_batch)

Average loss without masking : 
 tf.Tensor(
[ 34.920418  116.23101    32.23315    87.18817    28.324173   80.49564
  24.6617     38.784447   52.342175   31.275099   36.11722    29.517317
  17.637384   44.55022     8.10941    18.093128    9.531103  130.74515
  58.148605   26.534275   31.742018   18.908773    8.969084   23.29816
  39.52266     5.402406    9.3985     16.025812    4.765436   39.5198
   6.2365894   8.637399   29.11315    23.29606    80.49222    24.960316
  16.663986   59.80728    59.80882    10.678351   40.27895     5.926048
  41.889015    6.3447933  16.53184    48.692524   25.567835    4.343719
  12.0167675  67.516884    6.97226    80.48905    51.061      63.433388
  63.429325   39.521706   20.567757   23.82962    12.9771805  59.81504
  13.823136   29.107405   58.15937    14.790729   19.792433   90.97706
  23.303484  348.58047    87.20034   149.43492    12.441825   41.070007
  87.19303    37.407494   58.15153    69.769875   37.408966   61.577564
  27.580091   12.296082    

In [0]:
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_18 (GRU)                 multiple                  897024    
_________________________________________________________________
dense_18 (Dense)             multiple                  35910     
Total params: 932,934
Trainable params: 932,934
Non-trainable params: 0
_________________________________________________________________


In [0]:
# printing output for the first batch
sample_indices = tf.random.categorical(example_batch_predits[0],num_samples=1)
sample_indices = tf.squeeze(sample_indices,axis=-1).numpy()
sample_indices

array([66, 17,  9, 63,  3, 26, 51, 69, 53, 57, 27, 54,  1, 40, 23, 59, 60,
       24, 48,  4, 54, 24,  9, 15, 51, 24,  2, 29, 39, 51,  2, 68, 58, 48,
       42, 66, 60, 34, 65, 24, 46, 69, 35, 56, 47, 46,  1, 17, 13, 18, 43,
       58, 32, 16, 66, 11, 29, 33,  6, 42, 13, 31, 62, 27, 33,  0, 20, 36,
       59, 28, 57, 64, 45, 25, 17,  1, 34, 14,  6,  0, 59, 43, 33,  1, 68,
       13,  5, 28, 26, 37,  1, 26,  3,  8,  1, 58, 29, 48, 17,  1, 52,  4,
       48, 56, 15,  4, 66, 18, 68,  1, 27, 48, 31, 12, 14, 18, 50,  7, 48,
       65, 21, 49,  4,  1, 17, 35, 69, 15, 44, 39, 30, 27, 13, 45, 40, 50,
       15, 32, 50,  2, 58, 69, 63,  4, 58, 52, 28, 28, 61, 20, 30, 13, 19,
       32, 66, 63, 26, 47, 68, 43, 14, 43, 37, 17, 46, 48, 47, 37, 57, 56,
       27, 32, 25, 57, 49, 44, 25,  6, 22, 57, 21, 32, 41, 54, 42, 32, 48,
       41, 39, 21, 66, 17,  4, 69, 34, 41,  4, 39, 19, 56, 31, 46, 16, 61,
       51, 66, 16, 59, 52,  4, 53, 24,  5, 54,  8, 69, 22, 55, 54, 18,  7,
        6, 38, 11,  0, 61

In [0]:
for input_example_batch in dataset_in.take(1):
  print("Input : \n " , "".join(ind_to_ch[ch] for ch in input_example_batch[0].numpy()))
  print(" \n Next character predicted: \n" , "".join(ind_to_ch[ch] for ch in sample_indices))

Input : 
  <S>First Citizen:
Before we proceed any further, hear me speak.<PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><

# Implementing the Complete RNN for entire dataset and sampling with keras


In [0]:
# generating ouput sequences
def split_target(chunk):
  output_data = chunk[1:]
  return output_data

In [0]:
# dataset_in = data.map(split_input)
dataset_in = data
dataset_out = data.map(split_target)

In [0]:
def one_hot_encoding(x):
  return tf.one_hot(x, depth = vocab_size)

In [16]:
BATCH_SIZE = 128
dataset_in = dataset_in.padded_batch(batch_size=BATCH_SIZE,drop_remainder=True,padding_values=0,padded_shapes=500)
one_hot_encoded_data_in = dataset_in.map(one_hot_encoding)
dataset_out = dataset_out.padded_batch(batch_size=BATCH_SIZE,drop_remainder=True,padding_values=0,padded_shapes=500)
for x in one_hot_encoded_data_in:
  print(x.shape)
  break

(128, 500, 70)


In [0]:
#using pre-built RNN and a dense layer of size - vocab
rnn_units = 1024
def build_model(vocab_size, rnn_units, batch_size):
  model = tf.keras.Sequential([
                  tf.keras.layers.GRU(rnn_units,return_sequences=True,stateful=True,recurrent_initializer='glorot_uniform'),
                  tf.keras.layers.Dense(vocab_size)
  ])
  return model

In [18]:
#  building the model
model = build_model(vocab_size=vocab_size,rnn_units=rnn_units,batch_size=BATCH_SIZE)
input_tensor = tf.Variable(tf.initializers.GlorotUniform(seed = 0)(shape=[128, 500, vocab_size]))
prediction = model(input_tensor)
prediction.shape

TensorShape([128, 500, 70])

In [0]:
model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_19 (GRU)                 multiple                  3366912   
_________________________________________________________________
dense_19 (Dense)             multiple                  71750     
Total params: 3,438,662
Trainable params: 3,438,662
Non-trainable params: 0
_________________________________________________________________


In [0]:
#  calculating loss function
def loss_function(logits,labels):
  return tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logits)

In [0]:
def masking(unmasked_batch):
  
  # calculating non zeros count for each sequence in a batch
 nonzero_count  = tf.math.count_nonzero(unmasked_batch,axis=1,dtype= tf.float32)

 # subtracting as we didn't consider last char of input 
 nonzero_count = nonzero_count - 1

 #  converting mask into a 2D tensor of size batch x time_step
 padding_withzeros = tf.sequence_mask(nonzero_count,maxlen=500,dtype=tf.float32)

 return padding_withzeros,nonzero_count

In [0]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,save_weights_only=True)

In [0]:
Epochs = 30
def model_execution(Epochs,optimizer,one_hot_encoded_data_in,dataset_out,dataset_in):
  for epoch in range(Epochs):
    for input_example_batch,output_example_batch,input_batch in zip(one_hot_encoded_data_in,dataset_out,dataset_in):
      model_resetting = model.reset_states()
      #  getting predicted output for one batch
      with tf.GradientTape() as tape:
        example_batch_predits = model(input_example_batch)

        padding_withzeros,nonzero_count = masking(input_batch)
        # calculating loss for each time_step
        loss = loss_function(example_batch_predits,output_example_batch)
  
        masked_loss = loss * padding_withzeros
        # summing the all time_steps and finally averaging for each sequence in a batch wrt length (with masking)
        summed_loss_per_batch = tf.reduce_sum(masked_loss,axis=1)
        average_loss_per_batch = tf.divide(summed_loss_per_batch,nonzero_count)
        average_loss = tf.reduce_mean(average_loss_per_batch)

      grads = tape.gradient(average_loss_per_batch,model.trainable_variables)
      optimizer.apply_gradients(zip(grads,model.trainable_variables))

    model.save_weights(checkpoint_prefix.format(epoch=epoch))
    print("Epoch: {}, Loss: {}".format(epoch, average_loss))
  model.save_weights(checkpoint_prefix.format(epoch=epoch))

In [0]:
learning_rate = 0.001
# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model_execution(Epochs,optimizer,one_hot_encoded_data_in,dataset_out,dataset_in)

Epoch: 0, Loss: 2.2767281532287598
Epoch: 1, Loss: 2.06550931930542
Epoch: 2, Loss: 1.8966399431228638
Epoch: 3, Loss: 1.7667053937911987
Epoch: 4, Loss: 1.6627166271209717
Epoch: 5, Loss: 1.566677212715149
Epoch: 6, Loss: 1.4918911457061768
Epoch: 7, Loss: 1.4304687976837158
Epoch: 8, Loss: 1.3758869171142578
Epoch: 9, Loss: 1.331498622894287
Epoch: 10, Loss: 1.2958202362060547
Epoch: 11, Loss: 1.2591071128845215
Epoch: 12, Loss: 1.2336969375610352
Epoch: 13, Loss: 1.2131727933883667
Epoch: 14, Loss: 1.1873164176940918
Epoch: 15, Loss: 1.1591829061508179
Epoch: 16, Loss: 1.134566068649292
Epoch: 17, Loss: 1.129105806350708
Epoch: 18, Loss: 1.100846529006958
Epoch: 19, Loss: 1.0948433876037598
Epoch: 20, Loss: 1.0807957649230957
Epoch: 21, Loss: 1.0664831399917603
Epoch: 22, Loss: 1.0511434078216553
Epoch: 23, Loss: 1.044309139251709
Epoch: 24, Loss: 1.0253673791885376
Epoch: 25, Loss: 1.0155891180038452
Epoch: 26, Loss: 1.0001296997070312
Epoch: 27, Loss: 0.9842461943626404
Epoch: 28,

In [19]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_29'

# Applying a Language Model

In [0]:
generating_language_model = build_model(vocab_size=vocab_size,rnn_units=rnn_units,batch_size=BATCH_SIZE)
generating_language_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
generating_language_model.build(tf.TensorShape([1,None,vocab_size]))

In [29]:
generating_language_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_2 (GRU)                  multiple                  3366912   
_________________________________________________________________
dense_2 (Dense)              multiple                  71750     
Total params: 3,438,662
Trainable params: 3,438,662
Non-trainable params: 0
_________________________________________________________________


In [30]:
ch = 'L'  
i = 0
for key,val in vocab.items():
  if key == ch:
    i = val
print(i)
# convert to one hot vector
input_ch = tf.one_hot(i,depth=vocab_size)
input_ch = tf.expand_dims(tf.expand_dims(input_ch,axis=0),axis=0)
print(input_ch)

6
tf.Tensor(
[[[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0.]]], shape=(1, 1, 70), dtype=float32)


In [0]:
def softmax_function(logits):
  return tf.nn.softmax(axis=-1,logits=logits)

In [0]:
character_index_list = [i]
index_list = list(range(vocab_size))
generating_language_model.reset_states()
for time_step in range(3000):

    next_char = generating_language_model(input_ch)
    softmax_char = softmax_function(next_char)
    softmax_char = softmax_char.numpy()
    index = np.random.choice(index_list,p = softmax_char.flatten())
    character_index_list.append(index)

    input_ch = tf.one_hot(index,depth=vocab_size)
    input_ch = tf.expand_dims(tf.expand_dims(input_ch,axis=0),axis=0)

In [33]:
seq = [ind_to_ch[ind] for ind in character_index_list]
seq = [s.replace('</S>','\n') for s in seq ]
print("".join(seq))

LUCIUS:
With that swear how you shall take leave a common man:
You seem am I am not I must be in common
East from my talen from my sun untided
Sir John, for am none redoct!
He would venture appeil in my sway,
With this intercupes, ducy so ask some take,
To hear of made Edmurs us.
'Alvise ma!
le!

has spent it well: or cloud will less than I make
The duff to wait upon his mile: be too.
Say, God report to these incass, we'll die with thee.

Sir, welcome to thee.
'Tis sweat, you made us
right unto 'em.
 secret you at a prick,
Brought redues you with music.
 in,
Master Antonio, but one that will do.

For what we are coming to distime with
these the cursus, thy life we have wicked for wife.

Where As you shall know for my breast,
That put my tongue I was such swaggers to like
Enjoy in such a courage and a fear.
's not
virtuous, as I live, I have recovery;
Bun splean to a maid.
 not you would,
So stoor her borth.
 but you and in rest,
Which I understand them, before he fears and
straight to 

# Trying King James Bible

In [7]:
!python prepare_data2.py the-king-james-bible.txt king [0-9]+:[0-9]+  -m 500

2020-06-01 12:10:36.430297: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
Split input into 31103 sequences...
Longest sequence is 533 characters. If this seems unreasonable, consider using the maxlen argument!
Removing sequences longer than 500 characters...
31102 sequences remaining.
Longest remaining sequence has length 448.
Removing length-0 sequences...
31102 sequences remaining.
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences...

In [9]:
from prepare_data2 import parse_seq
import pickle

# this is just a datasets of "bytes" (not understandable)
data_k = tf.data.TFRecordDataset("king.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
data_k = data_k.map(lambda x: parse_seq(x))

# a map from characters to indices
vocab_k = pickle.load(open("king_vocab", mode="rb"))
vocab_size_k = len(vocab_k)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab_k.items()}

print(vocab_k)
print(vocab_size_k)

{'i': 3, 'B': 4, '6': 5, 'a': 6, 't': 7, ',': 8, '2': 9, '!': 10, 'y': 11, '?': 12, 'q': 13, '*': 14, 'o': 15, 'z': 16, 'U': 17, 'F': 18, 'M': 19, '1': 20, 'x': 21, 'W': 22, 'v': 23, 'A': 24, ')': 25, ';': 26, '.': 27, 'I': 28, '(': 29, 'g': 30, 'Z': 31, '7': 32, '\n': 33, 'G': 34, 'd': 35, 'c': 36, 'b': 37, 'h': 38, '3': 39, 's': 40, 'L': 41, '5': 42, 'Q': 43, 'K': 44, 'l': 45, 'r': 46, 'Y': 47, '8': 48, ':': 49, '-': 50, '4': 51, 'O': 52, 'w': 53, 'R': 54, 'V': 55, 'D': 56, 'T': 57, 'C': 58, 'n': 59, 'H': 60, 'E': 61, 'e': 62, 'k': 63, '\ufeff': 64, 'u': 65, 'p': 66, 'N': 67, 'P': 68, 'J': 69, ' ': 70, '9': 71, '0': 72, "'": 73, 'j': 74, 'S': 75, 'f': 76, 'm': 77, '<PAD>': 0, '<S>': 1, '</S>': 2}
78


In [10]:
for item in data_k.take(5):
    to_chars = "".join(ind_to_ch[ch] for ch in item.numpy())
    print(to_chars)

<S>﻿The First Book of Moses:  Called Genesis


</S>
<S> In the beginning God created the heaven and the earth.

</S>
<S> And the earth was without form, and void; and darkness was upon
the face of the deep. And the Spirit of God moved upon the face of the
waters.

</S>
<S> And God said, Let there be light: and there was light.

</S>
<S> And God saw the light, that it was good: and God divided the light
from the darkness.

</S>


In [0]:
# generating ouput sequences
def split_target(chunk):
  output_data = chunk[1:]
  return output_data

In [0]:
dataset_in_k = data_k
dataset_out_k = data_k.map(split_target)

In [0]:
def one_hot_encoding(x):
  return tf.one_hot(x, depth = vocab_size_k)

In [15]:
BATCH_SIZE = 128
dataset_in_k = dataset_in_k.padded_batch(batch_size=BATCH_SIZE,drop_remainder=True,padding_values=0,padded_shapes=500)
one_hot_encoded_data_in_k = dataset_in_k.map(one_hot_encoding)
dataset_out_k = dataset_out_k.padded_batch(batch_size=BATCH_SIZE,drop_remainder=True,padding_values=0,padded_shapes=500)
for x in one_hot_encoded_data_in_k:
  print(x.shape)
  break

(128, 500, 78)


In [0]:
#using pre-built RNN and a dense layer of size - vocab
rnn_units = 1024
def build_model(vocab_size_k, rnn_units, batch_size):
  model_k = tf.keras.Sequential([
                  tf.keras.layers.GRU(rnn_units,return_sequences=True,stateful=True,recurrent_initializer='glorot_uniform'),
                  tf.keras.layers.Dense(vocab_size_k)
  ])
  return model_k

In [20]:
#  building the model
model_k = build_model(vocab_size_k=vocab_size_k,rnn_units=rnn_units,batch_size=BATCH_SIZE)
input_tensor = tf.Variable(tf.initializers.GlorotUniform(seed = 0)(shape=[128, 500, vocab_size_k]))
prediction = model_k(input_tensor)
prediction.shape

TensorShape([128, 500, 78])

In [21]:
model_k.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    multiple                  3391488   
_________________________________________________________________
dense (Dense)                multiple                  79950     
Total params: 3,471,438
Trainable params: 3,471,438
Non-trainable params: 0
_________________________________________________________________


In [0]:
#  calculating loss function
def loss_function(logits,labels):
  return tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logits)

In [0]:
def masking(unmasked_batch_k):
  
  # calculating non zeros count for each sequence in a batch
 nonzero_count  = tf.math.count_nonzero(unmasked_batch_k,axis=1,dtype= tf.float32)

 # subtracting as we didn't consider last char of input 
 nonzero_count = nonzero_count - 1

 #  converting mask into a 2D tensor of size batch x time_step
 padding_withzeros = tf.sequence_mask(nonzero_count,maxlen=500,dtype=tf.float32)

 return padding_withzeros,nonzero_count

In [0]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints_k'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,save_weights_only=True)

In [0]:
Epochs = 30
def model_execution(Epochs,optimizer_k,one_hot_encoded_data_in_k,dataset_out_k,dataset_in_k):
  for epoch in range(Epochs):
    for input_example_batch_k,output_example_batch_k,input_batch_k in zip(one_hot_encoded_data_in_k,dataset_out_k,dataset_in_k):
      model_resetting = model_k.reset_states()
      #  getting predicted output for one batch
      with tf.GradientTape() as tape:
        example_batch_predits_k = model_k(input_example_batch_k)

        padding_withzeros,nonzero_count = masking(input_batch_k)
        # calculating loss for each time_step
        loss = loss_function(example_batch_predits_k,output_example_batch_k)
  
        masked_loss = loss * padding_withzeros
        # summing the all time_steps and finally averaging for each sequence in a batch wrt length (with masking)
        summed_loss_per_batch = tf.reduce_sum(masked_loss,axis=1)
        average_loss_per_batch = tf.divide(summed_loss_per_batch,nonzero_count)
        average_loss = tf.reduce_mean(average_loss_per_batch)

      grads = tape.gradient(average_loss_per_batch,model_k.trainable_variables)
      optimizer_k.apply_gradients(zip(grads,model_k.trainable_variables))

    model_k.save_weights(checkpoint_prefix.format(epoch=epoch))
    print("Epoch: {}, Loss: {}".format(epoch, average_loss))
  model_k.save_weights(checkpoint_prefix.format(epoch=epoch))

In [30]:
learning_rate = 0.001
# Optimizer
optimizer_k = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model_execution(Epochs,optimizer_k,one_hot_encoded_data_in_k,dataset_out_k,dataset_in_k)

Epoch: 0, Loss: 1.948115587234497
Epoch: 1, Loss: 1.7417001724243164
Epoch: 2, Loss: 1.5351203680038452
Epoch: 3, Loss: 1.3823587894439697
Epoch: 4, Loss: 1.267326831817627
Epoch: 5, Loss: 1.193995714187622
Epoch: 6, Loss: 1.13751220703125
Epoch: 7, Loss: 1.0974154472351074
Epoch: 8, Loss: 1.065941572189331
Epoch: 9, Loss: 1.037876844406128
Epoch: 10, Loss: 1.0141593217849731
Epoch: 11, Loss: 0.988396942615509
Epoch: 12, Loss: 0.9641122817993164
Epoch: 13, Loss: 0.941245973110199
Epoch: 14, Loss: 0.925182044506073
Epoch: 15, Loss: 0.9075639843940735
Epoch: 16, Loss: 0.8882546424865723
Epoch: 17, Loss: 0.8870198726654053
Epoch: 18, Loss: 0.8822283744812012
Epoch: 19, Loss: 0.8875859379768372
Epoch: 20, Loss: 0.8873240947723389
Epoch: 21, Loss: 0.8786548972129822
Epoch: 22, Loss: 0.8609346151351929
Epoch: 23, Loss: 0.8409835696220398
Epoch: 24, Loss: 0.8295526504516602
Epoch: 25, Loss: 0.8249633312225342
Epoch: 26, Loss: 0.8181105852127075
Epoch: 27, Loss: 0.8080682158470154
Epoch: 28, L

In [58]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints_k/ckpt_29'

In [0]:
generating_language_model_k = build_model(vocab_size_k=vocab_size_k,rnn_units=rnn_units,batch_size=BATCH_SIZE)
generating_language_model_k.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
generating_language_model_k.build(tf.TensorShape([1,None,vocab_size_k]))

In [60]:
generating_language_model_k.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_3 (GRU)                  multiple                  3391488   
_________________________________________________________________
dense_3 (Dense)              multiple                  79950     
Total params: 3,471,438
Trainable params: 3,471,438
Non-trainable params: 0
_________________________________________________________________


In [61]:
ch = 'L'  
i = 0
for key,val in vocab_k.items():
  if key == ch:
    i = val
print(i)
# convert to one hot vector
input_ch = tf.one_hot(i,depth=vocab_size_k)
input_ch = tf.expand_dims(tf.expand_dims(input_ch,axis=0),axis=0)
print(input_ch)

41
tf.Tensor(
[[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
   0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0. 0. 0. 0. 0. 0. 0.]]], shape=(1, 1, 78), dtype=float32)


In [0]:
def softmax_function(logits):
  return tf.nn.softmax(axis=-1,logits=logits)

In [0]:
character_index_list_k = [i]
index_list_k = list(range(vocab_size_k))
generating_language_model_k.reset_states()
for time_step in range(3000):

    next_char = generating_language_model_k(input_ch)
    softmax_char = softmax_function(next_char)
    softmax_char = softmax_char.numpy()
    index = np.random.choice(index_list_k,p = softmax_char.flatten())
    character_index_list_k.append(index)

    input_ch = tf.one_hot(index,depth=vocab_size_k)
    input_ch = tf.expand_dims(tf.expand_dims(input_ch,axis=0),axis=0)

In [64]:
seq_k = [ind_to_ch[ind] for ind in character_index_list_k]
seq_k = [s.replace('</S>','\n') for s in seq_k ]
print("".join(seq_k))

L﻿RD God preached the gospel unto
thee, and to exacuse the eye shall suffer lobsentoisting one to
another testament which I know not whether keep retinging them.


elivery themselves to ankeph vioten in all things, ye and the flesh, saith the
Lord, willing to consent to walk and the Father of our walks as
sufferinged our offences for the king, and scrven lyan away as
burdens; but we cann to repentance the saints.


treace, the one of them in great power.


nd forth like a disererce of grow saiveth the word of God, so for
this people, and in the door of the deader and flesh that prophesied
certain other of men for your souls.



praise and confirmen it not.


ranci and just consust themselves to her haughty, that we have
believed in me.


nded, 
eform he sweat by the roges of fie from his power, to profit them to
spoiler, he confirmed the enemies of brass, even the head; every
one that feared, and have place with him, and let them unto himself then
he anouncence among kin doest the same