# Assignment 4

Name: Saddam Hossain
=====================

In [1]:
import os
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku
import numpy as np


INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2
Version:  2.5.0-rc2
Eager mode:  True
Hub version:  0.12.0
GPU is NOT AVAILABLE


## Explore the data

In [3]:
tokenizer = Tokenizer()
data = open('sonnets.txt').read()

corpus = data.lower().split("\n")

tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# create input sequences using list of tokens
input_sequences = []
for line in corpus:
	token_list = tokenizer.texts_to_sequences([line])[0]
	for i in range(1, len(token_list)):
		n_gram_sequence = token_list[:i+1]
		input_sequences.append(n_gram_sequence)


# pad sequences
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# create predictors and label
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

label = ku.to_categorical(label, num_classes=total_words)


# Build the SimpleRNN models

In [8]:
initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),
    tf.keras.layers.SimpleRNN(150),
	tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.9),
	tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(1),
	tf.keras.layers.Dense(32, kernel_initializer=initializer),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
	tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(total_words/2, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

history = model.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 

In [6]:
## Genearate new text
seed_text = "His love and wisdom Approved so to your majesty, may plead For amplest credence."
next_words = 100

for _ in range(next_words):
	token_list = tokenizer.texts_to_sequences([seed_text])[0]
	token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
	predicted = np.argmax(model.predict(token_list), axis=-1)
	output_word = ""
	for word, index in tokenizer.word_index.items():
		if index == predicted:
			output_word = word
			break
	seed_text += " " + output_word
print(seed_text)

His love and wisdom Approved so to your majesty, may plead For amplest credence. to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to


# Build the LSTM models

In [9]:
initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),

    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(64, activation='relu'),
	tf.keras.layers.Dropout(0.5),
	tf.keras.layers.Dense(32),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(1),
	tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,),
    tf.keras.layers.Dense(1),
	tf.keras.layers.Dense(32, kernel_initializer=initializer),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
	tf.keras.layers.BatchNormalization(),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
    tf.keras.layers.Dense(total_words/2, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

history = model.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [10]:
## Genearate new text
seed_text = "So is it not with me as with that Muse Stirr'd by a painted beauty to his verse, Who heaven itself for ornament doth use And every fair with his fair doth rehearse"
next_words = 100

for _ in range(next_words):
	token_list = tokenizer.texts_to_sequences([seed_text])[0]
	token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
	predicted = np.argmax(model.predict(token_list), axis=-1)
	output_word = ""
	for word, index in tokenizer.word_index.items():
		if index == predicted:
			output_word = word
			break
	seed_text += " " + output_word
print(seed_text)

So is it not with me as with that Muse Stirr'd by a painted beauty to his verse, Who heaven itself for ornament doth use And every fair with his fair doth rehearse the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the


# Build the RNN GRU models

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),

    tf.keras.layers.GRU(80),
    tf.keras.layers.Dense(70, activation='relu'),
    tf.keras.layers.Dense(1),
	tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(32),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(1),
	tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,),
    tf.keras.layers.Dense(20),
	tf.keras.layers.Dense(32, kernel_initializer=initializer),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
	tf.keras.layers.BatchNormalization(),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(total_words/2, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

history = model.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
## Genearate new text
seed_text = "When, in disgrace with fortune and men's eyes,I all alone beweep my outcast state And trouble deaf heaven with my bootless cries"
next_words = 100

for _ in range(next_words):
	token_list = tokenizer.texts_to_sequences([seed_text])[0]
	token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
	predicted = np.argmax(model.predict(token_list), axis=-1)
	output_word = ""
	for word, index in tokenizer.word_index.items():
		if index == predicted:
			output_word = word
			break
	seed_text += " " + output_word
print(seed_text)


When, in disgrace with fortune and men's eyes,I all alone beweep my outcast state And trouble deaf heaven with my bootless cries to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to


# Build the Bidirectional (LSTM) models

In [13]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),

    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(94)),
    tf.keras.layers.Dense(70, activation='relu'),
    tf.keras.layers.Dense(1),
	tf.keras.layers.Dropout(0.5),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(32),
	tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,),
    tf.keras.layers.Dense(20),
	tf.keras.layers.Dense(32, kernel_initializer=initializer),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
	tf.keras.layers.BatchNormalization(),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
    tf.keras.layers.Dense(128,activation='relu' ),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(total_words/2, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

history = model.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
## Genearate new text
seed_text = "Let those who are in favour with their stars Of public honour and proud titles boast"
next_words = 100

for _ in range(next_words):
	token_list = tokenizer.texts_to_sequences([seed_text])[0]
	token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
	predicted = np.argmax(model.predict(token_list), axis=-1)
	output_word = ""
	for word, index in tokenizer.word_index.items():
		if index == predicted:
			output_word = word
			break
	seed_text += " " + output_word
print(seed_text)

Let those who are in favour with their stars Of public honour and proud titles boast to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to


# Build the Deep  Bidirectional - Stack two or more LSTM layers models

In [17]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),

    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(32),
	tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,),
    tf.keras.layers.Dense(15),
	tf.keras.layers.Dense(32, kernel_initializer=initializer),
	tf.keras.layers.Dropout(0.5 ,input_shape=(5,)),
	tf.keras.layers.BatchNormalization(),
	tf.keras.layers.Dropout(0.5 ,input_shape=(2,)),
    tf.keras.layers.Dense(100),
	tf.keras.layers.BatchNormalization(axis=-1, momentum=0.70, epsilon=0.001, center=True, scale=True,),
	tf.keras.layers.Dropout(0.9),
    tf.keras.layers.Dense(total_words/2, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

history = model.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
## Genearate new text
seed_text = "How can my Muse want subject to invent, While thou dost breathe, that pour'st into my verse Thine own sweet argument"
next_words = 100

for _ in range(next_words):
	token_list = tokenizer.texts_to_sequences([seed_text])[0]
	token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
	predicted = np.argmax(model.predict(token_list), axis=-1)
	output_word = ""
	for word, index in tokenizer.word_index.items():
		if index == predicted:
			output_word = word
			break
	seed_text += " " + output_word
print(seed_text)


How can my Muse want subject to invent, While thou dost breathe, that pour'st into my verse Thine own sweet argument of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of of


## Build the models using pre-trained embedding

In [19]:
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, input_shape=[],
                           dtype=tf.string, trainable=True)
hub_layer(corpus[:3])

<tf.Tensor: shape=(3, 50), dtype=float32, numpy=
array([[ 0.0601379 ,  0.08474525, -0.12564912, -0.03055712, -0.0543563 ,
        -0.01910831,  0.03516881, -0.02834388, -0.16350889,  0.16455276,
        -0.12080777, -0.09965193, -0.1292158 ,  0.0680934 ,  0.1137348 ,
         0.02622949,  0.03221969, -0.03331864,  0.16944957, -0.03883008,
         0.15280688,  0.02801086, -0.0394293 , -0.13322379,  0.07677745,
         0.3484518 , -0.1456934 , -0.15870976,  0.12769303, -0.29332885,
        -0.05440272,  0.34292054,  0.1413424 , -0.032939  , -0.26365933,
         0.06531066,  0.02241029, -0.29459065, -0.03634536, -0.1369329 ,
         0.14757729,  0.01625074, -0.13513078,  0.03638057,  0.03655882,
         0.13979626,  0.04628458, -0.04581603, -0.05283402,  0.19260225],
       [ 0.17320311,  0.01050538, -0.15279436, -0.031652  , -0.07882088,
        -0.14152257,  0.2108615 ,  0.08486092, -0.01712983,  0.12939975,
         0.24627472, -0.00526738, -0.00404485,  0.03472548,  0.01299649,
 

Let's now build the full model:

In [20]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1))
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 50)                48190600  
_________________________________________________________________
dense_63 (Dense)             (None, 16)                816       
_________________________________________________________________
dense_64 (Dense)             (None, 1)                 17        
Total params: 48,191,433
Trainable params: 48,191,433
Non-trainable params: 0
_________________________________________________________________


## Conclusion
At first, explore the data  and  create a Keras layer that uses a TensorFlow Hub model to embed the sentences and try it out on a couple of input examples.I have used many layers dense,dropout,batchnormalization of all modules to improve and get the better accuracy and genereate like real text.



The experimental results show that LSTM get the better accuracy than others. Rnn GRU and Biderection LSTM was same accuracy and generate text . Simple RNN and Deep biderection LSTM model generate very low accuracy and generate lower level text. Furthermore, there have been Build the models using pre-trained embedding.