In [1]:
from __future__ import absolute_import, division,print_function, unicode_literals

import numpy as np
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import GRU

from keras.optimizers import RMSprop

from keras.callbacks import LambdaCallback
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
import random
import sys




In [2]:
import os
import urllib.request

# Public copy of the Shakespeare corpus; fetched only when no local copy exists
url = 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'

# Candidate locations, in order of preference: the Kaggle dataset mount,
# then a copy in the working directory (e.g. left by an earlier download)
kaggle_path = '/kaggle/input/shakespeare-txt/shakespeare.txt'
local_path = 'shakespeare.txt'

if os.path.exists(kaggle_path):
    data_path = kaggle_path
else:
    # Download only when needed, so the cell still runs offline whenever a
    # copy of the corpus is already present.
    # NOTE(review): the downloaded copy may not be identical to the Kaggle
    # dataset, so the vocabulary built from it can differ - confirm which
    # corpus is intended.
    if not os.path.exists(local_path):
        urllib.request.urlretrieve(url, local_path)
    data_path = local_path

# Read the whole corpus into memory; the encoding is pinned so the result
# does not depend on the platform default
with open(data_path, 'r', encoding='utf-8') as file:
    text = file.read()

# A preview of the text file
print(text[:300])  # Displaying the first 300 characters as an example


This is the 100th Etext file presented by Project Gutenberg, and
is presented in cooperation with World Library, Inc., from their
Library of the Future and Shakespeare CDROMS.  Project Gutenberg
often releases Etexts that are NOT placed in the Public Domain!!

Shakespeare

*This Etext has certain co


 Creating a mapping from each unique character in the text to a unique number



In [3]:
# Distinct characters of the corpus, in sorted order
vocabulary = sorted(set(text))

# Lookup tables in both directions: character -> index and index -> character
char_to_indices = {char: index for index, char in enumerate(vocabulary)}
indices_to_char = dict(enumerate(vocabulary))

print(vocabulary)


['\n', ' ', '!', '"', '#', '%', '&', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '}', '~']


Pre-processing the data

In [4]:
# Slide a window of max_length characters over the text, moving `steps`
# characters at a time. Each window is one training input and the character
# that immediately follows it is the target.
max_length = 50
steps = 5
window_starts = range(0, len(text) - max_length, steps)
sentences = [text[start: start + max_length] for start in window_starts]
next_chars = [text[start + max_length] for start in window_starts]

# One-hot encoding.
# X has one boolean vector per (window, position); y has one per window.
X = np.zeros((len(sentences), max_length, len(vocabulary)), dtype = bool)
y = np.zeros((len(sentences), len(vocabulary)), dtype = bool)

# Switch on the single entry that identifies each character
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    for position, char in enumerate(window):
        X[row, position, char_to_indices[char]] = True
    y[row, char_to_indices[target]] = True


Building the GRU network

In [5]:
# Initializing the GRU network
model = Sequential()

# Recurrent layer: 128 GRU units reading windows of max_length one-hot vectors
model.add(GRU(128, input_shape =(max_length, len(vocabulary))))

# Densely connected layer with one output per vocabulary character
model.add(Dense(len(vocabulary)))

# Softmax turns those outputs into a probability distribution over characters
model.add(Activation('softmax'))

# Defining the optimizing function.
# `learning_rate` is the documented argument name; the legacy `lr` alias is
# deprecated and newer Keras releases ignore or reject it, which would leave
# the optimizer on its default rate instead of 0.01.
optimizer = RMSprop(learning_rate = 0.01)

# Configuring the model for training
model.compile(loss ='categorical_crossentropy', optimizer = optimizer)


a) Helper function to sample the next character:



In [6]:
# Helper function to sample an index from a probability array
def sample_index(preds, temperature = 1.0):
    """Draw one index at random from a temperature-scaled distribution.

    Parameters
    ----------
    preds : array-like of float
        Probabilities for each candidate index (e.g. a softmax output).
    temperature : float, default 1.0
        Controls the freedom of the sampling. Values below 1 sharpen the
        distribution towards the most likely index, values above 1 flatten
        it. A value of 0 or less selects the most likely index directly.

    Returns
    -------
    Integer index of the sampled entry (a randomly drawn index, not
    necessarily the most probable one).
    """
    # float64 keeps the re-normalized probabilities accurate enough for
    # np.random.multinomial
    preds = np.asarray(preds).astype('float64')

    # A non-positive temperature cannot be divided by; treat it as the
    # limiting case of greedy selection
    if temperature <= 0:
        return np.argmax(preds)

    # Re-scale in log space. log(0) = -inf is intended here (such entries
    # end up with probability exactly 0), so the divide warning is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0 before exponentiating; otherwise a
    # small temperature underflows every term to 0 and the normalization
    # below becomes 0 / 0
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    # Single multinomial draw: a one-hot row marking the sampled index
    probas = np.random.multinomial(1, preds, 1)

    # Position of the 1 in the one-hot draw, i.e. the sampled index
    return np.argmax(probas)


b) Helper function to generate text after each epoch



In [7]:
def on_epoch_end(epoch, logs):
    """Print text sampled from the model at the end of an epoch.

    A random window of the corpus is used as the seed. For each diversity
    (sampling temperature) value, 400 characters are generated one at a time
    and streamed to stdout as they are produced.

    Parameters
    ----------
    epoch : int
        Epoch number, used only in the printed header.
    logs : dict
        Accepted to match the callback signature; not used here.
    """
    print()
    print('----- Generating text after Epoch: % d' % epoch)

    # One random seed window, shared by every diversity setting
    seed_start = random.randint(0, len(text) - max_length - 1)
    seed = text[seed_start: seed_start + max_length]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Rolling window of the last max_length characters fed to the model
        window = seed
        for _ in range(400):
            # One-hot encode the current window as a batch of size 1
            x_pred = np.zeros((1, max_length, len(vocabulary)))
            for position, char in enumerate(window):
                x_pred[0, position, char_to_indices[char]] = 1.

            # Predicted distribution over the next character
            preds = model.predict(x_pred, verbose = 0)[0]

            # Sample the next character according to the current diversity
            next_char = indices_to_char[sample_index(preds, diversity)]

            # Drop the oldest character and append the new one
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Wrap the function in a callback so it runs at the end of every epoch
print_callback = LambdaCallback(on_epoch_end = on_epoch_end)


c) Callback to save the model after each epoch in which the loss decreases



In [8]:
# Checkpoint callback: writes the model to `filepath`, and only when the
# monitored training loss reaches a new minimum
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)


d) Callback to reduce the learning rate each time the loss plateaus



In [9]:
# Learning-rate schedule callback: when the training loss stops improving
# for one epoch, scale the learning rate by 0.2, with a floor of 0.001
reduce_alpha = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=1,
    min_lr=0.001,
)

# All callbacks collected in one list, ready to be passed to model.fit
callbacks = [print_callback, checkpoint, reduce_alpha]


Training the GRU model



In [10]:
# Training the GRU model
# model.fit(X, y, batch_size = 256, epochs = 10, callbacks = callbacks)


Generating new and random text

In [11]:
# def generate_text(length, diversity):
# 	# Get random starting text
# 	start_index = random.randint(0, len(text) - max_length - 1)

# 	# Defining the generated text
# 	generated = ''
# 	sentence = text[start_index: start_index + max_length]
# 	generated += sentence

# 	# Generating new text of given length
# 	for i in range(length):

# 			# Initializing the prediction vector
# 			x_pred = np.zeros((1, max_length, len(vocabulary)))
# 			for t, char in enumerate(sentence):
# 				x_pred[0, t, char_to_indices[char]] = 1.

# 			# Making the predictions
# 			preds = model.predict(x_pred, verbose = 0)[0]

# 			# Getting the index of the next most probable index
# 			next_index = sample_index(preds, diversity)

# 			# Getting the most probable next character using the mapping built
# 			next_char = indices_to_char[next_index]

# 			# Generating new text
# 			generated += next_char
# 			sentence = sentence[1:] + next_char
# 	return generated

# print(generate_text(500, 1.0))


In [12]:
# Print the layers of the model with their output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 128)               84864     
                                                                 
 dense (Dense)               (None, 91)                11739     
                                                                 
 activation (Activation)     (None, 91)                0         
                                                                 
Total params: 96603 (377.36 KB)
Trainable params: 96603 (377.36 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:

# Compile with the Adam optimizer and an accuracy metric, so the training
# history records accuracy alongside the loss.
# NOTE(review): this replaces the RMSprop optimizer configured when the model
# was built, and the `callbacks` list defined earlier is not passed to fit -
# confirm that both are intended.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the one-hot encoded windows; the returned object keeps the
# per-epoch metric values in its `history` dict
history = model.fit(X, y, epochs=20, batch_size=200)

# Accuracy of the last epoch, reported as a percentage
final_accuracy = history.history['accuracy'][-1]
print(f"Final Training Accuracy: {final_accuracy * 100:.2f}%")


In [None]:
# Training loss of the last epoch
loss_per_epoch = history.history['loss']
final_loss = loss_per_epoch[-1]

# Perplexity is the exponential of the cross-entropy loss
perplexity = np.exp(final_loss)

print(f"Final Perplexity: {perplexity:.2f}")

In [None]:
from matplotlib import pyplot

# Draw both training curves on a single set of axes, one point per epoch
fig, ax = pyplot.subplots()
ax.plot(history.history['loss'], label='loss')
ax.plot(history.history['accuracy'], label='accuracy')
ax.set_title('model loss vs accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper right')
pyplot.show()