#Task 3: Language model using the text from a large collection of Science Fiction stories

In [4]:
#importing the necessary libraries
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, Embedding, GRU, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file, to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
import numpy as np
import random
import sys
import io
import requests
import re
import string
from google.colab import drive
import zipfile

In [5]:
#mounting google drive
drive.mount('/content/drive', force_remount=True) 

Mounted at /content/drive


Data preparation

In [6]:
#loading the data
zip_file = "/content/drive/My Drive/Deep Learning/SciFi.zip"
# extract location
extract_path = "/content/DeepLearning/Scifi/"
# create a ZipFile object
with zipfile.ZipFile(zip_file, 'r') as zip_obj:
  # extract all the contents of the zip file to the specified directory 
  zip_obj.extractall(extract_path)

In [7]:
#open the extracted zip file
file=open('/content/DeepLearning/Scifi/internet_archive_scifi_v3.txt','r',encoding='utf8').read()

In [8]:
len(file)

149326361

In [9]:
print(file[:500])

MARCH # All Stories New and Complete Publisher Editor IF is published bi-monthly by Quinn Publishing Company, Inc., Kingston, New York. Volume #, No. #. Copyright # by Quinn Publishing Company, Inc. Application for Entry' as Second Class matter at Post Office, Buffalo, New York, pending. Subscription # for # issues in U.S. and Possessions: Canada # for # issues; elsewhere #. Aiiow four weeks for change of address. All stories appearing in this magazine are fiction. Any similarity to actual perso


Data Preprocessing

In [10]:
#Data cleaning - Splitting the text, Removing the special characters and extra space
def cleaned_text(text):
  text = text.split(" ")
  text = re.sub(r'[^\x00-\x7f]', r'', str(text)) # removing special chars...
  text = text.translate(str.maketrans('', '', string.punctuation)) # removing special chars...
  text = re.sub('\s+', ' ', str(text)) # removing extra spaces...
  return text

text = cleaned_text(file)

In [30]:
#visualize cleaned text
print("Example of cleaned text:",text[0:100])
print("Length of text:", len(text))

Example of cleaned text: MARCH All Stories New and Complete Publisher Editor IF is published bimonthly by Quinn Publishing Co
Length of text: 142442755


In [27]:
#creating a corpus for the text and creating a list of unique words
text_corpus = text.split(" ")
text_corpus = [x for x in text_corpus if x != ""]
print("Length of text corpus:",len(text_corpus))
unique_vocab = list(set(text_corpus))
print("Length of unique words:",len(unique_vocab))

Length of text corpus: 26308635
Length of unique words: 330125


In [16]:
text_corpus[:20]

['MARCH',
 'All',
 'Stories',
 'New',
 'and',
 'Complete',
 'Publisher',
 'Editor',
 'IF',
 'is',
 'published',
 'bimonthly',
 'by',
 'Quinn',
 'Publishing',
 'Company',
 'Inc',
 'Kingston',
 'New',
 'York']

In [26]:
#Dividing the whole text into multiple strings each of length 31
seq_length =  30
step_size = 1 
all_sentences = []
for i in range(seq_length, len(text_corpus)):
  # sliding window to divide the text
  sentence = text_corpus[i - seq_length: i] 
  sentence = ' '.join(sentence)
  all_sentences.append(sentence)

all_sentences[:10]
print("Length of all sentences: ", len(all_sentences))

Length of all sentences:  499970


In [24]:
# Data selected and unique vocabulary in the data
model_sent  = all_sentences[25808605:]
print(len(model_sent), "words are selected from the end as the smaller dataset")
print("Example of model sent: ", model_sent[:20])
unique_vocab = list(set(model_sent))
print("Length of unique words: ", len(unique_vocab))
print("Example of of unique words: ", unique_vocab[0:10])

500000 words are selected from the end as the smaller dataset
Example of model sent:  ['think He fell forward Or more he just crumpled to a heap on his Formula Hut floor and became right before her eyes a very sad pile of quite expensive', 'He fell forward Or more he just crumpled to a heap on his Formula Hut floor and became right before her eyes a very sad pile of quite expensive replacement', 'fell forward Or more he just crumpled to a heap on his Formula Hut floor and became right before her eyes a very sad pile of quite expensive replacement metal', 'forward Or more he just crumpled to a heap on his Formula Hut floor and became right before her eyes a very sad pile of quite expensive replacement metal and', 'Or more he just crumpled to a heap on his Formula Hut floor and became right before her eyes a very sad pile of quite expensive replacement metal and fleshstrip', 'more he just crumpled to a heap on his Formula Hut floor and became right before her eyes a very sad pile of quit

In [25]:
# tokenizing the words and converting to numerical values
tokenizer = Tokenizer(num_words= 10000)
tokenizer.fit_on_texts(model_sent)
seq = tokenizer.texts_to_sequences(model_sent)

In [32]:
# Pad sequences to a fixed length of 30 with 'pre'
max_length = 30
padded_sequences = pad_sequences(seq, maxlen=max_length, padding='pre', truncating='pre')

In [33]:
data = np.vstack(padded_sequences)

In [34]:
# selecting 30 columns of each rows as features and 31st as target variable
X = data[:, :-1]
y = data[:, -1]

In [35]:
len(tokenizer.word_index)

21544

In [None]:
X.shape[1]

29

Model Building

In [36]:
# Sequential LSTM model to predict next word...
model = Sequential()

# input_dim is the length of the vocab/dictionary that we created earlier, output_dim is 50, and input length is 31...
model.add(Embedding(len(tokenizer.word_index) + 1, 50, input_length = X.shape[1])) 

# 64 LSTM units and return_sequences = True to pass it on to next LSTM layer. Dropout for regularization
model.add(LSTM(64, return_sequences=True))


model.add(LSTM(64))

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(len(tokenizer.word_index) + 1, activation='softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 29, 50)            1077250   
                                                                 
 lstm (LSTM)                 (None, 29, 64)            29440     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 21545)             2779305   
                                                                 
Total params: 3,927,339
Trainable params: 3,927,339
No

In [None]:
#define early stopping and Model checkpoints that are used as callbacks in the model
es = EarlyStopping(monitor="loss",patience=10,verbose=0,mode="auto")
checkpoint_filepath = "/content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.{epoch:007}"
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath, monitor='loss', save_best_only=True, mode='min', verbose=1)


In [None]:
# Compiling the model with adam optimizer and training it for 100 epochs with a batch size of 32
model.compile(loss='sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
model.fit(X, y, batch_size = 32, epochs=100, callbacks=[es,model_checkpoint_callback])
model.save('/content/drive/MyDrive/Deep Learning/task3/model_weights.h5')

Epoch 1/100
Epoch 1: loss improved from inf to 6.53713, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000001




Epoch 2/100
Epoch 2: loss improved from 6.53713 to 6.06130, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000002




Epoch 3/100
Epoch 3: loss improved from 6.06130 to 5.85536, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000003




Epoch 4/100
Epoch 4: loss improved from 5.85536 to 5.70628, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000004




Epoch 5/100
Epoch 5: loss improved from 5.70628 to 5.59190, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000005




Epoch 6/100
Epoch 6: loss improved from 5.59190 to 5.49353, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000006




Epoch 7/100
Epoch 7: loss improved from 5.49353 to 5.40412, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000007




Epoch 8/100
Epoch 8: loss improved from 5.40412 to 5.32133, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000008




Epoch 9/100
Epoch 9: loss improved from 5.32133 to 5.24669, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000009




Epoch 10/100
Epoch 10: loss improved from 5.24669 to 5.17678, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000010




Epoch 11/100
Epoch 11: loss improved from 5.17678 to 5.11457, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000011




Epoch 12/100
Epoch 12: loss improved from 5.11457 to 5.05603, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000012




Epoch 13/100
Epoch 13: loss improved from 5.05603 to 5.00358, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000013




Epoch 14/100
Epoch 14: loss improved from 5.00358 to 4.95495, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000014




Epoch 15/100
Epoch 15: loss improved from 4.95495 to 4.91243, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000015




Epoch 16/100
Epoch 16: loss improved from 4.91243 to 4.87120, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000016




Epoch 17/100
Epoch 17: loss improved from 4.87120 to 4.83444, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000017




Epoch 18/100
Epoch 18: loss improved from 4.83444 to 4.79717, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000018




Epoch 19/100
Epoch 19: loss improved from 4.79717 to 4.76642, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000019




Epoch 20/100
Epoch 20: loss improved from 4.76642 to 4.73654, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000020




Epoch 21/100
Epoch 21: loss improved from 4.73654 to 4.70870, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000021




Epoch 22/100
Epoch 22: loss improved from 4.70870 to 4.68362, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000022




Epoch 23/100
Epoch 23: loss improved from 4.68362 to 4.65918, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000023




Epoch 24/100
Epoch 24: loss improved from 4.65918 to 4.63554, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000024




Epoch 25/100
Epoch 25: loss improved from 4.63554 to 4.61531, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000025




Epoch 26/100
Epoch 26: loss improved from 4.61531 to 4.59632, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000026




Epoch 27/100
Epoch 27: loss improved from 4.59632 to 4.57853, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000027




Epoch 28/100
Epoch 28: loss improved from 4.57853 to 4.56215, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000028




Epoch 29/100
Epoch 29: loss improved from 4.56215 to 4.54425, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000029




Epoch 30/100
Epoch 30: loss improved from 4.54425 to 4.53116, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000030




Epoch 31/100
Epoch 31: loss improved from 4.53116 to 4.51636, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000031




Epoch 32/100
Epoch 32: loss improved from 4.51636 to 4.50194, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000032




Epoch 33/100
Epoch 33: loss improved from 4.50194 to 4.48860, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000033




Epoch 34/100
Epoch 34: loss improved from 4.48860 to 4.47548, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000034




Epoch 35/100
Epoch 35: loss improved from 4.47548 to 4.46103, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000035




Epoch 36/100
Epoch 36: loss improved from 4.46103 to 4.44861, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000036




Epoch 37/100
Epoch 37: loss improved from 4.44861 to 4.43910, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000037




Epoch 38/100
Epoch 38: loss improved from 4.43910 to 4.42807, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000038




Epoch 39/100
Epoch 39: loss improved from 4.42807 to 4.41882, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000039




Epoch 40/100
Epoch 40: loss improved from 4.41882 to 4.40891, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000040




Epoch 41/100
Epoch 41: loss improved from 4.40891 to 4.39955, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000041




Epoch 42/100
Epoch 42: loss improved from 4.39955 to 4.39145, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000042




Epoch 43/100
Epoch 43: loss improved from 4.39145 to 4.38215, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000043




Epoch 44/100
Epoch 44: loss improved from 4.38215 to 4.37470, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000044




Epoch 45/100
Epoch 45: loss improved from 4.37470 to 4.36745, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000045




Epoch 46/100
Epoch 46: loss improved from 4.36745 to 4.36028, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000046




Epoch 47/100
Epoch 47: loss improved from 4.36028 to 4.35534, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000047




Epoch 48/100
Epoch 48: loss improved from 4.35534 to 4.34883, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000048




Epoch 49/100
Epoch 49: loss improved from 4.34883 to 4.34187, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000049




Epoch 50/100
Epoch 50: loss improved from 4.34187 to 4.33529, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000050




Epoch 51/100
Epoch 51: loss improved from 4.33529 to 4.32824, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000051




Epoch 52/100
Epoch 52: loss improved from 4.32824 to 4.32657, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000052




Epoch 53/100
Epoch 53: loss improved from 4.32657 to 4.32119, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000053




Epoch 54/100
Epoch 54: loss improved from 4.32119 to 4.31391, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000054




Epoch 55/100
Epoch 55: loss improved from 4.31391 to 4.31047, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000055




Epoch 56/100
Epoch 56: loss improved from 4.31047 to 4.30657, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000056




Epoch 57/100
Epoch 57: loss improved from 4.30657 to 4.30189, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000057




Epoch 58/100
Epoch 58: loss improved from 4.30189 to 4.29700, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000058




Epoch 59/100
Epoch 59: loss improved from 4.29700 to 4.29248, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000059




Epoch 60/100
Epoch 60: loss improved from 4.29248 to 4.29037, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000060




Epoch 61/100
Epoch 61: loss improved from 4.29037 to 4.28398, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000061




Epoch 62/100
Epoch 62: loss improved from 4.28398 to 4.27922, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000062




Epoch 63/100
Epoch 63: loss improved from 4.27922 to 4.27655, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000063




Epoch 64/100
Epoch 64: loss did not improve from 4.27655
Epoch 65/100
Epoch 65: loss improved from 4.27655 to 4.27062, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000065




Epoch 66/100
Epoch 66: loss improved from 4.27062 to 4.26912, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000066




Epoch 67/100
Epoch 67: loss improved from 4.26912 to 4.26335, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000067




Epoch 68/100
Epoch 68: loss improved from 4.26335 to 4.25988, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000068




Epoch 69/100
Epoch 69: loss improved from 4.25988 to 4.25921, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000069




Epoch 70/100
Epoch 70: loss improved from 4.25921 to 4.25462, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000070




Epoch 71/100
Epoch 71: loss improved from 4.25462 to 4.25261, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000071




Epoch 72/100
Epoch 72: loss improved from 4.25261 to 4.25172, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000072




Epoch 73/100
Epoch 73: loss improved from 4.25172 to 4.24594, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000073




Epoch 74/100
Epoch 74: loss improved from 4.24594 to 4.24377, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000074




Epoch 75/100
Epoch 75: loss improved from 4.24377 to 4.24166, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000075




Epoch 76/100
Epoch 76: loss did not improve from 4.24166
Epoch 77/100
Epoch 77: loss improved from 4.24166 to 4.24091, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000077




Epoch 78/100
Epoch 78: loss improved from 4.24091 to 4.23977, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000078




Epoch 79/100
Epoch 79: loss improved from 4.23977 to 4.23497, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000079




Epoch 80/100
Epoch 80: loss did not improve from 4.23497
Epoch 81/100
Epoch 81: loss improved from 4.23497 to 4.23118, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000081




Epoch 82/100
Epoch 82: loss did not improve from 4.23118
Epoch 83/100
Epoch 83: loss did not improve from 4.23118
Epoch 84/100
Epoch 84: loss improved from 4.23118 to 4.22994, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000084




Epoch 85/100
Epoch 85: loss improved from 4.22994 to 4.22462, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000085




Epoch 86/100
Epoch 86: loss did not improve from 4.22462
Epoch 87/100
Epoch 87: loss did not improve from 4.22462
Epoch 88/100
Epoch 88: loss improved from 4.22462 to 4.22219, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000088




Epoch 89/100
Epoch 89: loss did not improve from 4.22219
Epoch 90/100
Epoch 90: loss improved from 4.22219 to 4.22052, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000090




Epoch 91/100
Epoch 91: loss did not improve from 4.22052
Epoch 92/100
Epoch 92: loss improved from 4.22052 to 4.21619, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000092




Epoch 93/100
Epoch 93: loss improved from 4.21619 to 4.21508, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000093




Epoch 94/100
Epoch 94: loss improved from 4.21508 to 4.21388, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000094




Epoch 95/100
Epoch 95: loss did not improve from 4.21388
Epoch 96/100
Epoch 96: loss did not improve from 4.21388
Epoch 97/100
Epoch 97: loss improved from 4.21388 to 4.21238, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000097




Epoch 98/100
Epoch 98: loss did not improve from 4.21238
Epoch 99/100
Epoch 99: loss improved from 4.21238 to 4.20856, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000099




Epoch 100/100
Epoch 100: loss improved from 4.20856 to 4.20606, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000100






In [38]:
#model loaded from the previous training. Training is resumed 
model = load_model('/content/drive/MyDrive/Deep Learning/task3/model_weights.h5')
# Load the checkpoints
weight_file = "/content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000101"
model.load_weights(weight_file).expect_partial()

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7fafbc2d12b0>

In [None]:
#Resuming the model training by loading the previous weights
model.fit(X,y, epochs=150, batch_size = 32 ,initial_epoch=101, callbacks=[es, model_checkpoint_callback])

Epoch 102/150
Epoch 102: loss improved from inf to 4.20946, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000102




Epoch 103/150
Epoch 103: loss improved from 4.20946 to 4.20423, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000103




Epoch 104/150
Epoch 104: loss did not improve from 4.20423
Epoch 105/150
Epoch 105: loss did not improve from 4.20423
Epoch 106/150
Epoch 106: loss did not improve from 4.20423
Epoch 107/150
Epoch 107: loss did not improve from 4.20423
Epoch 108/150
Epoch 108: loss improved from 4.20423 to 4.20342, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000108




Epoch 109/150
Epoch 109: loss did not improve from 4.20342
Epoch 110/150
Epoch 110: loss improved from 4.20342 to 4.19850, saving model to /content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000110




Epoch 111/150
Epoch 111: loss did not improve from 4.19850
Epoch 112/150
Epoch 112: loss did not improve from 4.19850
Epoch 113/150
Epoch 113: loss did not improve from 4.19850
Epoch 114/150
Epoch 114: loss did not improve from 4.19850
Epoch 115/150
Epoch 115: loss did not improve from 4.19850
Epoch 116/150
Epoch 116: loss did not improve from 4.19850
Epoch 117/150
Epoch 117: loss did not improve from 4.19850
Epoch 118/150
Epoch 118: loss did not improve from 4.19850
Epoch 119/150
Epoch 119: loss did not improve from 4.19850
Epoch 120/150
Epoch 120: loss did not improve from 4.19850


<keras.callbacks.History at 0x7f01500c32b0>

The loss did not imrpove after 110 epochs where the training is terminated as the patience set in early stopping is set to 0

In [47]:
#model loaded from the previous training. Training is resumed 
model = load_model('/content/drive/MyDrive/Deep Learning/task3/model_weights.h5')
# Load the checkpoints
weight_file = "/content/drive/MyDrive/Deep Learning/task3/checkpoints/task3_checkpoints.0000110"
model.load_weights(weight_file).expect_partial()

In [40]:
# function to generate text when input text and number of words to be generated are given. The text is preprocessed. 
def text_generator(model, tokenizer, seq_len, feature_text, num_words):
  text = []
  for i in range(num_words):
    token = tokenizer.texts_to_sequences([feature_text])[0]
    token = pad_sequences([token], maxlen = seq_len, truncating='pre')
    # y_pred = model.predict_classes(token)
    y_pred = model.predict(token) 
    y_pred = np.argmax(y_pred, axis=1)

    pred_word = ''
    for word, idx in tokenizer.word_index.items():
      if idx == y_pred:
        pred_word = word
        break
    feature_text += " "+ pred_word
    text.append(pred_word)

  return " ".join(text)

In [46]:
while True:
    text = input('Enter your line: ')
    if text.lower() == 'q':
        print('Session terminated...')
        break

    num_of_words = int(input('Enter the number of words to generate: '))

    try:
        print(text)

        next_sentences = text_generator(model, tokenizer, X.shape[1], text, num_of_words)
        print("The next sentences are:", next_sentences)
        print("Enter q to quit")

    except Exception as e:
        print('Error occurred:', e)
        continue


Enter your line: sea monsters are coming to get us and we have to get back to the shore as soon as possible
Enter the number of words to generate: 50
sea monsters are coming to get us and we have to get back to the shore as soon as possible
The next sentences are: astronomers to allow nine down in the sun whose tongue is written in the cells the captain is the same thing the chief scientist came together marin had grown forever weapons is hardly the final landing the golden section says appeared for the extreme stage and successful supply in the
Enter q to quit
Enter your line: q
Session terminated...
