## Load the input data

In [3]:
file_path='/content/drive/MyDrive/archive (2)/1661-0.txt'
with open(file_path,'r') as f:
  data=f.read()

## Tokenize the text

In [4]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

In [5]:
tokenizer=Tokenizer()
tokenizer.fit_on_texts([data])

In [6]:
len(tokenizer.word_index)

8931

## Create input-output sequences

In [7]:
input_sequences=[]
for i in data.split('\n'):
  tokenized_sentence=tokenizer.texts_to_sequences([i])[0]

  for i in range(1,len(tokenized_sentence)):
    input_sequences.append(tokenized_sentence[:i+1])

In [8]:
input_sequences

[[145, 4790],
 [145, 4790, 1],
 [145, 4790, 1, 1020],
 [145, 4790, 1, 1020, 4],
 [145, 4790, 1, 1020, 4, 128],
 [145, 4790, 1, 1020, 4, 128, 34],
 [145, 4790, 1, 1020, 4, 128, 34, 45],
 [145, 4790, 1, 1020, 4, 128, 34, 45, 611],
 [145, 4790, 1, 1020, 4, 128, 34, 45, 611, 2235],
 [145, 4790, 1, 1020, 4, 128, 34, 45, 611, 2235, 2236],
 [30, 1021],
 [30, 1021, 15],
 [30, 1021, 15, 23],
 [30, 1021, 15, 23, 1],
 [30, 1021, 15, 23, 1, 275],
 [30, 1021, 15, 23, 1, 275, 4],
 [30, 1021, 15, 23, 1, 275, 4, 394],
 [30, 1021, 15, 23, 1, 275, 4, 394, 2237],
 [30, 1021, 15, 23, 1, 275, 4, 394, 2237, 21],
 [30, 1021, 15, 23, 1, 275, 4, 394, 2237, 21, 51],
 [30, 1021, 15, 23, 1, 275, 4, 394, 2237, 21, 51, 1676],
 [30, 1021, 15, 23, 1, 275, 4, 394, 2237, 21, 51, 1676, 2],
 [30, 1021, 15, 23, 1, 275, 4, 394, 2237, 21, 51, 1676, 2, 18],
 [572, 51],
 [572, 51, 3398],
 [572, 51, 3398, 3399],
 [572, 51, 3398, 3399, 13],
 [572, 51, 3398, 3399, 13, 75],
 [572, 51, 3398, 3399, 13, 75, 817],
 [572, 51, 3398, 33

## Get the maximum length from input sequences

In [9]:
max_len=max((len(x)for x in input_sequences))

In [10]:
max_len

20

## Pad the sequences (Here we used Pre-padding)

In [11]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [12]:
pad_input_sequence=pad_sequences(input_sequences,maxlen=max_len,padding='pre')

In [13]:
pad_input_sequence

array([[   0,    0,    0, ...,    0,  145, 4790],
       [   0,    0,    0, ...,  145, 4790,    1],
       [   0,    0,    0, ..., 4790,    1, 1020],
       ...,
       [   0,    0,    0, ...,    3,  360,   83],
       [   0,    0,    0, ...,  360,   83,  358],
       [   0,    0,    0, ...,   83,  358, 1673]], dtype=int32)

## Create predictors and label

In [14]:
x=pad_input_sequence[:,:-1]
y=pad_input_sequence[:,-1]

In [15]:
x.shape

(101619, 19)

In [16]:
y.shape

(101619,)

## Convert y to one-hot encoded format

In [17]:
from tensorflow.keras.utils import to_categorical
y=to_categorical(y,num_classes=8932)

In [18]:
y.shape

(101619, 8932)

In [19]:
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [20]:
# Vocabulary size including 0 for padding
vocab_size = len(tokenizer.word_index) + 1

## Import the necessary model libraries

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

## Define the LSTM model

In [22]:
model = Sequential()
model.add(Embedding(vocab_size, 100, input_length=max_len-1))
model.add(LSTM(150, return_sequences=True))
model.add(LSTM(150, return_sequences=True))
model.add(LSTM(150))
model.add(Dense(vocab_size, activation='softmax'))

## Compile the model

In [23]:
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

## Summary of model

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 19, 100)           893200    
                                                                 
 lstm (LSTM)                 (None, 19, 150)           150600    
                                                                 
 lstm_1 (LSTM)               (None, 19, 150)           180600    
                                                                 
 lstm_2 (LSTM)               (None, 150)               180600    
                                                                 
 dense (Dense)               (None, 8932)              1348732   
                                                                 
Total params: 2753732 (10.50 MB)
Trainable params: 2753732 (10.50 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Train the model

In [25]:
model.fit(x,y,epochs=200,callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])

Epoch 1/200



Epoch 2/200



Epoch 3/200



Epoch 4/200



Epoch 5/200



Epoch 6/200



Epoch 7/200



Epoch 8/200



Epoch 9/200



Epoch 10/200



Epoch 11/200



Epoch 12/200



Epoch 13/200



Epoch 14/200



Epoch 15/200



Epoch 16/200



Epoch 17/200



Epoch 18/200



Epoch 19/200



Epoch 20/200



Epoch 21/200



Epoch 22/200



Epoch 23/200



Epoch 24/200



Epoch 25/200



Epoch 26/200



Epoch 27/200



Epoch 28/200



Epoch 29/200



Epoch 30/200



Epoch 31/200



Epoch 32/200



Epoch 33/200



Epoch 34/200



Epoch 35/200



Epoch 36/200



Epoch 37/200



Epoch 38/200



Epoch 39/200



Epoch 40/200



Epoch 41/200



Epoch 42/200



Epoch 43/200



Epoch 44/200



Epoch 45/200



Epoch 46/200



Epoch 47/200



Epoch 48/200



Epoch 49/200



Epoch 50/200



Epoch 51/200



Epoch 52/200



Epoch 53/200



Epoch 54/200



Epoch 55/200



Epoch 56/200



Epoch 57/200



Epoch 58/200



Epoch 59/200



Epoch 60/200



Epoch 61/200



Epoch 62/200



Epoch 63/200



Epoch 64/200



Epoch 65/200



Epoch 66/200



Epoch 67/200



Epoch 68/200



Epoch 69/200



Epoch 70/200



Epoch 71/200



Epoch 72/200



Epoch 73/200



Epoch 74/200



Epoch 75/200



Epoch 76/200



Epoch 77/200



Epoch 78/200



Epoch 79/200



Epoch 80/200



Epoch 81/200



Epoch 82/200



Epoch 83/200



Epoch 84/200



Epoch 85/200



Epoch 86/200



Epoch 87/200



Epoch 88/200



Epoch 89/200



Epoch 90/200



Epoch 91/200



Epoch 92/200



Epoch 93/200



Epoch 94/200



Epoch 95/200



Epoch 96/200



Epoch 97/200



Epoch 98/200



Epoch 99/200



Epoch 100/200



Epoch 101/200



Epoch 102/200



Epoch 103/200



Epoch 104/200



Epoch 105/200



Epoch 106/200



Epoch 107/200



Epoch 108/200



Epoch 109/200



Epoch 110/200



Epoch 111/200



Epoch 112/200



Epoch 113/200



Epoch 114/200



Epoch 115/200



Epoch 116/200



Epoch 117/200



Epoch 118/200



Epoch 119/200



Epoch 120/200



Epoch 121/200



Epoch 122/200



Epoch 123/200



Epoch 124/200



Epoch 125/200



Epoch 126/200



Epoch 127/200



Epoch 128/200



Epoch 129/200



Epoch 130/200



Epoch 131/200



Epoch 132/200



Epoch 133/200



Epoch 134/200



Epoch 135/200



Epoch 136/200



Epoch 137/200



Epoch 138/200



Epoch 139/200



Epoch 140/200



Epoch 141/200



Epoch 142/200



Epoch 143/200



Epoch 144/200



Epoch 145/200



Epoch 146/200



Epoch 147/200



Epoch 148/200



Epoch 149/200



Epoch 150/200



Epoch 151/200



Epoch 152/200



Epoch 153/200



Epoch 154/200



Epoch 155/200



Epoch 156/200



Epoch 157/200



Epoch 158/200



Epoch 159/200



Epoch 160/200



Epoch 161/200



Epoch 162/200



Epoch 163/200



Epoch 164/200



Epoch 165/200



Epoch 166/200



Epoch 167/200



Epoch 168/200



Epoch 169/200



Epoch 170/200



Epoch 171/200



Epoch 172/200



Epoch 173/200



Epoch 174/200



Epoch 175/200



Epoch 176/200



Epoch 177/200



Epoch 178/200



Epoch 179/200



Epoch 180/200



Epoch 181/200



Epoch 182/200



Epoch 183/200



Epoch 184/200



Epoch 185/200



Epoch 186/200



Epoch 187/200



Epoch 188/200



Epoch 189/200



Epoch 190/200



Epoch 191/200



Epoch 192/200



Epoch 193/200



Epoch 194/200



Epoch 195/200



Epoch 196/200



Epoch 197/200



Epoch 198/200



Epoch 199/200



Epoch 200/200





<keras.src.callbacks.History at 0x7facb0004c10>

## Save the model

In [26]:
path='/content/drive/MyDrive/'

In [27]:
model.save('second.h5',path)

  saving_api.save_model(


## Load the model

In [28]:
from tensorflow.keras.models import load_model

In [29]:
loaded_model=load_model('second.h5')

In [30]:
loaded_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 19, 100)           893200    
                                                                 
 lstm (LSTM)                 (None, 19, 150)           150600    
                                                                 
 lstm_1 (LSTM)               (None, 19, 150)           180600    
                                                                 
 lstm_2 (LSTM)               (None, 150)               180600    
                                                                 
 dense (Dense)               (None, 8932)              1348732   
                                                                 
Total params: 2753732 (10.50 MB)
Trainable params: 2753732 (10.50 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Generate the text or predict next words from given input word(s)

In [49]:
import time
import numpy as np

text=' Mr.Sherlock Holmes was'

for i in range(15):
  #tokenize
  token_text=tokenizer.texts_to_sequences([text])[0]
  #padding
  padded_token_text=pad_sequences([token_text],maxlen=19,padding='pre')
  #predict
  pos=np.argmax(model.predict(padded_token_text))

  for word,index in tokenizer.word_index.items():
    if index==pos:
      text = text + " " + word
      print(text)
      time.sleep(1)

 Mr.Sherlock Holmes was following
 Mr.Sherlock Holmes was following my
 Mr.Sherlock Holmes was following my friend
 Mr.Sherlock Holmes was following my friend on
 Mr.Sherlock Holmes was following my friend on examining
 Mr.Sherlock Holmes was following my friend on examining it
 Mr.Sherlock Holmes was following my friend on examining it by
 Mr.Sherlock Holmes was following my friend on examining it by a
 Mr.Sherlock Holmes was following my friend on examining it by a long
 Mr.Sherlock Holmes was following my friend on examining it by a long edge
 Mr.Sherlock Holmes was following my friend on examining it by a long edge which
 Mr.Sherlock Holmes was following my friend on examining it by a long edge which we
 Mr.Sherlock Holmes was following my friend on examining it by a long edge which we had
 Mr.Sherlock Holmes was following my friend on examining it by a long edge which we had one
 Mr.Sherlock Holmes was following my friend on examining it by a long edge which we had one waiting
