Creating a next-word suggestion model

In [None]:
import builtins

import numpy as np
import tensorflow as tf
import keras
from keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.utils import to_categorical

In [None]:
# Tiny training corpus: seven short sentences the next-word model learns from.
sentences = [
    "I Love ML",
    "ML is amazing",
    "My name is swathika",
    "I enjoy learning new things",
    "I love sports",
    "My Favourite actor is VIJAY",
    "I watched all Vijay movies",
]

In [None]:
# Fit a word-level tokenizer on the corpus; this builds the word -> integer id
# vocabulary that every later cell relies on.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=sentences)

In [None]:
# Show the learned word -> integer id mapping (ids start at 1).
tokenizer.word_index

{'i': 1,
 'is': 2,
 'love': 3,
 'ml': 4,
 'my': 5,
 'vijay': 6,
 'amazing': 7,
 'name': 8,
 'swathika': 9,
 'enjoy': 10,
 'learning': 11,
 'new': 12,
 'things': 13,
 'sports': 14,
 'favourite': 15,
 'actor': 16,
 'watched': 17,
 'all': 18,
 'movies': 19}

In [None]:
# Vocabulary size used for the one-hot targets and the model's input/output
# dimensions. The +1 accounts for id 0, which is never assigned to a word
# (word ids start at 1) and is the value used for padding.
total_words = 1 + len(tokenizer.word_index)
total_words

20

In [None]:
# For every sentence, emit each prefix of length >= 2 of its word-id sequence.
# The last id of a prefix is the word to predict; the ids before it are the
# context, e.g. [1, 3, 4] -> [1, 3] and [1, 3, 4].
input_sequences = [
    encoded[:end]
    for encoded in tokenizer.texts_to_sequences(sentences)
    for end in range(2, len(encoded) + 1)
]

In [None]:
# Inspect the generated n-gram prefixes (still variable-length lists of word ids).
input_sequences

[[1, 3],
 [1, 3, 4],
 [4, 2],
 [4, 2, 7],
 [5, 8],
 [5, 8, 2],
 [5, 8, 2, 9],
 [1, 10],
 [1, 10, 11],
 [1, 10, 11, 12],
 [1, 10, 11, 12, 13],
 [1, 3],
 [1, 3, 14],
 [5, 15],
 [5, 15, 16],
 [5, 15, 16, 2],
 [5, 15, 16, 2, 6],
 [1, 17],
 [1, 17, 18],
 [1, 17, 18, 6],
 [1, 17, 18, 6, 19]]

In [None]:
# Pre-padding: left-fill every n-gram with zeros up to the longest n-gram so
# all rows have the same length and the target word stays in the last column.
max_sequence_len = max(map(len, input_sequences))
print(max_sequence_len)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

5


In [None]:
# Inspect the padded array: every row now has max_sequence_len entries, zero-filled on the left.
input_sequences

array([[ 0,  0,  0,  1,  3],
       [ 0,  0,  1,  3,  4],
       [ 0,  0,  0,  4,  2],
       [ 0,  0,  4,  2,  7],
       [ 0,  0,  0,  5,  8],
       [ 0,  0,  5,  8,  2],
       [ 0,  5,  8,  2,  9],
       [ 0,  0,  0,  1, 10],
       [ 0,  0,  1, 10, 11],
       [ 0,  1, 10, 11, 12],
       [ 1, 10, 11, 12, 13],
       [ 0,  0,  0,  1,  3],
       [ 0,  0,  1,  3, 14],
       [ 0,  0,  0,  5, 15],
       [ 0,  0,  5, 15, 16],
       [ 0,  5, 15, 16,  2],
       [ 5, 15, 16,  2,  6],
       [ 0,  0,  0,  1, 17],
       [ 0,  0,  1, 17, 18],
       [ 0,  1, 17, 18,  6],
       [ 1, 17, 18,  6, 19]], dtype=int32)

In [None]:
#creating input and output data
input=input_sequences[:,:-1]
output=input_sequences[:,-1]

In [None]:
# Sanity check on the first sample: context ids on one line, target id on the next.
print(input[0], output[0], sep="\n")

[0 0 0 1]
3


In [None]:
# Same check on the fourth sample: context ids on one line, target id on the next.
print(input[3], output[3], sep="\n")

[0 0 4 2]
7


In [None]:
# One-hot encode the target ids so they match the softmax output
# (one column per vocabulary id, including the padding id 0).
output_c = to_categorical(output, num_classes=total_words)
# The first sample's target id is output[0], so row 0 has its single 1 at that position.
output_c[0]

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [None]:
#Build the rnn model
RNN=keras.Sequential([
    layers.Embedding(total_words,100,input_length=max_sequence_len-1),
    layers.SimpleRNN(100),
    layers.Dense(total_words,activation='softmax')
])



In [None]:
# Categorical cross-entropy pairs with the one-hot targets in `output_c`.
RNN.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the context -> next-word pairs for 100 epochs, logging each epoch.
RNN.fit(x=input, y=output_c, epochs=100, verbose=1)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0476 - loss: 2.9931
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.1429 - loss: 2.9462
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.3333 - loss: 2.8995
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.4762 - loss: 2.8522
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.5238 - loss: 2.8039
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.5238 - loss: 2.7540
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.3810 - loss: 2.7024
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - accuracy: 0.4286 - loss: 2.6488
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x7ee66c8990a0>

In [None]:
# Prompt for a seed phrase and compute the next-word probability distribution.
# The notebook rebinds the name `input` to the feature matrix, so the built-in
# prompt is reached through the `builtins` module. (`__builtins__` is an
# implementation detail and is not guaranteed to be a module.)
seed_text = builtins.input("Enter the text: ")
# Convert the phrase to word ids using the fitted vocabulary.
token_list = tokenizer.texts_to_sequences([seed_text])[0]
# Left-pad to the context length the model was trained on.
token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
# One row of probabilities, one column per vocabulary id.
predicted = RNN.predict(token_list)
predicted

Enter the text: I Love
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


array([[2.4816516e-04, 7.7044114e-04, 1.2169132e-03, 7.1858329e-04,
        4.8036581e-01, 6.2691164e-04, 2.5253592e-04, 4.4532513e-04,
        4.5334423e-04, 3.0264998e-04, 8.8418496e-04, 1.4743310e-02,
        3.0617346e-04, 1.4366575e-04, 4.5771718e-01, 1.3647686e-03,
        4.2472008e-04, 1.8630510e-04, 3.7931412e-02, 8.9766528e-04]],
      dtype=float32)

In [None]:
# Pick the most probable vocabulary id for each row of probabilities.
# NOTE: this overwrites `predicted` (probabilities -> ids), so re-run the
# prediction cell before re-running this one.
predicted = predicted.argmax(axis=1)
print(predicted)

[4]


In [None]:
# Translate the predicted id back into its word.
# `predicted` holds one id per input row; take the first as a plain int so the
# lookup does not depend on comparing an int with a NumPy array.
predicted_id = int(np.ravel(predicted)[0])
# `index_word` is the tokenizer's id -> word mapping (the inverse of
# `word_index`); ids with no word, such as the padding id 0, give "".
predicted_word = tokenizer.index_word.get(predicted_id, "")
print(f"The next word is:{predicted_word}")

The next word is:ml
