In [None]:
pip install tensorflow



In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Embedding
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

In [None]:
# Toy corpus: three short sentences that serve as the sequential training data
sentences = [
    "hey all",
    "I am working in google colab",
    "we are working on RNN now in colab",
]

In [None]:
# Learn the word -> integer id vocabulary from the corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

# Vocabulary size used for the embedding and the output layer.
# The extra slot leaves room for id 0, which the padded sequences use as filler.
total_words = 1 + len(tokenizer.word_index)
print(total_words)

14


In [None]:
# Build the training n-grams: for every sentence, take each prefix of length
# 2..len(sentence). The last token of a prefix is later used as the word to
# predict and the tokens before it as the context.
input_sequences = [
    token_ids[:end]
    for token_ids in tokenizer.texts_to_sequences(sentences)
    for end in range(2, len(token_ids) + 1)
]
input_sequences

[[4, 5],
 [6, 7],
 [6, 7, 1],
 [6, 7, 1, 2],
 [6, 7, 1, 2, 8],
 [6, 7, 1, 2, 8, 3],
 [9, 10],
 [9, 10, 1],
 [9, 10, 1, 11],
 [9, 10, 1, 11, 12],
 [9, 10, 1, 11, 12, 13],
 [9, 10, 1, 11, 12, 13, 2],
 [9, 10, 1, 11, 12, 13, 2, 3]]

In [None]:
# Left-pad every n-gram with zeros up to the length of the longest one,
# so all rows have the same width
max_sequence_length = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_length,
    padding='pre',
)

In [None]:
# Display the padded array: every row should now have the same length,
# with zeros filled in on the left (padding='pre')
input_sequences

array([[ 0,  0,  0,  0,  0,  0,  4,  5],
       [ 0,  0,  0,  0,  0,  0,  6,  7],
       [ 0,  0,  0,  0,  0,  6,  7,  1],
       [ 0,  0,  0,  0,  6,  7,  1,  2],
       [ 0,  0,  0,  6,  7,  1,  2,  8],
       [ 0,  0,  6,  7,  1,  2,  8,  3],
       [ 0,  0,  0,  0,  0,  0,  9, 10],
       [ 0,  0,  0,  0,  0,  9, 10,  1],
       [ 0,  0,  0,  0,  9, 10,  1, 11],
       [ 0,  0,  0,  9, 10,  1, 11, 12],
       [ 0,  0,  9, 10,  1, 11, 12, 13],
       [ 0,  9, 10,  1, 11, 12, 13,  2],
       [ 9, 10,  1, 11, 12, 13,  2,  3]], dtype=int32)

In [None]:
# Split each padded row into features and label:
#   X - every token except the last one (the context)
#   y - the last token, one-hot encoded over the whole vocabulary
X = input_sequences[:, :-1]
y = to_categorical(input_sequences[:, -1], num_classes=total_words)

In [None]:
# Next-word model: embedding -> two stacked SimpleRNN layers -> softmax over the vocabulary
model = Sequential([
    # Maps each token id to a 50-dimensional vector; input width is the context length
    Embedding(input_dim=total_words, output_dim=50, input_length=max_sequence_length - 1),
    # return_sequences=True so the second recurrent layer receives a full sequence
    SimpleRNN(100, return_sequences=True),
    SimpleRNN(100),
    # One output unit per vocabulary id (including the padding id 0)
    Dense(total_words, activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, cross-entropy against the one-hot labels,
# and accuracy reported alongside the loss
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the full (tiny) data set; verbose=2 logs one line per epoch
model.fit(
    X,
    y,
    epochs=50,
    verbose=2,
)

Epoch 1/50
1/1 - 2s - loss: 2.7228 - accuracy: 0.0769 - 2s/epoch - 2s/step
Epoch 2/50
1/1 - 0s - loss: 2.5633 - accuracy: 0.2308 - 11ms/epoch - 11ms/step
Epoch 3/50
1/1 - 0s - loss: 2.4288 - accuracy: 0.4615 - 10ms/epoch - 10ms/step
Epoch 4/50
1/1 - 0s - loss: 2.3057 - accuracy: 0.4615 - 13ms/epoch - 13ms/step
Epoch 5/50
1/1 - 0s - loss: 2.1896 - accuracy: 0.4615 - 12ms/epoch - 12ms/step
Epoch 6/50
1/1 - 0s - loss: 2.0808 - accuracy: 0.3846 - 11ms/epoch - 11ms/step
Epoch 7/50
1/1 - 0s - loss: 1.9796 - accuracy: 0.3846 - 11ms/epoch - 11ms/step
Epoch 8/50
1/1 - 0s - loss: 1.8835 - accuracy: 0.3846 - 11ms/epoch - 11ms/step
Epoch 9/50
1/1 - 0s - loss: 1.7883 - accuracy: 0.5385 - 12ms/epoch - 12ms/step
Epoch 10/50
1/1 - 0s - loss: 1.6922 - accuracy: 0.6154 - 11ms/epoch - 11ms/step
Epoch 11/50
1/1 - 0s - loss: 1.5967 - accuracy: 0.6154 - 11ms/epoch - 11ms/step
Epoch 12/50
1/1 - 0s - loss: 1.5042 - accuracy: 0.6923 - 11ms/epoch - 11ms/step
Epoch 13/50
1/1 - 0s - loss: 1.4152 - accuracy: 0.692

<keras.src.callbacks.History at 0x7ec779430c40>

In [None]:
#Test:
# Generating text using the trained model.
# Reads a seed phrase and a word count from the user, then repeatedly predicts
# the most likely next word and appends it to the seed.
seed_text = input("Enter the starting word: ")
next_words = int(input("Enter how many words to predict: "))

for _ in range(next_words):
    # Encode the text generated so far and left-pad/truncate it to the context
    # width the model was trained on.
    tokenized_seed = tokenizer.texts_to_sequences([seed_text])[0]
    tokenized_seed = pad_sequences([tokenized_seed], maxlen=max_sequence_length-1, padding='pre')

    # verbose=0: avoid printing a progress bar for every single predicted word.
    probabilities = model.predict(tokenized_seed, verbose=0)
    predicted_word_index = int(np.argmax(probabilities, axis=-1)[0])

    # The softmax has total_words outputs, so id 0 (padding) can be the argmax
    # even though it has no entry in index_word; a plain [] lookup would raise
    # KeyError in that case.
    predicted_word = tokenizer.index_word.get(predicted_word_index)
    if predicted_word is None:
        # No real word to append; feeding the same text back would repeat the
        # same prediction, so stop generating.
        break
    seed_text += " " + predicted_word

print(seed_text)

Enter the starting word: hey
Enter how many words to predict: 5
hey all working in google colab
