<a href="https://colab.research.google.com/github/Epsita-R/NLP/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy training corpus: five short English sentences, one per line.
text_corpus = """
The quick brown fox jumps over the lazy dog.
A stitch in time saves nine.
All that glitters is not gold.
Actions speak louder than words.
Birds of a feather flock together.
"""

# Build the word -> integer-id vocabulary from the whole corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text_corpus])
# One slot more than the vocabulary size; pad_sequences fills with 0 by
# default, so id 0 never stands for a real word.
total_words = len(tokenizer.word_index) + 1

# For every line, collect each prefix of length >= 2: the last token of a
# prefix is the prediction target, everything before it is the context.
input_sequences = []
for line in text_corpus.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Left-pad every prefix to the length of the longest one.
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# Context is every column but the last; the one-hot target is the last column.
X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

# Next-word classifier: embedding -> single LSTM layer -> softmax over the
# total_words output classes.
model = Sequential([
    Embedding(total_words, 10, input_length=max_sequence_len-1),
    LSTM(100),
    Dense(total_words, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the padded prefixes; verbose=2 prints one summary line per epoch.
model.fit(X, y, epochs=100, verbose=2)

def generate_text(seed_text, next_words, model, max_sequence_len):
    """Extend ``seed_text`` by sampling up to ``next_words`` words from ``model``.

    On each step the text generated so far is tokenised with the
    module-level ``tokenizer``, left-padded to ``max_sequence_len - 1``
    tokens, and passed to ``model.predict``. The next word is drawn at
    random from the predicted distribution and appended to the text.

    Args:
        seed_text: Starting text; generated words are appended after it.
        next_words: Number of sampling steps to run.
        model: Object whose ``predict`` returns one probability vector per
            input row, indexed by tokenizer word id.
        max_sequence_len: Padded sequence length used when the training
            data was built; the model input is one token shorter.

    Returns:
        ``seed_text`` followed by the sampled words, separated by single
        spaces. Fewer than ``next_words`` words are appended only if the
        model puts no usable probability mass on any real word.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

        # Copy into float64 so the distribution can be edited and
        # renormalised without touching the model's own output buffer.
        predicted_probs = np.array(model.predict(token_list)[0], dtype=np.float64)

        # Class 0 is the padding id and has no entry in tokenizer.index_word.
        # The softmax still gives it non-zero mass, so sampling it would
        # raise KeyError below. Exclude it and renormalise.
        predicted_probs[0] = 0.0
        total = predicted_probs.sum()
        if not total > 0:
            # No real word is sampleable (also catches NaN output); stop early.
            break
        predicted_probs /= total

        predicted_index = int(np.random.choice(len(predicted_probs), p=predicted_probs))
        output_word = tokenizer.index_word[predicted_index]
        seed_text += " " + output_word
    return seed_text

# Sample ten words following the seed "the" and print the resulting text.
generated_text = generate_text(
    seed_text="the",
    next_words=10,
    model=model,
    max_sequence_len=max_sequence_len,
)
print(generated_text)


Epoch 1/100
1/1 - 2s - loss: 3.4353 - accuracy: 0.0370 - 2s/epoch - 2s/step
Epoch 2/100
1/1 - 0s - loss: 3.4336 - accuracy: 0.0741 - 12ms/epoch - 12ms/step
Epoch 3/100
1/1 - 0s - loss: 3.4318 - accuracy: 0.1111 - 12ms/epoch - 12ms/step
Epoch 4/100
1/1 - 0s - loss: 3.4301 - accuracy: 0.1111 - 12ms/epoch - 12ms/step
Epoch 5/100
1/1 - 0s - loss: 3.4283 - accuracy: 0.1111 - 12ms/epoch - 12ms/step
Epoch 6/100
1/1 - 0s - loss: 3.4264 - accuracy: 0.1111 - 12ms/epoch - 12ms/step
Epoch 7/100
1/1 - 0s - loss: 3.4243 - accuracy: 0.1111 - 15ms/epoch - 15ms/step
Epoch 8/100
1/1 - 0s - loss: 3.4222 - accuracy: 0.0741 - 13ms/epoch - 13ms/step
Epoch 9/100
1/1 - 0s - loss: 3.4198 - accuracy: 0.0741 - 13ms/epoch - 13ms/step
Epoch 10/100
1/1 - 0s - loss: 3.4173 - accuracy: 0.0741 - 13ms/epoch - 13ms/step
Epoch 11/100
1/1 - 0s - loss: 3.4145 - accuracy: 0.0741 - 13ms/epoch - 13ms/step
Epoch 12/100
1/1 - 0s - loss: 3.4114 - accuracy: 0.0370 - 12ms/epoch - 12ms/step
Epoch 13/100
1/1 - 0s - loss: 3.4080 - ac