Importing Libraries

In [10]:
pip install --upgrade tensorflow keras



In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [12]:
# Tiny demonstration corpus: two sentences about RNNs spread over three lines.
# NOTE(review): the space before each line break mirrors the sentences printed
# in this notebook's saved output; it has no effect on tokenization.
data = (
    "RNNs are a type of neural network that are well suited for \n"
    "processing sequential data such as text. They have been used for tasks \n"
    "like language modeling and text generation."
)

In [13]:
# Lower-case the text and split it into sentences on '.'.
# Each fragment is stripped *before* the emptiness check, so whitespace-only
# fragments (e.g. when the text ends with ". " or ".\n") are dropped instead of
# surviving as empty strings.
sentences = [
    fragment.strip()
    for fragment in data.lower().split('.')
    if fragment.strip()
]

In [14]:
# Show the list of cleaned sentences to confirm how the text was split
print(sentences)

['rnns are a type of neural network that are well suited for \nprocessing sequential data such as text', 'they have been used for tasks \nlike language modeling and text generation']


Prepare Data

In [15]:
# Fit the tokenizer on the corpus. Keras word indices start at 1 and padding
# uses 0, so the vocabulary size needs one extra slot.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
total_words = len(tokenizer.word_index) + 1

# Expand every sentence into its growing prefixes (lengths 2..len(sentence)).
# In each prefix the last token is the word to predict and the tokens before
# it are the context.
input_sequences = []
for token_list in tokenizer.texts_to_sequences(sentences):
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Fail with a clear message instead of max()'s "empty sequence" error.
if not input_sequences:
    raise ValueError(
        "No training sequences: at least one sentence must contain two or more tokens."
    )

# Left-pad every prefix to the longest one so they form a rectangular array.
# pad_sequences already returns a NumPy array, so no extra np.array() copy is needed.
max_sequence_len = max(len(sequence) for sequence in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

# Inputs are all columns but the last; the label is the last column,
# one-hot encoded over the vocabulary for categorical cross-entropy.
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

Build the Model

In [18]:
# Build the model: embedding -> single SimpleRNN layer -> softmax over the vocabulary.
# The input length is declared with an explicit Input layer rather than the
# Embedding `input_length` argument, which Keras 3 deprecates. Declaring the
# shape up front also builds the model, so summary() can report output shapes
# and parameter counts.
model = Sequential()
model.add(tf.keras.Input(shape=(max_sequence_len - 1,)))
model.add(Embedding(total_words, 10))              # 10-dimensional word vectors
model.add(SimpleRNN(150, return_sequences=False))  # keep only the final hidden state
model.add(Dense(total_words, activation='softmax'))
# Compile the model (labels are one-hot, hence categorical cross-entropy)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Print the model summary
model.summary()



In [19]:
print("Shape of X:", X.shape)  # (num_sequences, max_sequence_len-1)
print("Shape of y:", y.shape)  # (num_sequences, total_words)

# Enforce the expected shapes instead of only stating them in comments
assert X.shape == (len(input_sequences), max_sequence_len - 1), X.shape
assert y.shape == (len(input_sequences), total_words), y.shape


Shape of X: (28, 17)
Shape of y: (28, 28)


Train the model

In [20]:
# Train for 100 epochs on all (X, y) pairs with per-epoch progress output;
# the value returned by fit() is kept in `history` for later inspection
history = model.fit(X, y, epochs=100, verbose=1)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0357 - loss: 3.3415
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step - accuracy: 0.0714 - loss: 3.2982
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.1786 - loss: 3.2559
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.2500 - loss: 3.2091
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.2500 - loss: 3.1552
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.1786 - loss: 3.0954
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.2143 - loss: 3.0331
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.2500 - loss: 2.9675
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

Generate the text

In [21]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Extend ``seed_text`` by greedily predicting up to ``next_words`` words.

    Relies on the notebook-level ``tokenizer`` (already fitted) and on
    ``pad_sequences`` being in scope.

    Args:
        seed_text: Text to continue.
        next_words: Maximum number of words to append.
        model: Object whose ``predict`` returns one row of per-word scores
            for a batch containing a single padded sequence.
        max_sequence_len: Padded length used when the training sequences were
            built; the model input is ``max_sequence_len - 1`` tokens long.

    Returns:
        ``seed_text`` followed by the predicted words, separated by single
        spaces. Generation stops early when the predicted index has no word
        (for example the padding index 0).
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        # verbose=0 keeps a progress bar from being printed for every word.
        probabilities = model.predict(token_list, verbose=0)
        # Reduce to a plain int so the lookup does not depend on comparing an
        # int with a one-element array.
        predicted_index = int(np.argmax(probabilities, axis=-1)[0])
        # Direct reverse lookup instead of scanning word_index linearly.
        output_word = tokenizer.index_word.get(predicted_index)
        if not output_word:
            # Nothing to append: the next input would be identical, so further
            # iterations would only repeat this prediction.
            break
        seed_text += " " + output_word
    return seed_text

# Continue a short prompt by three words with the trained model and show the result
seed_text = "RNNs are a type"
next_words = 3
generated_text = generate_text(
    seed_text=seed_text,
    next_words=next_words,
    model=model,
    max_sequence_len=max_sequence_len,
)
print(generated_text)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
RNNs are a type of neural network
