# Importing the libraries

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Text processing

Sample text for training

In [None]:
# Tiny training corpus; adjacent string literals are concatenated by Python into one string.
text = (
    "yes chatgpt I have overcome procrastination many years earlier "
    "after particular and focused effort. "
    "Now that feeling is back and fills me with dread. "
    "When i try to learn something you see."
)

Tokenize the text

In [None]:
# Character-level tokenizer: each distinct character seen in `text` gets its own integer id.
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts([text])

# Vocabulary size used for the embedding and output layers: the learned ids plus one extra
# slot (pad_sequences fills with 0 by default, so id 0 is presumably the padding slot).
total_chars = 1 + len(tokenizer.word_index)

Create sequences

In [None]:
# Encode the full text a single time instead of re-tokenizing every growing prefix
# (the original issued one tokenizer call per character of `text`).
encoded_text = tokenizer.texts_to_sequences([text])[0]

# Each prefix of length >= 2 is one training example: all ids but the last are the
# context and the last id is the character to predict. Because the tokenizer was fit
# on this same text, slicing the encoded text yields the same ids as encoding each
# prefix separately.
input_sequences = [encoded_text[:end] for end in range(2, len(encoded_text) + 1)]

Pad sequences to ensure uniform length

In [None]:
# Length of the longest prefix; every shorter prefix is left-padded up to this length
# so the examples stack into one rectangular array.
max_seq_len = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_seq_len,
    padding='pre',
)


Split into inputs (X) and targets (y)

In [None]:
# All columns except the last form the model input; the last column is the id of the
# character that follows that context.
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the target ids to match the softmax output / categorical cross-entropy loss.
y = tf.keras.utils.to_categorical(y, num_classes=total_chars)


# Build the RNN model

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and the training
# shuffle are repeatable on "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(42)

# Next-character model: embed each character id, summarise the context with an LSTM,
# then predict a distribution over the vocabulary.
model = Sequential([
    # Declare the input shape explicitly. Embedding's `input_length` argument is
    # deprecated in Keras 3, and an explicit Input also builds the model up front
    # so that model.summary() can report shapes and parameter counts.
    tf.keras.Input(shape=(max_seq_len - 1,)),
    Embedding(total_chars, 50),  # 50-dimensional vector per character id
    LSTM(100, return_sequences=False),  # keep only the final hidden state
    Dense(total_chars, activation='softmax'),  # probability for every character id
])


In [None]:
# Targets are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the cell output.
model.summary()


None


# Train the model

In [None]:
# Keep the returned History object so the per-epoch loss/accuracy can be inspected or
# plotted afterwards; assigning it also stops the bare History repr from being echoed.
history = model.fit(X, y, epochs=100, verbose=1)


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 123ms/step - accuracy: 0.0518 - loss: 3.2156
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 120ms/step - accuracy: 0.1262 - loss: 3.1842
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 125ms/step - accuracy: 0.1619 - loss: 3.0591
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 130ms/step - accuracy: 0.1698 - loss: 2.9296
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step - accuracy: 0.1637 - loss: 2.9114
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 123ms/step - accuracy: 0.1645 - loss: 2.8959
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 175ms/step - accuracy: 0.1615 - loss: 2.8675
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 213ms/step - accuracy: 0.1786 - loss: 2.8714
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x78e163776890>

# Text generation function

In [None]:
def generate_text(seed_text, next_chars=100):
    """Greedily extend ``seed_text`` with characters predicted by the trained model.

    At each step the most probable next character (argmax of the model output) is
    appended, and the extended sequence becomes the context for the next step.

    Args:
        seed_text: Starting string. It is returned unchanged as the prefix of the result.
        next_chars: Maximum number of characters to append.

    Returns:
        ``seed_text`` followed by up to ``next_chars`` predicted characters.
    """
    # Encode the seed once; afterwards only the newly predicted id is appended,
    # instead of re-tokenizing the whole growing string on every step.
    encoded = tokenizer.texts_to_sequences([seed_text])[0]
    generated = []

    for _ in range(next_chars):
        # Left-pad (or keep only the most recent ids) to the input length the model was trained on.
        window = pad_sequences([encoded], maxlen=max_seq_len - 1, padding='pre')

        # Greedy decoding: pick the id with the highest predicted probability.
        probabilities = model.predict(window, verbose=0)
        next_index = int(np.argmax(probabilities, axis=-1)[0])

        next_char = tokenizer.index_word.get(next_index)
        if next_char is None:
            # The predicted id (e.g. the padding id 0) has no character. The context
            # would not change, so every later step would repeat this prediction;
            # stopping here returns the same text without the wasted model calls.
            break

        encoded.append(next_index)
        generated.append(next_char)

    return seed_text + ''.join(generated)


# Generate text

In [None]:
# Prompt taken from the start of the training text, extended by 100 predicted characters.
seed_text = "yes chatgpt I have overcome"
generated_text = generate_text(seed_text, next_chars=100)

# Heading and result on separate lines.
print("Generated Text:", generated_text, sep="\n")

Generated Text:
yes chatgpt I have overcome procrastination many years earlier after particular and focused effort. now that feeling is back an
