In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical
import string

# Sample text data
text = """
In the beginning God created the heavens and the earth. Now the earth was formless and empty,
darkness was over the surface of the deep, and the Spirit of God was hovering over the waters.
And God said, “Let there be light,” and there was light. God saw that the light was good,
and he separated the light from the darkness.
"""

# Clean and tokenize text
def clean_text(txt):
    return txt.lower().translate(str.maketrans('', '', string.punctuation))

cleaned_text = clean_text(text)
tokenizer = Tokenizer()
tokenizer.fit_on_texts([cleaned_text])
total_words = len(tokenizer.word_index) + 1

# Generate input sequences
input_sequences = []
for line in cleaned_text.split('.'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences += [token_list[:i+1] for i in range(1, len(token_list))]

# Prepare data
max_seq_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre'))
X, y = input_sequences[:, :-1], to_categorical(input_sequences[:, -1], num_classes=total_words)

# Build and compile model
model = Sequential([
    Embedding(total_words, 100, input_length=max_seq_len-1),
    LSTM(150, return_sequences=True),
    LSTM(100),
    Dense(total_words, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train model
model.fit(X, y, epochs=100, verbose=1)

# Generate new text
def generate_text(seed_text, next_words, model, max_seq_len):
    for _ in range(next_words):
        token_list = pad_sequences([tokenizer.texts_to_sequences([seed_text])[0]], maxlen=max_seq_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)
        seed_text += " " + next(word for word, idx in tokenizer.word_index.items() if idx == predicted)
    return seed_text

seed_text = "God said"
print(generate_text(seed_text, 10, model, max_seq_len))




Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.0222 - loss: 3.4958  
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.1951 - loss: 3.4717
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.2160 - loss: 3.4259
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.1951 - loss: 3.3056
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.2056 - loss: 3.1652
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.1639 - loss: 3.2419
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.1847 - loss: 3.0790
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.1951 - loss: 3.0459
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m