In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Training text: the opening eight lines of Shakespeare's Sonnet 1,
# one list entry per line of verse.
corpus = [
    "From fairest creatures we desire increase,",
    "That thereby beauty's rose might never die,",
    "But as the riper should by time decease,",
    "His tender heir might bear his memory:",
    "But thou, contracted to thine own bright eyes,",
    "Feed'st thy light's flame with self-substantial fuel,",
    "Making a famine where abundance lies,",
    "Thyself thy foe, to thy sweet self too cruel.",
]

# Fit a word-level tokenizer on the corpus to learn the word -> index mapping.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# Vocabulary size used for the embedding and output layers: one slot per
# known word plus one extra (the printed sequences show word indices
# starting at 1, so index 0 is left over for padding).
total_words = 1 + len(tokenizer.word_index)

# Create input sequences: for every corpus line, collect each leading
# n-gram of its token list (the first 2 tokens, the first 3, ... up to the
# whole line). Lines with fewer than two tokens contribute nothing.
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    # Debug output. Wrapping the already-computed tokens in a list prints
    # the same nested form as texts_to_sequences returns, without
    # tokenizing the line a second time.
    print([token_list])
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Bring every n-gram to the length of the longest one by padding at the
# front ('pre'), so the final position always holds the newest token.
max_sequence_len = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_len,
    padding='pre',
)

# Split each padded row: all columns but the last are the model input,
# the last column is the word to predict.
predictors = input_sequences[:, :-1]
label = input_sequences[:, -1]

# One-hot encode the target word indices over the whole vocabulary, as
# expected by the categorical cross-entropy loss.
label = tf.keras.utils.to_categorical(label, num_classes=total_words)

# Next-word model: embedding -> two stacked LSTMs -> softmax over the vocabulary.
model = tf.keras.Sequential()
# Inputs are the padded contexts, i.e. max_sequence_len - 1 tokens each.
# NOTE(review): newer Keras releases deprecate `input_length` - confirm
# against the installed version before upgrading.
model.add(tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1))
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(tf.keras.layers.LSTM(150, return_sequences=True))
model.add(tf.keras.layers.LSTM(100))
# One output probability per vocabulary index.
model.add(tf.keras.layers.Dense(total_words, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the (context, one-hot next word) pairs for 100 epochs,
# logging progress for each epoch.
model.fit(predictors, label, epochs=100, verbose=1)

# Generate text
def generate_text(seed_text, next_words, max_sequence_len):
    """Extend ``seed_text`` by greedily predicting up to ``next_words`` words.

    Uses the module-level ``tokenizer`` and ``model``. At every step the
    current text is tokenized, padded to the model's input length, and the
    word with the highest predicted probability is appended.

    Args:
        seed_text: Text to continue.
        next_words: Maximum number of words to append.
        max_sequence_len: Length of the padded training sequences; the
            model input is padded/truncated to ``max_sequence_len - 1``.

    Returns:
        ``seed_text`` followed by the predicted words, separated by single
        spaces. Generation stops early if the predicted index has no word
        in the tokenizer's vocabulary.
    """
    # Build the index -> word lookup once instead of scanning the whole
    # vocabulary for every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)
        # A single sequence is predicted, so take the arg-max of its row.
        predicted_index = int(np.argmax(predicted_probs[0]))
        output_word = index_to_word.get(predicted_index)
        if output_word is None:
            # No vocabulary word for this index (e.g. the padding index 0).
            # Appending nothing would feed the model the same tokens again
            # on every remaining step and only add trailing spaces, so stop.
            break
        seed_text += " " + output_word
    return seed_text

# Seed phrase: the start of the first corpus line.
input_text = "From fairest creatures we desire"

# Continue the seed, requesting 10 predicted words, and show the result.
generated_text = generate_text(
    input_text,
    next_words=10,
    max_sequence_len=max_sequence_len,
)
print(generated_text)


[[7, 8, 9, 10, 11, 12]]
[[13, 14, 15, 16, 2, 17, 18]]
[[3, 19, 20, 21, 22, 23, 24, 25]]
[[4, 26, 27, 2, 28, 4, 29]]
[[3, 30, 31, 5, 32, 33, 34, 35]]
[[36, 1, 37, 38, 39, 6, 40, 41]]
[[42, 43, 44, 45, 46, 47]]
[[48, 1, 49, 5, 1, 50, 6, 51, 52]]
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/10