In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Path to the plain-text corpus used for training.
# Point this at wherever the text file is saved on your machine.
file_path = r"C:\Users\sikha\OneDrive\Desktop\1661-0.txt"

# Read the whole corpus into memory as one string.
# 'utf-8-sig' decodes exactly like 'utf-8' but also drops a leading byte-order
# mark if the file has one; with plain 'utf-8' the BOM character stays in the
# text and ends up attached to the first token.
with open(file_path, 'r', encoding='utf-8-sig') as file:
    book_text = file.read()


In [None]:
# Step 1: Preprocess the text data
# `book_text` (the raw corpus string read from disk) is handed to the
# tokenizer as a single document so that it can build its word -> index map.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([book_text])

# Vocabulary size used for the embedding and output layers: one slot per
# distinct word, plus one extra slot (the tokenizer's word indices start at 1,
# so index 0 needs room as well).
total_words = 1 + len(tokenizer.word_index)

In [None]:
# Create input sequences and labels

# Window size. Every training sample is `max_sequence_length` consecutive
# tokens: the first `max_sequence_length - 1` are the model input and the last
# one is the word to predict. Both names are defined here, before first use,
# because the model definition and the text-generation code read
# `max_sequence_length` as well. Tune the value as needed.
max_sequence_length = 20
input_sequence_length = max_sequence_length - 1

# The whole corpus as one flat list of word indices.
input_sequences = tokenizer.texts_to_sequences([book_text])[0]

# Fail early with a readable message instead of an indexing error further down.
if len(input_sequences) < max_sequence_length:
    raise ValueError(
        f"Corpus has only {len(input_sequences)} tokens; at least "
        f"{max_sequence_length} are needed to build one training sequence."
    )

# Slide a window of exactly `max_sequence_length` tokens over the corpus,
# one step at a time.
sequences = np.array([
    input_sequences[end - max_sequence_length:end]
    for end in range(max_sequence_length, len(input_sequences) + 1)
])

# Everything but the last token is the input; the last token is the label.
X, y = sequences[:, :-1], sequences[:, -1]

# X already has `input_sequence_length` columns; pad_sequences is kept so the
# shape/dtype handed to the model is the same one used at generation time.
X = pad_sequences(X, maxlen=input_sequence_length, padding='pre')

# One-hot encode the labels over the vocabulary (num_classes must be the
# vocabulary size). NOTE: this array is len(X) x total_words and can be large
# for a full book; integer labels would need a sparse loss in the compile step.
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

In [None]:
# Build and train the model
# Architecture: word embedding -> single LSTM layer -> softmax over the
# vocabulary. The network consumes `max_sequence_length - 1` tokens per sample.
model = Sequential([
    Embedding(total_words, 100, input_length=max_sequence_length-1),
    LSTM(100),
    Dense(total_words, activation='softmax'),
])

# Labels are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(X, y, epochs=50, verbose=1)

# Persist the trained weights to disk
model.save_weights('word_generate_model.h5')

In [None]:
# Pull the weight arrays out of the three layers of the model
# (position 0: embedding, 1: LSTM, 2: dense output).
embedding_layer_weights, lstm_layer_weights, dense_layer_weights = (
    model.layers[position].get_weights() for position in (0, 1, 2)
)

# Show the weights; they remain available under the names above for later use.
print("Embedding Layer Weights:", embedding_layer_weights)
print("LSTM Layer Weights:", lstm_layer_weights)
print("Dense Layer Weights:", dense_layer_weights)


In [None]:
def word_generate(seed_text, next_words, model, max_sequence_length):
    """Extend ``seed_text`` by greedily predicting ``next_words`` more words.

    On every step the current text is tokenized with the notebook-level
    ``tokenizer``, left-padded/truncated to ``max_sequence_length - 1`` tokens,
    and passed to ``model``; the highest-probability index is mapped back to a
    word and appended to the text, separated by a single space.

    Args:
        seed_text: Starting text to continue.
        next_words: Number of words to append.
        model: Object whose ``predict(batch, verbose=0)`` returns one
            probability vector per input row.
        max_sequence_length: Window size used during training; the model input
            is one token shorter than this.

    Returns:
        The seed text followed by the generated words.
    """
    context_length = max_sequence_length - 1
    # Reverse (index -> word) map kept by the tokenizer; a dictionary lookup
    # replaces scanning the whole vocabulary for every generated word.
    index_to_word = tokenizer.index_word

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=context_length, padding='pre')
        predicted_probabilities = model.predict(token_list, verbose=0)[0]
        predicted_index = int(np.argmax(predicted_probabilities))

        # An index with no vocabulary entry (e.g. the padding index 0) appends
        # an empty word, matching the previous behaviour.
        seed_text += " " + index_to_word.get(predicted_index, "")
    return seed_text
# Step 4: Generate Text Using the Trained Model
# Continue the seed phrase by ten words and display the result.
generated_text = word_generate(
    seed_text="This is an  ",
    next_words=10,
    model=model,
    max_sequence_length=max_sequence_length,
)
print(generated_text)