In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Example dataset for language modeling
texts = ["This is an example sentence.", "Another example sentence for language modeling.", "Language models are crucial for NLP tasks."]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
total_words = len(tokenizer.word_index) + 1

# Create input sequences and corresponding target values
input_sequences = []
for line in texts:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

max_sequence_length = max([len(x) for x in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')
X_train_language = input_sequences[:, :-1]
y_train_language = to_categorical(input_sequences[:, -1], num_classes=total_words)

# LSTM model for language modeling in NLP tasks using TensorFlow and Keras
model_language = Sequential()
model_language.add(LSTM(32, activation="relu", input_shape=(max_sequence_length-1, 1)))
model_language.add(Dense(total_words, activation="softmax"))
model_language.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
history_language = model_language.fit(X_train_language, y_train_language, epochs=20, batch_size=10, validation_split=0.1, verbose=1)

# Generate sample outputs
seed_text = "This is an example"
for _ in range(10):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_length-1, padding='pre')
    predicted_probs = model_language.predict(token_list)[0]
    predicted_token = np.random.choice(len(predicted_probs), p=predicted_probs)
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted_token:
            output_word = word
            break
    seed_text += " " + output_word
print("Generated Output:", seed_text)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Generated Output: This is an example example example modeling models for sentence example modeling  another
