In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np


sentence = "deeplearning is amazing, deeplearning builds intelligent"


words = sentence.split()


tokenizer = Tokenizer()
tokenizer.fit_on_texts([sentence])
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for i in range(1, len(words)):
    n_gram_sequence = words[:i+1]
    input_sequences.append(tokenizer.texts_to_sequences([' '.join(n_gram_sequence)])[0])


max_seq_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre'))

xs, labels = input_sequences[:, :-1], input_sequences[:, -1]

ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 50, input_length=max_seq_len-1),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(xs, ys, epochs=500, verbose=0)

def predict_next_word(model, tokenizer, text, max_seq_len):
    sequence = tokenizer.texts_to_sequences([text])[0]
    sequence = pad_sequences([sequence], maxlen=max_seq_len-1, padding='pre')
    predicted = model.predict(sequence, verbose=0)
    return tokenizer.index_word[np.argmax(predicted)]

for i in range(len(words)):
    current_text = ' '.join(words[:i+1])
    next_word = predict_next_word(model, tokenizer, current_text, max_seq_len)
    print(f"Input: '{current_text}' -> Predicted Next Word: '{next_word}'")


Input: 'deeplearning' -> Predicted Next Word: 'is'
Input: 'deeplearning is' -> Predicted Next Word: 'amazing'
Input: 'deeplearning is amazing,' -> Predicted Next Word: 'deeplearning'
Input: 'deeplearning is amazing, deeplearning' -> Predicted Next Word: 'builds'
Input: 'deeplearning is amazing, deeplearning builds' -> Predicted Next Word: 'intelligent'
Input: 'deeplearning is amazing, deeplearning builds intelligent' -> Predicted Next Word: 'intelligent'


In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd


data = [
    ("Deep learning is", "amazing"),
    ("Deep learning builds intelligent", "systems"),
    ("Intelligent systems can learn", "quickly")
]


all_text = " ".join([item[0] + " " + item[1] for item in data])

tokenizer = Tokenizer()
tokenizer.fit_on_texts([all_text])
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for input_text, expected_word in data:
    text = input_text + " " + expected_word
    token_list = tokenizer.texts_to_sequences([text])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

max_seq_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre'))

xs, labels = input_sequences[:, :-1], input_sequences[:, -1]
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)


model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 50, input_length=max_seq_len-1),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.Dense(total_words, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(xs, ys, epochs=500, verbose=0)

def predict_next_word(model, tokenizer, text, max_seq_len):
    token_list = tokenizer.texts_to_sequences([text])[0]
    token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    return tokenizer.index_word[np.argmax(predicted)]

# Generate predictions and check correctness
results = []
for input_text, expected_word in data:
    predicted_word = predict_next_word(model, tokenizer, input_text, max_seq_len)
    correct = "Y" if predicted_word.lower() == expected_word.lower() else "N"
    results.append([input_text, predicted_word, correct])

df = pd.DataFrame(results, columns=["Input Text", "Predicted Word", "Correct (Y/N)"])
print(df.to_string(index=False))


                      Input Text Predicted Word Correct (Y/N)
                Deep learning is        amazing             Y
Deep learning builds intelligent        systems             Y
   Intelligent systems can learn        quickly             Y


In [4]:


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import numpy as np

corpus = [
    "Shall I compare thee to a summer's day",
    "Thou art more lovely and more temperate",
    "Rough winds do shake the darling buds of May",
    "And summer's lease hath all too short a date"
]


tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

max_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

X = input_sequences[:, :-1]
y = input_sequences[:, -1]
y = to_categorical(y, num_classes=total_words)


model = Sequential([
    Embedding(total_words, 100, input_length=max_len-1),
    LSTM(150),
    Dense(total_words, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit(X, y, epochs=50, verbose=1, validation_split=0.2)

def predict_next_words(model, tokenizer, text, next_words=3):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([text])[0]
        token_list = pad_sequences([token_list], maxlen=max_len-1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        output_word = tokenizer.index_word[np.argmax(predicted)]
        text += " " + output_word
    return text


print("Generated text:")
print(predict_next_words(model, tokenizer, "Shall I compare", 5))


Epoch 1/50




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0000e+00 - loss: 3.4004 - val_accuracy: 0.0000e+00 - val_loss: 3.4095
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step - accuracy: 0.1304 - loss: 3.3903 - val_accuracy: 0.0000e+00 - val_loss: 3.4150
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - accuracy: 0.1739 - loss: 3.3801 - val_accuracy: 0.0000e+00 - val_loss: 3.4211
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step - accuracy: 0.3043 - loss: 3.3692 - val_accuracy: 0.0000e+00 - val_loss: 3.4285
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.3043 - loss: 3.3574 - val_accuracy: 0.0000e+00 - val_loss: 3.4374
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step - accuracy: 0.3478 - loss: 3.3441 - val_accuracy: 0.0000e+00 - val_loss: 3.4486
Epoch 7/50
[1m1/1[0m [3

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd

corpus = [
    "To be or not to be",
    "What light through yonder window breaks",
    "O Romeo Romeo wherefore art thou Romeo",
    "Parting is such sweet sorrow",
    "Shall I compare thee to a summer's day"
]

tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

max_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

X = input_sequences[:, :-1]
y = input_sequences[:, -1]
y = to_categorical(y, num_classes=total_words)


model = Sequential([
    Embedding(total_words, 100, input_length=max_len-1),
    LSTM(150),
    Dense(total_words, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit(X, y, epochs=50, verbose=0)


def predict_next_word(model, tokenizer, text, max_len):
    token_list = tokenizer.texts_to_sequences([text])[0]
    token_list = pad_sequences([token_list], maxlen=max_len-1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    return tokenizer.index_word[np.argmax(predicted)]

test_data = [
    ("To be or not", "to"),
    ("What light through yonder", "window")
]

results = []
for input_seq, expected_word in test_data:
    predicted_word = predict_next_word(model, tokenizer, input_seq, max_len)
    correct = "Y" if predicted_word.lower() == expected_word.lower() else "N"
    results.append([input_seq, predicted_word, correct])

df = pd.DataFrame(results, columns=["Input Sequence", "Predicted Word", "Correct (Y/N)"])
print(df.to_string(index=False))


           Input Sequence Predicted Word Correct (Y/N)
             To be or not             to             Y
What light through yonder         window             Y
