In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, MultiHeadAttention, Dense, Input, GlobalAveragePooling1D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import pickle
import numpy as np
import os

# Read the raw training corpus line by line. The context manager makes sure
# the file handle is closed once reading is done (or if it fails).
with open('Requirements.txt', 'r') as file:
    lines = list(file)

# Join all lines into a single string; this only needs to happen once.
data = ' '.join(lines)

# Drop line breaks, the byte-order mark and curly double quotes.
data = data.replace('\n', '').replace('\r', '').replace('\ufeff', '').replace('“', '').replace('”', '')

# Collapse every run of whitespace into a single space.
data = ' '.join(data.split())

# Build the word -> integer-id vocabulary from the whole corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

# Persist the fitted tokenizer so inference can reuse the same vocabulary.
# The context manager flushes and closes the file deterministically.
with open('token.pkl', 'wb') as token_file:
    pickle.dump(tokenizer, token_file)

# Encode the corpus as one flat list of token ids.
sequence_data = tokenizer.texts_to_sequences([data])[0]

# +1 leaves room for index 0, which word_index never assigns to a word.
vocab_size = len(tokenizer.word_index) + 1
print(vocab_size)
# Slide an 11-token window over the encoded corpus: the first 10 ids are the
# model input and the 11th is the word to predict.
sequences = [
    sequence_data[end - 10:end + 1]
    for end in range(10, len(sequence_data))
]

print("The Length of sequences are: ", len(sequences))
sequences = np.array(sequences)

# Split every window into its context (X) and its target id (y).
X = np.array([window[0:10] for window in sequences])
y = np.array([window[10] for window in sequences])
print("Data: ", X[:10])
print("Response: ", y[:10])

# One-hot encode the targets to match the softmax output of the model.
y = to_categorical(y, num_classes=vocab_size)

# Next-word classifier: 10 token ids -> embedding -> multi-head self-attention
# -> mean over positions -> dropout -> dense layer -> softmax over the vocabulary.
# NOTE(review): nothing here adds positional information before the attention
# layer and the positions are averaged afterwards, so the prediction may not
# depend on word order - confirm this is intended.
input_layer = Input(shape=(10,))
embedding = Embedding(vocab_size, 128)(input_layer)

# Self-attention: the embedded tokens are passed as both query and value.
transformer_block = MultiHeadAttention(num_heads=8, key_dim=128)(embedding, embedding)

# Reduce the per-position outputs to one vector per example.
pooled = GlobalAveragePooling1D()(transformer_block)
regularized = Dropout(0.1)(pooled)
hidden = Dense(1000, activation="relu")(regularized)
output_layer = Dense(vocab_size, activation="softmax")(hidden)

model = Model(inputs=input_layer, outputs=output_layer)

model.summary()

from tensorflow.keras.callbacks import ModelCheckpoint

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
)

# Write the model to disk only when the monitored training loss improves.
checkpoint = ModelCheckpoint(
    "next_words_transformer.h5",
    monitor='loss',
    verbose=1,
    save_best_only=True,
)

model.fit(
    X,
    y,
    epochs=70,
    batch_size=64,
    callbacks=[checkpoint],
)

from tensorflow.keras.models import load_model
import numpy as np
import pickle

# Reload the checkpointed model and the tokenizer it was trained with.
model = load_model('next_words_transformer.h5')

# NOTE(security): unpickling can execute arbitrary code; only load token.pkl
# when it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open('token.pkl', 'rb') as token_file:
    tokenizer = pickle.load(token_file)

def Predict_Next_Words(model, tokenizer, text):
    """Predict the word that follows ``text`` and print it.

    Args:
        model: Object whose ``predict`` method accepts a 2-D array of token
            ids and returns one row of per-id scores for each input row.
        tokenizer: Object providing ``texts_to_sequences`` and a
            ``word_index`` mapping of word -> integer id.
        text: The context to encode. It is handed to
            ``texts_to_sequences`` as the only element of a one-item batch.

    Returns:
        The word whose id has the highest score, or ``""`` when the context
        contains no encodable token or the winning id maps to no word.
    """
    sequence = np.array(tokenizer.texts_to_sequences([text]))

    predicted_word = ""

    # If no word of the context is encodable the array is empty; there is
    # nothing meaningful to feed the model, so skip the prediction.
    if sequence.size:
        scores = model.predict(sequence)
        # The batch holds exactly one example, so read its row explicitly.
        preds = np.argmax(scores[0])

        # Reverse lookup id -> word; falls back to "" when no word has the id.
        predicted_word = next(
            (word for word, index in tokenizer.word_index.items() if index == preds),
            "",
        )

    print(predicted_word)
    return predicted_word

# Number of trailing words handed to the model. It matches the 10-token
# context window the network is built and trained with.
CONTEXT_LENGTH = 10

# Simple prompt loop: read a line, predict its next word, stop on "0".
while True:
    text = input("Enter your line: ")

    if text == "0":
        print("Execution completed.....")
        break

    try:
        # split() without an argument ignores repeated/leading/trailing
        # whitespace, so no empty strings take up slots in the context.
        text = text.split()[-CONTEXT_LENGTH:]
        print(text)

        Predict_Next_Words(model, tokenizer, text)

    # Deliberately broad: one failed prediction is reported and the prompt
    # loop keeps running.
    except Exception as e:
        print("Error occurred: ", e)


63
The Length of sequences are:  313
Data:  [[ 1  2  9  7 19 10  1 14  2 11]
 [ 2  9  7 19 10  1 14  2 11  2]
 [ 9  7 19 10  1 14  2 11  2 11]
 [ 7 19 10  1 14  2 11  2 11  4]
 [19 10  1 14  2 11  2 11  4  1]
 [10  1 14  2 11  2 11  4  1  2]
 [ 1 14  2 11  2 11  4  1  2  5]
 [14  2 11  2 11  4  1  2  5  7]
 [ 2 11  2 11  4  1  2  5  7 19]
 [11  2 11  4  1  2  5  7 19 10]]
Response:  [ 2 11  4  1  2  5  7 19 10 14]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 10, 128)      8064        ['input_1[0][0]']                
                                                                        

KeyboardInterrupt: Interrupted by user