**Import Libraries**

In [None]:
import os
import random
import warnings

# TensorFlow's native (C++) logging is only reliably controlled by
# TF_CPP_MIN_LOG_LEVEL when the variable is already set at the time
# `import tensorflow` runs, so export it before that import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow as tf

# Seed every RNG up front (before the project modules are imported) so the
# notebook is reproducible under Restart Kernel -> Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# NOTE: setting PYTHONHASHSEED here cannot change the hash seed of the
# interpreter that is already running; it is only inherited by child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
tf.random.set_seed(SEED)

# Project-local helpers.
from nlp_model_text_preprocessing import (
    index_the_words,
    text_to_sequence,
    pad_sequences,
    one_hot_encoding,
    index_the_char,
    text_to_sequence_char,
)
from english_text_normalization import text_normalization
from deep_learning import (
    nlp_model_word,
    nlp_model_char,
    model_compile_word,
    model_compile_char,
    model_fit,
    plot_word_model_change,
    plot_char_model_change,
)
from model_testing import model_testing_char, model_testing_word
from transformers_models import (
    load_dataset,
    data_collator,
    train_arguments,
    training_,
    save_model_tokenizer,
    transformer_testing,
    transformes_model,  # spelled this way in transformers_models
)

# Re-seed Python, NumPy and TensorFlow in one call after all imports, then ask
# TensorFlow to use deterministic op implementations.
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

**Ignore Warnings**

In [None]:
# Silence every Python warning category and request the quietest TensorFlow
# native log level.
# NOTE(review): TensorFlow has already been imported by the time this cell runs;
# TF_CPP_MIN_LOG_LEVEL is normally expected to be set before that import —
# confirm it still takes effect here.
warnings.filterwarnings('ignore')
os.environ.update(TF_CPP_MIN_LOG_LEVEL='3')

**Read Data**

In [None]:
# Read the whole book, lower-cased, as a list of raw lines. The `with` block
# guarantees the file handle is closed (the bare open().read() never closed it).
# 'utf-8-sig' transparently drops a leading byte-order mark if one is present.
with open("The Alchemist.txt", encoding='utf-8-sig') as corpus_file:
    raw_lines = corpus_file.read().lower().split("\n")

# Trim surrounding whitespace, discard lines that are empty after trimming,
# then run the project's text_normalization over each remaining line.
stripped_lines = [line.strip() for line in raw_lines]
Corpus = [text_normalization(line) for line in stripped_lines if line]
len(Corpus)

In [None]:
# Persist the normalised corpus, one line per entry, for the transformer
# section further down. The source text was read as UTF-8, so write it back
# with an explicit UTF-8 encoding instead of the platform default (which can
# fail or mangle non-ASCII characters on non-UTF-8 locales).
# NOTE(review): assumes the consumer of Cleaned_Corpus.txt reads it as UTF-8 — confirm.
with open('Cleaned_Corpus.txt', 'w', encoding='utf-8') as f:
    for line in Corpus:
        f.write(line + '\n')

**1-Tokens Based On Word**

In [None]:
# Build the word vocabulary from the cleaned corpus via the project helper
# index_the_words. It returns three values:
#   all_words      - used below as the model's input_dim / output unit count
#                    (presumably the vocabulary size — confirm in the helper),
#   words_index    - passed to text_to_sequence to encode text,
#   index_to_words - passed to model_testing_word to decode predictions.
all_words, words_index, index_to_words = index_the_words(Corpus)
all_words

In [None]:
# Expand every corpus line into its growing prefixes (first 2 tokens, first 3
# tokens, ... up to the whole line) and record the longest prefix length.
input_sequence_ = []
for sentence in Corpus:
    encoded = text_to_sequence(words_index, sentence)
    # Prefixes shorter than two tokens are skipped, so lines that encode to
    # fewer than two tokens contribute nothing.
    input_sequence_.extend(encoded[:end] for end in range(2, len(encoded) + 1))

max_length_word = max(map(len, input_sequence_))
max_length_word

In [None]:
# Pad the input sequences so that they all share the same length
# (max_length_word), using the project's pad_sequences helper.
# Note: this rebinds input_sequence_ from the n-gram list to the padded result,
# so re-running this cell alone feeds the already-padded data back in.
input_sequence_ = pad_sequences(input_sequence=input_sequence_, max_length=max_length_word)
input_sequence_

In [None]:
# Split each padded sequence into model input and target:
# every column except the last is the input, the last column is the label.
train = input_sequence_[:, :-1]
labels = input_sequence_[:, -1]

In [None]:
# One-hot encode the label column with the project's one_hot_encoding helper,
# passing all_words as its second argument (presumably the number of classes —
# confirm in the helper). The result is what the models are fitted against.
label = one_hot_encoding(labels, all_words)
label

*Deep Learning Models*

In [None]:
# Early-stopping callback shared by all word-level models below.
# It watches the `loss` metric (not `val_loss`), treats lower as better,
# stops after 5 epochs without improvement, and restores the weights from
# the best epoch when it stops.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=5,
    mode = 'min',
    restore_best_weights=True,
)

In [None]:
# 1-LSTM: build, compile, fit and plot the single-direction LSTM word model.
# NOTE(review): `train` drops the last column of the padded sequences, so it is
# presumably one step shorter than max_length_word — confirm nlp_model_word
# expects input_length to be passed this way.
lstm_model = tf.keras.layers.LSTM(units=128, return_sequences=False)
LSTM_1 = nlp_model_word(
    input_dim=all_words,
    output_dim=100,
    input_length=max_length_word,
    unit=all_words,
    model=lstm_model,
)
model_compile_word(
    model=LSTM_1,
    optimizer=tf.keras.optimizers.legacy.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
history = model_fit(
    model=LSTM_1,
    Data=train,
    Label=label,
    epochs=150,
    early_stop=early_stop,
    batch_size=32,
)
plot_word_model_change(history=history)

In [None]:
# 2-Bidirectional LSTM: same pipeline as the plain LSTM, with the recurrent
# layer wrapped in Bidirectional.
# NOTE(review): this run uses batch_size=128 while the LSTM and GRU runs use
# 32 — confirm the difference is intentional before comparing results.
# NOTE(review): `train` drops the last column of the padded sequences, so it is
# presumably one step shorter than max_length_word — confirm nlp_model_word
# expects input_length to be passed this way.
bidirectional_lstm_model = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=128, return_sequences=False)
)
Bidirectional_LSTM_1 = nlp_model_word(
    input_dim=all_words,
    output_dim=100,
    input_length=max_length_word,
    unit=all_words,
    model=bidirectional_lstm_model,
)
model_compile_word(
    model=Bidirectional_LSTM_1,
    optimizer=tf.keras.optimizers.legacy.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
history = model_fit(
    model=Bidirectional_LSTM_1,
    Data=train,
    Label=label,
    epochs=150,
    early_stop=early_stop,
    batch_size=128,
)
plot_word_model_change(history=history)

In [None]:
# 3-GRU: build, compile, fit and plot the GRU word model.
# NOTE(review): `train` drops the last column of the padded sequences, so it is
# presumably one step shorter than max_length_word — confirm nlp_model_word
# expects input_length to be passed this way.
gru_model = tf.keras.layers.GRU(units=128, return_sequences=False)
GRU_1 = nlp_model_word(
    input_dim=all_words,
    output_dim=100,
    input_length=max_length_word,
    unit=all_words,
    model=gru_model,
)
model_compile_word(
    model=GRU_1,
    optimizer=tf.keras.optimizers.legacy.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
history = model_fit(
    model=GRU_1,
    Data=train,
    Label=label,
    epochs=150,
    early_stop=early_stop,
    batch_size=32,
)
plot_word_model_change(history=history)

*Testing The Model*

In [None]:
# 1-LSTM: ask the trained LSTM model to continue the prompt by 15 words.
model_testing_word(
    text='He decided to wait until the sun had',
    number_of_words=15,
    text_normalization=text_normalization,
    text_to_sequence=text_to_sequence,
    words_index=words_index,
    pad_sequences=pad_sequences,
    model=LSTM_1,
    max_length=max_length_word,
    index_to_words=index_to_words,
)

In [None]:
# 2-Bidirectional LSTM: ask the trained bidirectional model to continue the
# prompt by 15 words. The prompt is the same string used for the LSTM and GRU
# tests (the stray trailing space was removed) so all three models are given
# identical input.
model_testing_word(
    text='He decided to wait until the sun had',
    number_of_words=15,
    text_normalization=text_normalization,
    text_to_sequence=text_to_sequence,
    words_index=words_index,
    pad_sequences=pad_sequences,
    model=Bidirectional_LSTM_1,
    max_length=max_length_word,
    index_to_words=index_to_words,
)

In [None]:
# 3-GRU: ask the trained GRU model to continue the prompt by 15 words.
model_testing_word(
    text='He decided to wait until the sun had',
    number_of_words=15,
    text_normalization=text_normalization,
    text_to_sequence=text_to_sequence,
    words_index=words_index,
    pad_sequences=pad_sequences,
    model=GRU_1,
    max_length=max_length_word,
    index_to_words=index_to_words,
)

*Save The Models*

In [None]:
# Persist the trained LSTM word model to 'English_LSTM_1.h5' in the working directory.
LSTM_1.save('English_LSTM_1.h5')

In [None]:
# Persist the trained bidirectional LSTM word model to
# 'English_Bidirectional_LSTM_1.h5' in the working directory.
Bidirectional_LSTM_1.save('English_Bidirectional_LSTM_1.h5')

In [None]:
# Persist the trained GRU word model to 'English_GRU_1.h5' in the working directory.
GRU_1.save('English_GRU_1.h5')

**2-Transformers**

In [None]:
# End-to-end transformer run: load model, build dataset, train, save, sample.
# Every step is a project helper from transformers_models; the steps depend on
# each other in the order written.

# Name of the pretrained transformer checkpoint to start from.
model_name_ = 'gpt2'
# Load the tokenizer and model for that checkpoint
# (the helper is spelled `transformes_model` in transformers_models).
tokenizer, model = transformes_model(model_name_)
# Build the training dataset from the cleaned corpus file written earlier in
# this notebook.
train_dataset = load_dataset('Cleaned_Corpus.txt', tokenizer)
# Build the collator that groups dataset items into batches.
collator = data_collator(tokenizer)
# Training configuration; only the epoch count (50) is set here.
train_args = train_arguments(epochs = 50)
# Train the transformer model on the dataset.
training_(model=model, training_args=train_args, collator = collator, data = train_dataset)
# Save the trained model together with its tokenizer.
save_model_tokenizer(model = model, tokenizer = tokenizer)
# Generate a continuation for the same prompt used for the word-level models.
transformer_testing(input_text='He decided to wait until the sun had', tokenizer=tokenizer, model=model, text_normalization = text_normalization)