# Import Libraries

In [1]:
import tensorflow as tf
import numpy as np
from read_data import read_file
from text_normalization import english_text_normalization, arabic_text_normalization
from text_processing import TextProcessing
import random
from tensorflow.keras.layers import LSTM, GRU, Attention, AdditiveAttention, MultiHeadAttention # type: ignore
from encoder_decoder_model import EncoderDecoderWithAttention, EncoderDecoderWithoutAttention
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
from model_check_point import check_point
from data_split import input_output_split, decoder_input_output
import warnings
import os
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Ignore Warnings

In [2]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings(action='ignore')

# Set Early Stop For Encoder-Decoder Model

In [3]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

# Read Data

In [4]:
Corpus = read_file(file_path='Questions Answering Pairs.txt')
len(Corpus)

3653

In [5]:
random.seed(44)
Corpus = random.sample(Corpus, len(Corpus))

# Split The Data Into Encoder Input And Output

In [6]:
input_data, output_data = input_output_split(data=Corpus, text_normalization1=arabic_text_normalization, text_normalization2=english_text_normalization)

# Process The Data 

**Input Data**

In [7]:
text_processing_input = TextProcessing() # Input TextProcessing
all_words_input , words_input = text_processing_input.data_words(filter='"#$&*+/:=@[\\]^_`{|}~', data=input_data) # Set With Unique Words And Find Number Of Unique Words
words_to_index_input = text_processing_input.words_to_index_(words=words_input) # Convert Word To Integer Index
index_to_words_input = text_processing_input.index_to_word_(words=words_input)
input_sequences = text_processing_input.text_to_sequence(word_index=words_to_index_input, data=input_data) # Convert Text Into Squences Of Integer
input_data_max_length = len(max(input_sequences, key = len)) # Find Max Length
pad_input_data = text_processing_input.sequences_padding(input_sequence=input_sequences, max_length=input_data_max_length) # Zero Padding (Add Zeros To End Of Sequence In Input Sequences To Make All Sequences In Same Length)
word_count_input = text_processing_input.word_counts(data=input_data) # Number Of Occurrence Each Word

In [8]:
all_words_input

2464

**Output Data**

In [9]:
text_processing_output = TextProcessing() # Input TextProcessing
all_words_output , words_output = text_processing_output.data_words(filter='"#$&*+/=@[\\]^_`{|}~', data=output_data) # Set With Unique Words And Find Number Of Unique Words
words_to_index_output = text_processing_output.words_to_index_(words=words_output) # Conver Word To Integer Index
index_to_words_output = text_processing_output.index_to_word_(words=words_output)
output_sequences = text_processing_output.text_to_sequence(word_index=words_to_index_output, data=output_data) # Convert Text Into Squences Of Integer
word_count_output = text_processing_output.word_counts(data=output_data) # Number Of Occurrence Each Word

In [10]:
all_words_output

7956

# Generate Decoder Input And Output

In [11]:
decoder_input, decoder_output = decoder_input_output(output_data= output_sequences)

# Find Decoder Max Length

In [12]:
decoder_max_length = len(max(decoder_input, key = len)) # Find Max Length Of Decoder
decoder_max_length

104

# Padding Decoder Data

In [13]:
pad_decoder_input = text_processing_output.sequences_padding(input_sequence=decoder_input, max_length=decoder_max_length)
pad_decoder_output = text_processing_output.sequences_padding(input_sequence=decoder_output, max_length=decoder_max_length)

# Encoder-Decoder Without Attention

**LSTM**

In [14]:
lstm_callback_1 = check_point('./CheckPoint/lstm_1')
model_encoder_lstm_1 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_lstm_1 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
encoder_decoder_lstm_1 = EncoderDecoderWithoutAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_lstm_1, model_decoder = model_decoder_lstm_1, unit = all_words_output, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=lstm_callback_1, early_stop=early_stopping)
encoder_inputs, encoder_states = encoder_decoder_lstm_1.encoder()
decoder_inputs, decoder_outputs = encoder_decoder_lstm_1.decoder(encoder_states=encoder_states)
model_lstm_1 = encoder_decoder_lstm_1.build_model()
encoder_decoder_lstm_1.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_lstm_1, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_lstm_1.model_fit(model = model_lstm_1, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 242ms/step - loss: 7.4488 - sparse_categorical_accuracy: 0.0215
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 250ms/step - loss: 5.9558 - sparse_categorical_accuracy: 0.0229
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 257ms/step - loss: 5.6627 - sparse_categorical_accuracy: 0.0358
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 280ms/step - loss: 5.3063 - sparse_categorical_accuracy: 0.0503
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 251ms/step - loss: 4.8526 - sparse_categorical_accuracy: 0.0864
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 263ms/step - loss: 4.4087 - sparse_categorical_accuracy: 0.1380
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 292ms/step - loss: 4.0478 - sparse_categorical_accuracy: 0.1696

**GRU**

In [16]:
gru_callback_1 = check_point('./CheckPoint/gru_1')
model_encoder_gru_1 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_gru_1 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
encoder_decoder_2 = EncoderDecoderWithoutAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_gru_1, model_decoder = model_decoder_gru_1, unit = all_words_output, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=gru_callback_1, early_stop=early_stopping)
encoder_inputs, encoder_states = encoder_decoder_2.encoder()
decoder_inputs, decoder_outputs = encoder_decoder_2.decoder(encoder_states=encoder_states)
model_gru_1 = encoder_decoder_2.build_model()
encoder_decoder_2.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_gru_1, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_2.model_fit(model = model_gru_1, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 214ms/step - loss: 7.5085 - sparse_categorical_accuracy: 0.0298
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 231ms/step - loss: 5.9697 - sparse_categorical_accuracy: 0.0245
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 232ms/step - loss: 5.8769 - sparse_categorical_accuracy: 0.0252
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 225ms/step - loss: 5.6820 - sparse_categorical_accuracy: 0.0434
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 220ms/step - loss: 4.9830 - sparse_categorical_accuracy: 0.0984
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 231ms/step - loss: 4.2086 - sparse_categorical_accuracy: 0.1628
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 232ms/step - loss: 3.7438 - sparse_categorical_accuracy: 0.1960

# Encoder-Decoder With Attention

**1-Luong Attension**

*LSTM*

In [18]:
lstm_callback_2 = check_point('./CheckPoint/lstm_2')
model_encoder_lstm_2 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_lstm_2 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
attention_layer = Attention(use_scale=True)
encoder_decoder_3 = EncoderDecoderWithAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_lstm_2, model_decoder = model_decoder_lstm_2, unit = all_words_output, attention_layer = attention_layer, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=lstm_callback_2, early_stop=early_stopping)
output_data, encoder_inputs, encoder_states = encoder_decoder_3.encoder_()
decoder_inputs, decoder_outputs = encoder_decoder_3.decoder_(encoder_states=encoder_states, encoder_outputs=output_data)
model_lstm_2 = encoder_decoder_3.build_model()
encoder_decoder_3.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_lstm_2, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_3.model_fit(model = model_lstm_2, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 333ms/step - loss: 5.2099 - sparse_categorical_accuracy: 0.5122
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 332ms/step - loss: 2.7688 - sparse_categorical_accuracy: 0.5582
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 334ms/step - loss: 2.5472 - sparse_categorical_accuracy: 0.5795
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 333ms/step - loss: 2.2588 - sparse_categorical_accuracy: 0.6393
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 333ms/step - loss: 2.0000 - sparse_categorical_accuracy: 0.6917
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 333ms/step - loss: 1.7991 - sparse_categorical_accuracy: 0.7222
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 337ms/step - loss: 1.6494 - sparse_categorical_accuracy: 0.7412

*GRU*

In [19]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [20]:
gru_callback_2 = check_point('./CheckPoint/gru_2')
model_encoder_gru_2 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_gru_2 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
attention_layer = Attention(use_scale=True)
encoder_decoder_4 = EncoderDecoderWithAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_gru_2, model_decoder = model_decoder_gru_2, unit = all_words_output, attention_layer = attention_layer, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=gru_callback_2, early_stop=early_stopping)
output_data, encoder_inputs, encoder_states = encoder_decoder_4.encoder_()
decoder_inputs, decoder_outputs = encoder_decoder_4.decoder_(encoder_states=encoder_states, encoder_outputs=output_data)
model_gru_2 = encoder_decoder_4.build_model()
encoder_decoder_4.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_gru_2, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_4.model_fit(model = model_gru_2, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 345ms/step - loss: 5.0853 - sparse_categorical_accuracy: 0.5139
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 347ms/step - loss: 2.6471 - sparse_categorical_accuracy: 0.5679
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 341ms/step - loss: 2.2617 - sparse_categorical_accuracy: 0.6421
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 349ms/step - loss: 1.8848 - sparse_categorical_accuracy: 0.7098
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 347ms/step - loss: 1.6739 - sparse_categorical_accuracy: 0.7355
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 345ms/step - loss: 1.5364 - sparse_categorical_accuracy: 0.7495
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 350ms/step - loss: 1.4319 - sparse_categorical_accuracy: 0.7605

**2-Bahdanau Attension**

*LSTM*

In [24]:
lstm_callback_3 = check_point('./CheckPoint/lstm_3')
model_encoder_lstm_3 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_lstm_ = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
attention_layer = AdditiveAttention(use_scale=True)
encoder_decoder_5 = EncoderDecoderWithAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_lstm_3, model_decoder = model_encoder_lstm_3, unit = all_words_output, attention_layer = attention_layer, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=lstm_callback_3, early_stop=early_stopping)
output_data, encoder_inputs, encoder_states = encoder_decoder_5.encoder_()
decoder_inputs, decoder_outputs = encoder_decoder_5.decoder_(encoder_states=encoder_states, encoder_outputs=output_data)
model_lstm_3 = encoder_decoder_5.build_model()
encoder_decoder_5.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_lstm_3, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_5.model_fit(model = model_lstm_3, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 339ms/step - loss: 5.3100 - sparse_categorical_accuracy: 0.5122
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 340ms/step - loss: 2.7913 - sparse_categorical_accuracy: 0.5564
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 342ms/step - loss: 2.5082 - sparse_categorical_accuracy: 0.5835
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 341ms/step - loss: 2.2462 - sparse_categorical_accuracy: 0.6416
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 340ms/step - loss: 2.0277 - sparse_categorical_accuracy: 0.6834
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 338ms/step - loss: 1.8580 - sparse_categorical_accuracy: 0.7111
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 341ms/step - loss: 1.7262 - sparse_categorical_accuracy: 0.7280

*GRU*

In [25]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [26]:
gru_callback_3 = check_point('./CheckPoint/gru_3')
model_encoder_gru_3 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_gru_3 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
attention_layer = Attention(use_scale=True)
encoder_decoder_6 = EncoderDecoderWithAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_gru_3, model_decoder = model_decoder_gru_3, unit = all_words_output, attention_layer = attention_layer, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=gru_callback_3, early_stop=early_stopping)
output_data, encoder_inputs, encoder_states = encoder_decoder_6.encoder_()
decoder_inputs, decoder_outputs = encoder_decoder_6.decoder_(encoder_states=encoder_states, encoder_outputs=output_data)
model_gru_3 = encoder_decoder_6.build_model()
encoder_decoder_6.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_gru_3, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_6.model_fit(model = model_gru_3, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 340ms/step - loss: 5.0462 - sparse_categorical_accuracy: 0.5139
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 332ms/step - loss: 2.6584 - sparse_categorical_accuracy: 0.5684
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 326ms/step - loss: 2.2655 - sparse_categorical_accuracy: 0.6414
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 356ms/step - loss: 1.8807 - sparse_categorical_accuracy: 0.7099
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 327ms/step - loss: 1.6660 - sparse_categorical_accuracy: 0.7357
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 320ms/step - loss: 1.5249 - sparse_categorical_accuracy: 0.7517
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 317ms/step - loss: 1.4184 - sparse_categorical_accuracy: 0.7629

**3-MultiHead Attesnion**

*LSTM*

In [28]:
lstm_callback_4 = check_point('./CheckPoint/lstm_4')
model_encoder_lstm_4 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_lstm_4 = LSTM(units=128, return_sequences=True, return_state=True, seed = 33)
attention_layer = MultiHeadAttention(num_heads=4, key_dim=32)
encoder_decoder_7 = EncoderDecoderWithAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_lstm_4, model_decoder = model_decoder_lstm_4, unit = all_words_output, attention_layer = attention_layer, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=lstm_callback_4, early_stop=early_stopping)
output_data, encoder_inputs, encoder_states = encoder_decoder_7.encoder_()
decoder_inputs, decoder_outputs = encoder_decoder_7.decoder_(encoder_states=encoder_states, encoder_outputs=output_data)
model_lstm_4 = encoder_decoder_7.build_model()
encoder_decoder_7.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_lstm_4, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_7.model_fit(model = model_lstm_4, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 340ms/step - loss: 4.9730 - sparse_categorical_accuracy: 0.5152
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 335ms/step - loss: 2.7621 - sparse_categorical_accuracy: 0.5588
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 349ms/step - loss: 2.5909 - sparse_categorical_accuracy: 0.5710
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 334ms/step - loss: 2.3461 - sparse_categorical_accuracy: 0.6092
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 349ms/step - loss: 2.0673 - sparse_categorical_accuracy: 0.6752
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 332ms/step - loss: 1.8852 - sparse_categorical_accuracy: 0.6992
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 351ms/step - loss: 1.7638 - sparse_categorical_accuracy: 0.7168

*GRU*

In [30]:
gru_callback_4 = check_point('./CheckPoint/gru_4')
model_encoder_gru_4 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
model_decoder_gru_4 = GRU(units=128, return_sequences=True, return_state=True, seed = 33)
attention_layer = MultiHeadAttention(num_heads=4, key_dim=32)
encoder_decoder_8 = EncoderDecoderWithAttention(input_dim_encoder = all_words_input, input_dim_decoder = all_words_output , output_dim_encoder = 100, output_dim_decoder = 100, input_length_encoder = input_data_max_length , input_length_decoder = decoder_max_length, model_encoder = model_encoder_gru_4, model_decoder = model_decoder_gru_4, unit = all_words_output, attention_layer = attention_layer, max_length_input=input_data_max_length, max_length_output=decoder_max_length, callback=gru_callback_4, early_stop=early_stopping)
output_data, encoder_inputs, encoder_states = encoder_decoder_8.encoder_()
decoder_inputs, decoder_outputs = encoder_decoder_8.decoder_(encoder_states=encoder_states, encoder_outputs=output_data)
model_gru_4 = encoder_decoder_8.build_model()
encoder_decoder_8.model_compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', model = model_gru_4, metrics=['sparse_categorical_accuracy'])
history = encoder_decoder_8.model_fit(model = model_gru_4, epochs = 120, batch_size = 32, encoder_input = pad_input_data, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output)

Epoch 1/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 329ms/step - loss: 4.8795 - sparse_categorical_accuracy: 0.5163
Epoch 2/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 319ms/step - loss: 2.6472 - sparse_categorical_accuracy: 0.5691
Epoch 3/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 319ms/step - loss: 2.1970 - sparse_categorical_accuracy: 0.6452
Epoch 4/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 320ms/step - loss: 1.8435 - sparse_categorical_accuracy: 0.7079
Epoch 5/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 319ms/step - loss: 1.6512 - sparse_categorical_accuracy: 0.7322
Epoch 6/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 338ms/step - loss: 1.5241 - sparse_categorical_accuracy: 0.7472
Epoch 7/120
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 320ms/step - loss: 1.4297 - sparse_categorical_accuracy: 0.7577