# Import Libraries

In [1]:
import tensorflow as tf
import numpy as np
from src.read_data import read_file
from src.text_normalization import arabic_text_normalization
from src.text_processing import data_words, words_to_index_, index_to_word_, word_counts, text_to_sequence, sequences_padding, word_sequence_to_text
import random
from tensorflow.keras.layers import Attention, AdditiveAttention, MultiHeadAttention # type: ignore
from tensorflow.keras.layers import LSTM, GRU, Bidirectional# type: ignore
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
from src.data_split import input_output_split, decoder_input_output
from src.encoder import encoder_without_attention, encoder_with_attention
from src.decoder import decoder_without_attention, decoder_with_attention
from src.encoder_decoder_model import build_model, model_compile, model_fit, save_model, summary
from src.transformers import transformers
import warnings
import os
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Ignore Warnings

In [2]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings(action='ignore')

# Set Early Stop For Encoder-Decoder Model

In [3]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

# Read Data

In [6]:
Corpus = read_file(file_path='Question Answer Data.txt')
Corpus = [sentences for sentences in Corpus if len(sentences) < 500]
len(Corpus)

2221

In [7]:
random.seed(44)
Corpus = random.sample(Corpus, len(Corpus))

# Split The Data Into Encoder Input And Output

In [8]:
context, question, answer = input_output_split(data=Corpus, text_normalization1=arabic_text_normalization, text_normalization2=arabic_text_normalization, text_normalization3 =arabic_text_normalization)

# Text Processing

**1-Word To Integer (Tokenizer)**

In [9]:
all_words , words = data_words(filter='"#$&*+/:=@[\\]^_{|}~', data=(context + question + answer)) # Set With Unique Words And Find Number Of Unique Words
words_to_index = words_to_index_(words=words) # Convert Word To Integer Index
index_to_words = index_to_word_(words=words)

In [10]:
all_words

19473

**2-Data To Sequence**

In [11]:
context_sequences = text_to_sequence(word_index=words_to_index, data=context) # Convert Text Into Squences Of Integer
question_sequences = text_to_sequence(word_index=words_to_index, data=question) # Convert Text Into Squences Of Integer
answer_sequences = text_to_sequence(word_index=words_to_index, data=answer) # Convert Text Into Squences Of Integer

**3-Max Length**

In [12]:
context_max_length = len(max(context_sequences, key = len)) # Find Max Length
question_max_length = len(max(question_sequences, key = len))
max_length = max(context_max_length, question_max_length)
answer_max_length = len(max(answer_sequences, key=len))# Find Max Length

In [13]:
max_length

212

**4-Encoder Zero Padding**

In [14]:
pad_context = sequences_padding(input_sequence=context_sequences, max_length=max_length) # Zero Padding (Add Zeros To End Of Sequence In Input Sequences To Make All Sequences In Same Length)
pad_question = sequences_padding(input_sequence=question_sequences, max_length=max_length) # Zero Padding (Add Zeros To End Of Sequence In Input Sequences To Make All Sequences In Same Length)

**5-Generate Decoder Input And Output**

In [15]:
decoder_input, decoder_output = decoder_input_output(output_data= answer_sequences)

**6-Decoder Zero Padding**

In [16]:
pad_decoder_input = sequences_padding(input_sequence=decoder_input, max_length=answer_max_length) # Zero Padding (Add Zeros To End Of Sequence In Input Sequences To Make All Sequences In Same Length)
pad_decoder_output = sequences_padding(input_sequence=decoder_output, max_length=answer_max_length) # Zero Padding (Add Zeros To End Of Sequence In Input Sequences To Make All Sequences In Same Length)

**7-Word Occurrence**

In [17]:
word_count = word_counts(data=(context + question + answer)) # Number Of Occurrence Each Word

# Encoder-Decoder Without Attention

In [18]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

**1-LSTM**

In [19]:
encoder_model_1 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states = encoder_without_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_1)

decoder_model_1 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_without_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_1)

model_1 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_1)

In [20]:
model_compile(model = model_1, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_1, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_1,name="LSTM_1")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 339ms/step - loss: 8.4459 - sparse_categorical_accuracy: 0.0251
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 351ms/step - loss: 5.3937 - sparse_categorical_accuracy: 0.0270
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 422ms/step - loss: 5.1739 - sparse_categorical_accuracy: 0.0270
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 365ms/step - loss: 5.0157 - sparse_categorical_accuracy: 0.0285
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 371ms/step - loss: 4.8596 - sparse_categorical_accuracy: 0.0303
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 366ms/step - loss: 4.7291 - sparse_categorical_accuracy: 0.0303
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 363ms/step - loss: 4.6215 - sparse_categorical_accuracy: 0.0304
Epoch 8/120


In [21]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

**2-GRU**

In [22]:
encoder_model_2 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states = encoder_without_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_2)

decoder_model_2 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_without_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_2)

model_2 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_2)

In [23]:
model_compile(model = model_2, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_2, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_2, name="GRU_1")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 315ms/step - loss: 8.4798 - sparse_categorical_accuracy: 0.0245
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 316ms/step - loss: 5.3810 - sparse_categorical_accuracy: 0.0270
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 303ms/step - loss: 5.1610 - sparse_categorical_accuracy: 0.0303
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 315ms/step - loss: 5.0516 - sparse_categorical_accuracy: 0.0307
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 314ms/step - loss: 4.9361 - sparse_categorical_accuracy: 0.0308
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 313ms/step - loss: 4.8199 - sparse_categorical_accuracy: 0.0309
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 326ms/step - loss: 4.6860 - sparse_categorical_accuracy: 0.0309
Epoch 8/120


In [24]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

**3-Bidirectional**

In [25]:
encoder_model_3 = Bidirectional(LSTM(units=128, return_sequences=True, return_state=True, seed=33))
context_inputs, question_inputs, encoder_states = encoder_without_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_3)

decoder_model_3 = LSTM(units=256, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_without_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_3)

model_3 =  build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_3)

In [26]:
model_compile(model = model_3, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_3, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_3,name="Bidirectional_1")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 470ms/step - loss: 8.0186 - sparse_categorical_accuracy: 0.0252
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 600ms/step - loss: 5.3054 - sparse_categorical_accuracy: 0.0271
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 615ms/step - loss: 5.0244 - sparse_categorical_accuracy: 0.0291
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 549ms/step - loss: 4.6653 - sparse_categorical_accuracy: 0.0308
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 608ms/step - loss: 4.4053 - sparse_categorical_accuracy: 0.0329
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 539ms/step - loss: 4.2199 - sparse_categorical_accuracy: 0.0342
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 605ms/step - loss: 4.0874 - sparse_categorical_accuracy: 0.0349
Epoch 8/120


# Encoder-Decoder With Attention

**1-Luong Attension**

In [27]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

*A-LSTM*

In [28]:
encoder_model_4 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_4)

attention_layer = Attention(use_scale=True)
decoder_model_4 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_4, encoder_outputs=encoder_outputs, attention_layer=attention_layer)

model_4 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_4)

In [29]:
model_compile(model = model_4, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_4, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_4, name="LSTM_2")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 453ms/step - loss: 5.0237 - sparse_categorical_accuracy: 0.8348
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 490ms/step - loss: 0.6050 - sparse_categorical_accuracy: 0.9210
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 456ms/step - loss: 0.5702 - sparse_categorical_accuracy: 0.9226
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 501ms/step - loss: 0.5549 - sparse_categorical_accuracy: 0.9260
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 450ms/step - loss: 0.5389 - sparse_categorical_accuracy: 0.9260
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 438ms/step - loss: 0.5159 - sparse_categorical_accuracy: 0.9260
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 505ms/step - loss: 0.4956 - sparse_categorical_accuracy: 0.9269
Epoch 8/120


*B-GRU*

In [30]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [31]:
encoder_model_5 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_5)

attention_layer = Attention(use_scale=True)
decoder_model_5 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_5, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_5 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_5)

In [32]:
model_compile(model = model_5, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_5, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_5, name="GRU_2")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 414ms/step - loss: 4.8205 - sparse_categorical_accuracy: 0.8364
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 415ms/step - loss: 0.5891 - sparse_categorical_accuracy: 0.9218
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 417ms/step - loss: 0.5524 - sparse_categorical_accuracy: 0.9260
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 411ms/step - loss: 0.5196 - sparse_categorical_accuracy: 0.9262
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 414ms/step - loss: 0.4935 - sparse_categorical_accuracy: 0.9270
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 413ms/step - loss: 0.4715 - sparse_categorical_accuracy: 0.9286
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 412ms/step - loss: 0.4582 - sparse_categorical_accuracy: 0.9292
Epoch 8/120


*C-Bidirectional*

In [33]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [34]:
encoder_model_6 = Bidirectional(LSTM(units=128, return_sequences=True, return_state=True, seed=33))
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_6)

attention_layer = Attention(use_scale=True)
decoder_model_6 = LSTM(units=256, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_6, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_6 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_6)

In [35]:
model_compile(model = model_6, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_6, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_6, name="Bidirectional_2")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 798ms/step - loss: 3.8940 - sparse_categorical_accuracy: 0.8355
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 963ms/step - loss: 0.5950 - sparse_categorical_accuracy: 0.9222
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 957ms/step - loss: 0.5712 - sparse_categorical_accuracy: 0.9260
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 896ms/step - loss: 0.5577 - sparse_categorical_accuracy: 0.9260
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 918ms/step - loss: 0.5447 - sparse_categorical_accuracy: 0.9260
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 944ms/step - loss: 0.5261 - sparse_categorical_accuracy: 0.9262
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 920ms/step - loss: 0.5029 - sparse_categorical_accuracy: 0.9265
Epoch 8/120


**2-Bahdanau Attension**

*A-LSTM*

In [36]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [37]:
encoder_model_7 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_7)

attention_layer = AdditiveAttention(use_scale=True)
decoder_model_7 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_7, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_7 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_7)

In [38]:
model_compile(model = model_7, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_7, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_7, name="LSTM_3")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 552ms/step - loss: 5.0265 - sparse_categorical_accuracy: 0.8353
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 539ms/step - loss: 0.6036 - sparse_categorical_accuracy: 0.9212
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 528ms/step - loss: 0.5715 - sparse_categorical_accuracy: 0.9227
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 566ms/step - loss: 0.5559 - sparse_categorical_accuracy: 0.9248
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 574ms/step - loss: 0.5419 - sparse_categorical_accuracy: 0.9260
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 491ms/step - loss: 0.5254 - sparse_categorical_accuracy: 0.9260
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 505ms/step - loss: 0.5094 - sparse_categorical_accuracy: 0.9268
Epoch 8/120


*B-GRU*

In [39]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [40]:
encoder_model_8 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_8)

attention_layer = Attention(use_scale=True)
decoder_model_8 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_8, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_8 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_8)

In [41]:
model_compile(model = model_8, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_8, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_8, name="GRU_3")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 406ms/step - loss: 4.8317 - sparse_categorical_accuracy: 0.8363
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 458ms/step - loss: 0.5888 - sparse_categorical_accuracy: 0.9218
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 463ms/step - loss: 0.5552 - sparse_categorical_accuracy: 0.9261
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 486ms/step - loss: 0.5206 - sparse_categorical_accuracy: 0.9262
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 445ms/step - loss: 0.4914 - sparse_categorical_accuracy: 0.9266
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 425ms/step - loss: 0.4708 - sparse_categorical_accuracy: 0.9282
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 488ms/step - loss: 0.4578 - sparse_categorical_accuracy: 0.9287
Epoch 8/120


*C-Bidirectional*

In [42]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [43]:
encoder_model_9 = Bidirectional(LSTM(units=128, return_sequences=True, return_state=True, seed=33))
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_9)

attention_layer = AdditiveAttention(use_scale=True)
decoder_model_9 = LSTM(units=256, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_9, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_9 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_9)

In [44]:
model_compile(model = model_9, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_9, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_9, name="Bidirectional_3")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 1s/step - loss: 3.9750 - sparse_categorical_accuracy: 0.8367
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 1s/step - loss: 0.5924 - sparse_categorical_accuracy: 0.9225
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 1s/step - loss: 0.5670 - sparse_categorical_accuracy: 0.9260
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 1s/step - loss: 0.5482 - sparse_categorical_accuracy: 0.9260
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 1s/step - loss: 0.5155 - sparse_categorical_accuracy: 0.9261
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 1s/step - loss: 0.4897 - sparse_categorical_accuracy: 0.9272
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 1s/step - loss: 0.4688 - sparse_categorical_accuracy: 0.9279
Epoch 8/120
[1m70/70[0m [32m━━━

**3-MultiHead Attesnion**

*A-LSTM*

In [45]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [46]:
encoder_model_10 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_10)

attention_layer = MultiHeadAttention(num_heads=4, key_dim=32)
decoder_model_10 = LSTM(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model= decoder_model_10, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_10 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_10)

In [47]:
model_compile(model = model_10, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_10, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_10, name="LSTM_4")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 449ms/step - loss: 4.6912 - sparse_categorical_accuracy: 0.8363
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 439ms/step - loss: 0.5764 - sparse_categorical_accuracy: 0.9225
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 440ms/step - loss: 0.5464 - sparse_categorical_accuracy: 0.9227
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 437ms/step - loss: 0.5228 - sparse_categorical_accuracy: 0.9227
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 445ms/step - loss: 0.5013 - sparse_categorical_accuracy: 0.9253
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 441ms/step - loss: 0.4791 - sparse_categorical_accuracy: 0.9274
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 516ms/step - loss: 0.4653 - sparse_categorical_accuracy: 0.9278
Epoch 8/120


*B-GRU*

In [48]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [49]:
encoder_model_11= GRU(units=128, return_sequences=True, return_state=True, seed=33)
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_11)

attention_layer = MultiHeadAttention(num_heads=4, key_dim=32)
decoder_model_11 = GRU(units=128, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_11, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_11 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_11)

In [50]:
model_compile(model = model_11, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_11, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_11, name="GRU_4")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 454ms/step - loss: 4.5512 - sparse_categorical_accuracy: 0.8416
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 424ms/step - loss: 0.5685 - sparse_categorical_accuracy: 0.9230
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 421ms/step - loss: 0.5357 - sparse_categorical_accuracy: 0.9259
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 428ms/step - loss: 0.5130 - sparse_categorical_accuracy: 0.9256
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 417ms/step - loss: 0.4875 - sparse_categorical_accuracy: 0.9269
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 410ms/step - loss: 0.4678 - sparse_categorical_accuracy: 0.9273
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 411ms/step - loss: 0.4557 - sparse_categorical_accuracy: 0.9279
Epoch 8/120


*C-Bidirectional*

In [51]:
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [52]:
encoder_model_12 = Bidirectional(LSTM(units=128, return_sequences=True, return_state=True, seed=33))
context_inputs, question_inputs, encoder_states, encoder_outputs = encoder_with_attention(input_dim_encoder = all_words, output_dim_encoder = 100, encoder_model= encoder_model_12)

attention_layer = MultiHeadAttention(num_heads=4, key_dim=32)
decoder_model_12 = LSTM(units=256, return_sequences=True, return_state=True, seed=33)
decoder_inputs, decoder_outputs  = decoder_with_attention(encoder_states = encoder_states, input_dim_decoder = all_words, output_dim_decoder = 128, unit = all_words,  decoder_model=decoder_model_12, attention_layer=attention_layer, encoder_outputs=encoder_outputs)

model_12 = build_model(context_input= context_inputs, question_input= question_inputs, decoder_inputs = decoder_inputs, decoder_output= decoder_outputs)
summary(model=model_12)

In [53]:
model_compile(model = model_12, optimizer = tf.keras.optimizers.Adam(), loss = 'sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
history = model_fit(model = model_12, epochs = 120, batch_size = 32, context_data =  pad_context, question_data=pad_question, decoder_input = pad_decoder_input, decoder_output = pad_decoder_output, early_stop = early_stopping)
save_model(model=model_12, name="Bidirectional_4")

Epoch 1/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 840ms/step - loss: 3.8850 - sparse_categorical_accuracy: 0.8341
Epoch 2/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 954ms/step - loss: 0.5777 - sparse_categorical_accuracy: 0.9228
Epoch 3/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 980ms/step - loss: 0.5414 - sparse_categorical_accuracy: 0.9260
Epoch 4/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 949ms/step - loss: 0.5098 - sparse_categorical_accuracy: 0.9260
Epoch 5/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 990ms/step - loss: 0.4861 - sparse_categorical_accuracy: 0.9272
Epoch 6/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 974ms/step - loss: 0.4678 - sparse_categorical_accuracy: 0.9283
Epoch 7/120
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 891ms/step - loss: 0.4501 - sparse_categorical_accuracy: 0.9289
Epoch 8/120


# Transformers

In [54]:
context = 'نظرا لأنه تم تعيينها كعاصمة وطنية تم بناء العديد من الهياكل في ذلك الوقت حتى اليوم لا يزال بعضها مفتوحا للسياح'
question = 'ماذا حدث في الوقت الذي تم فيه تعيين نانجينغ كعاصمة ؟ '
model_name = "wonfs/arabert-v2-qa"

In [55]:
transformers(model_name=model_name, context=context, question=question)

All model checkpoint layers were used when initializing TFBertForQuestionAnswering.

All the layers of TFBertForQuestionAnswering were initialized from the model checkpoint at wonfs/arabert-v2-qa.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForQuestionAnswering for predictions without further training.



Context: نظرا لأنه تم تعيينها كعاصمة وطنية تم بناء العديد من الهياكل في ذلك الوقت حتى اليوم لا يزال بعضها مفتوحا للسياح
Question: ماذا حدث في الوقت الذي تم فيه تعيين نانجينغ كعاصمة ؟ 
Answer: تم بناء العديد من الهياكل في ذلك الوقت
