# Import Libraries

In [1]:
import tensorflow as tf
import numpy as np
from src.read_data import read_file
from src.text_normalization import english_text_normalization
from src.text_processing import TextProcessing
import random
from tensorflow.keras.layers import Attention, AdditiveAttention, MultiHeadAttention # type: ignore
from src.encoder_decoder_model import EncoderDecoderWithAttention, EncoderDecoderWithoutAttention
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
from src.data_split import input_output_split, decoder_input_output
import warnings
import os
# Seed the Python, NumPy and TensorFlow random number generators with one shared value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Ignore Warnings

In [2]:
# Ask TensorFlow's native logger to stay quiet and silence Python warnings.
# NOTE(review): TensorFlow is already imported in the first cell; this variable
# is usually set before the import -- confirm it still takes effect here.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})
warnings.filterwarnings('ignore')

# Set Early Stop For Encoder-Decoder Model

In [3]:
# Early-stopping callback handed to every model below: it watches the training
# loss ('loss'), tolerates 3 epochs without improvement, and restores the best
# weights when it stops.
early_stopping = EarlyStopping(
    monitor='loss',
    patience=3,
    restore_best_weights=True,
)

# Read Data

In [4]:
# Load the corpus and keep only the entries whose length is below the cut-off.
MAX_ENTRY_LENGTH = 600
Corpus = [
    entry
    for entry in read_file(file_path='Medical Question Answering.txt')
    if len(entry) < MAX_ENTRY_LENGTH
]
len(Corpus)

5318

In [5]:
# Reproducible shuffle: re-seed Python's RNG, then draw every element of the
# corpus exactly once (a full-length sample without replacement).
random.seed(44)
Corpus = random.sample(Corpus, k=len(Corpus))

# Split The Data Into Encoder Input And Output

In [6]:
# Split the shuffled corpus into input and output texts; the same English
# normalizer is supplied for both normalization arguments.
input_data, output_data = input_output_split(
    data=Corpus,
    text_normalization1=english_text_normalization,
    text_normalization2=english_text_normalization,
)

# Text Processing

**1-Word To Integer (Tokenizer)**

In [7]:
# Helper object that provides the vocabulary / sequence utilities used below.
text_processing = TextProcessing()

# Build one vocabulary over the input and output texts together.
# `all_words` is later used as the vocabulary size of the models
# (presumably the number of unique words -- verify in TextProcessing.data_words).
all_words, words = text_processing.data_words(
    filter='"#$&*+/:=@[\\]^_{|}~',
    data=(input_data + output_data),
)

# Lookup tables in both directions: word -> integer index, integer index -> word.
words_to_index = text_processing.words_to_index_(words=words)
index_to_words = text_processing.index_to_word_(words=words)

**2-Data To Sequence**

In [8]:
# Convert the input texts into sequences of integer word indices.
input_sequences = text_processing.text_to_sequence(
    word_index=words_to_index,
    data=input_data,
)
# Convert the output texts into sequences of integer word indices.
output_sequences = text_processing.text_to_sequence(
    word_index=words_to_index,
    data=output_data,
)

**3-Max Length**

In [9]:
# Length of the longest sequence on each side; used as the padding target.
input_max_length = max(map(len, input_sequences))
output_max_length = max(map(len, output_sequences))

**4-Zero Padding**

In [10]:
# Pad every input sequence to the longest input length.
pad_input = text_processing.sequences_padding(
    input_sequence=input_sequences,
    max_length=input_max_length,
)
# Pad every output sequence to the longest output length.
pad_output = text_processing.sequences_padding(
    input_sequence=output_sequences,
    max_length=output_max_length,
)

**5-Word Occurrence**

In [11]:
# Count how often each word occurs across the input and output texts combined.
word_count = text_processing.word_counts(
    data=(input_data + output_data),
)

# Generate Decoder Input And Output

In [12]:
# Derive the decoder's input and target sequences from the output sequences.
# NOTE(review): this receives the unpadded `output_sequences`; `pad_output`
# is not used in the visible notebook -- confirm decoder_input_output pads internally.
decoder_input, decoder_output = decoder_input_output(
    output_data=output_sequences,
)

# Encoder-Decoder Without Attention

**LSTM**

In [13]:
# Baseline model: encoder-decoder without attention.
encoder_decoder_1 = EncoderDecoderWithoutAttention(
    input_dim_encoder=all_words,
    input_dim_decoder=all_words,
    output_dim_encoder=100,
    output_dim_decoder=100,
    unit1=128,
    unit2=all_words,
    early_stop=early_stopping,
)

# Wire the encoder, then the decoder (initialised from the encoder states), then assemble the model.
encoder_inputs, encoder_states = encoder_decoder_1.encoder()
decoder_inputs, decoder_outputs = encoder_decoder_1.decoder(
    encoder_states=encoder_states,
)
model_1 = encoder_decoder_1.build_model()

# Integer targets -> sparse categorical loss / accuracy.
encoder_decoder_1.model_compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    model=model_1,
    metrics=['sparse_categorical_accuracy'],
)

# Train, then save the model under the name 'lstm_1'.
history = encoder_decoder_1.model_fit(
    model=model_1,
    epochs=120,
    batch_size=32,
    encoder_input=pad_input,
    decoder_input=decoder_input,
    decoder_output=decoder_output,
)
encoder_decoder_1.save_model(model=model_1, name='lstm_1')

Epoch 1/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 81s 475ms/step - loss: 7.3224 - sparse_categorical_accuracy: 0.0223
Epoch 2/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 70s 416ms/step - loss: 5.9992 - sparse_categorical_accuracy: 0.0317
Epoch 3/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 75s 449ms/step - loss: 5.4999 - sparse_categorical_accuracy: 0.2156
Epoch 4/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 87s 519ms/step - loss: 4.9868 - sparse_categorical_accuracy: 0.0936
Epoch 5/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 61s 366ms/step - loss: 4.5412 - sparse_categorical_accuracy: 0.1314
Epoch 6/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 58s 350ms/step - loss: 4.2083 - sparse_categorical_accuracy: 0.1608
Epoch 7/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 59s 354ms/step - loss: 3.9517 - sparse_categorical_accuracy: 0.1781

# Encoder-Decoder With Attention

**1-Luong Attention**

In [15]:
# Encoder-decoder with the Keras `Attention` layer (scaling enabled).
attention_layer = Attention(use_scale=True)
encoder_decoder_2 = EncoderDecoderWithAttention(
    input_dim_encoder=all_words,
    input_dim_decoder=all_words,
    output_dim_encoder=100,
    output_dim_decoder=100,
    unit1=128,
    unit2=all_words,
    attention_layer=attention_layer,
    early_stop=early_stopping,
)

# The attention variant's encoder also returns its outputs, which the decoder receives.
encoder_outputs, encoder_inputs, encoder_states = encoder_decoder_2.encoder()
decoder_inputs, decoder_outputs = encoder_decoder_2.decoder(
    encoder_states=encoder_states,
    encoder_outputs=encoder_outputs,
)
model_2 = encoder_decoder_2.build_model()

# Integer targets -> sparse categorical loss / accuracy.
encoder_decoder_2.model_compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    model=model_2,
    metrics=['sparse_categorical_accuracy'],
)

# Train, then save the model under the name 'lstm_2'.
history = encoder_decoder_2.model_fit(
    model=model_2,
    epochs=120,
    batch_size=32,
    encoder_input=pad_input,
    decoder_input=decoder_input,
    decoder_output=decoder_output,
)
encoder_decoder_2.save_model(model=model_2, name='lstm_2')

Epoch 1/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 90s 527ms/step - loss: 4.8035 - sparse_categorical_accuracy: 0.5308
Epoch 2/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 80s 477ms/step - loss: 2.6779 - sparse_categorical_accuracy: 0.5772
Epoch 3/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 87s 521ms/step - loss: 2.3623 - sparse_categorical_accuracy: 0.6273
Epoch 4/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 88s 529ms/step - loss: 2.1023 - sparse_categorical_accuracy: 0.6784
Epoch 5/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 80s 478ms/step - loss: 1.8977 - sparse_categorical_accuracy: 0.7061
Epoch 6/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 87s 519ms/step - loss: 1.7401 - sparse_categorical_accuracy: 0.7265
Epoch 7/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 79s 471ms/step - loss: 1.6216 - sparse_categorical_accuracy: 0.7402

**2-Bahdanau Attention**

In [17]:
# Encoder-decoder with the Keras `AdditiveAttention` layer (scaling enabled).
attention_layer = AdditiveAttention(use_scale=True)
encoder_decoder_3 = EncoderDecoderWithAttention(
    input_dim_encoder=all_words,
    input_dim_decoder=all_words,
    output_dim_encoder=100,
    output_dim_decoder=100,
    unit1=128,
    unit2=all_words,
    attention_layer=attention_layer,
    early_stop=early_stopping,
)

# The attention variant's encoder also returns its outputs, which the decoder receives.
encoder_outputs, encoder_inputs, encoder_states = encoder_decoder_3.encoder()
decoder_inputs, decoder_outputs = encoder_decoder_3.decoder(
    encoder_states=encoder_states,
    encoder_outputs=encoder_outputs,
)
model_3 = encoder_decoder_3.build_model()

# Integer targets -> sparse categorical loss / accuracy.
encoder_decoder_3.model_compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    model=model_3,
    metrics=['sparse_categorical_accuracy'],
)

# Train, then save the model under the name 'lstm_3'.
history = encoder_decoder_3.model_fit(
    model=model_3,
    epochs=120,
    batch_size=32,
    encoder_input=pad_input,
    decoder_input=decoder_input,
    decoder_output=decoder_output,
)
encoder_decoder_3.save_model(model=model_3, name='lstm_3')

Epoch 1/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 91s 529ms/step - loss: 4.8841 - sparse_categorical_accuracy: 0.5307
Epoch 2/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 86s 512ms/step - loss: 2.6800 - sparse_categorical_accuracy: 0.5793
Epoch 3/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 92s 551ms/step - loss: 2.3549 - sparse_categorical_accuracy: 0.6308
Epoch 4/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 86s 515ms/step - loss: 2.0981 - sparse_categorical_accuracy: 0.6804
Epoch 5/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 88s 526ms/step - loss: 1.8914 - sparse_categorical_accuracy: 0.7078
Epoch 6/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 92s 552ms/step - loss: 1.7359 - sparse_categorical_accuracy: 0.7262
Epoch 7/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 82s 492ms/step - loss: 1.6175 - sparse_categorical_accuracy: 0.7402

**3-MultiHead Attention**

In [19]:
# Encoder-decoder with a Keras `MultiHeadAttention` layer (4 heads, key_dim 32).
attention_layer = MultiHeadAttention(num_heads=4, key_dim=32)
encoder_decoder_4 = EncoderDecoderWithAttention(
    input_dim_encoder=all_words,
    input_dim_decoder=all_words,
    output_dim_encoder=100,
    output_dim_decoder=100,
    unit1=128,
    unit2=all_words,
    attention_layer=attention_layer,
    early_stop=early_stopping,
)

# The attention variant's encoder also returns its outputs, which the decoder receives.
encoder_outputs, encoder_inputs, encoder_states = encoder_decoder_4.encoder()
decoder_inputs, decoder_outputs = encoder_decoder_4.decoder(
    encoder_states=encoder_states,
    encoder_outputs=encoder_outputs,
)
model_4 = encoder_decoder_4.build_model()

# Integer targets -> sparse categorical loss / accuracy.
encoder_decoder_4.model_compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    model=model_4,
    metrics=['sparse_categorical_accuracy'],
)

# Train, then save the model under the name 'lstm_4'.
history = encoder_decoder_4.model_fit(
    model=model_4,
    epochs=120,
    batch_size=32,
    encoder_input=pad_input,
    decoder_input=decoder_input,
    decoder_output=decoder_output,
)
encoder_decoder_4.save_model(model=model_4, name='lstm_4')

Epoch 1/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 89s 518ms/step - loss: 4.6999 - sparse_categorical_accuracy: 0.5307
Epoch 2/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 80s 477ms/step - loss: 2.6671 - sparse_categorical_accuracy: 0.5755
Epoch 3/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 86s 517ms/step - loss: 2.2867 - sparse_categorical_accuracy: 0.6428
Epoch 4/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 90s 538ms/step - loss: 1.9957 - sparse_categorical_accuracy: 0.6909
Epoch 5/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 87s 521ms/step - loss: 1.8164 - sparse_categorical_accuracy: 0.7113
Epoch 6/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 94s 562ms/step - loss: 1.6890 - sparse_categorical_accuracy: 0.7260
Epoch 7/120
167/167 ━━━━━━━━━━━━━━━━━━━━ 89s 530ms/step - loss: 1.5884 - sparse_categorical_accuracy: 0.7367