In [None]:
import warnings
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, RNN
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.layers import Input, Softmax
from tensorflow.keras.models import Model
import nltk.translate.bleu_score as bleu
import matplotlib.ticker as ticker
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import numpy as np
import zipfile
import random
import shutil
import pickle
import pytz
import re
import os
import io

In [None]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (the bare ``pickle.load(open(...))`` form leaks it).

    NOTE(review): ``pickle.load`` can execute arbitrary code from the file;
    only load artifacts produced by a trusted pipeline.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Tokenizers for the encoder and decoder sides.
tokenizer_enc = _load_pickle(r"tokenizer_encoder.pkl")
tokenizer_dec = _load_pickle(r"tokenizer_decoder.pkl")

# Encoder inputs for each split.
train_enc_inp = _load_pickle(r"train_encoder_input.pkl")
val_enc_inp = _load_pickle(r"validation_encoder_input.pkl")
test_enc_inp = _load_pickle(r"test_encoder_input.pkl")

# Decoder inputs for each split.
train_dec_inp = _load_pickle(r"train_decoder_input.pkl")
val_dec_inp = _load_pickle(r"validation_decoder_input.pkl")
test_dec_inp = _load_pickle(r"test_decoder_input.pkl")

# Decoder targets for each split.
train_dec_out = _load_pickle(r"train_decoder_output.pkl")
val_dec_out = _load_pickle(r"validation_decoder_output.pkl")
test_dec_out = _load_pickle(r"test_decoder_output.pkl")

In [None]:
class Encoder(tf.keras.layers.Layer):
    """Embeds token-id sequences and encodes them with a single LSTM.

    Args:
        inp_vocab_size: Number of rows in the embedding table.
        embedding_size: Width of each embedding vector.
        enc_units: Number of LSTM units (size of the hidden and cell states).
        input_length: Forwarded to the ``Embedding`` layer as ``input_length``.
    """

    def __init__(self, inp_vocab_size, embedding_size, enc_units, input_length):
        super().__init__()
        self.enc_units = enc_units
        # Use the caller-supplied `embedding_size`; it was previously ignored
        # in favour of a hard-coded 300.
        self.embedding = Embedding(input_dim=inp_vocab_size, output_dim=embedding_size,
                                   input_length=input_length, mask_zero=True,
                                   name="embedding_layer_encoder")

        self.lstmcell = tf.keras.layers.LSTMCell(enc_units)
        # return_sequences + return_state: per-timestep outputs plus final (h, c).
        self.enc = RNN(self.lstmcell, return_sequences=True, return_state=True)

    def call(self, input_sequence, states):
        """Encode ``input_sequence`` starting from ``states``.

        Args:
            input_sequence: Batch of token ids fed to the embedding layer.
            states: ``[hidden_state, cell_state]`` used as the LSTM's initial state.

        Returns:
            ``(enc_output, enc_state_h, enc_state_c)``: the LSTM output for every
            timestep followed by the final hidden and cell states.
        """
        embedding_enc = self.embedding(input_sequence)
        enc_output, enc_state_h, enc_state_c = self.enc(embedding_enc, initial_state=states)
        return enc_output, enc_state_h, enc_state_c

    def initialize_states(self, batch_size):
        """Return zero-filled ``[hidden_state, cell_state]``, each ``(batch_size, enc_units)``."""
        ini_hidden_state = tf.zeros([batch_size, self.enc_units])
        ini_cell_state = tf.zeros([batch_size, self.enc_units])
        return [ini_hidden_state, ini_cell_state]

In [None]:
class Attention(tf.keras.layers.Layer):
    """Dot-product attention between one decoder state and all encoder outputs.

    Args:
        att_units: Accepted for interface compatibility only; dot-product
            scoring has no trainable weights, so the value is not used.
    """

    def __init__(self, att_units):
        super().__init__()
        # Normalise over axis 1, the encoder-timestep axis of the score tensor.
        self.softmax = Softmax(axis=1)

    def call(self, decoder_hidden_state, encoder_output):
        """Compute the context vector and attention weights.

        Args:
            decoder_hidden_state: ``(batch, units)`` decoder hidden state.
            encoder_output: ``(batch, timesteps, units)`` encoder outputs.

        Returns:
            ``(context, attention_weights)`` where ``context`` is
            ``(batch, units)`` and ``attention_weights`` is
            ``(batch, timesteps, 1)`` and sums to 1 over the timestep axis.
        """
        # Raw alignment scores: (batch, timesteps, units) @ (batch, units, 1).
        scores = tf.matmul(encoder_output, tf.expand_dims(decoder_hidden_state, axis=2))
        # Normalise BEFORE building the context. Previously the context was a
        # sum weighted by the raw scores and the softmax was only applied to
        # the returned weights, so the two outputs disagreed.
        attention_weights = self.softmax(scores)
        # Weighted sum of encoder outputs: (batch, units, timesteps) @ (batch, timesteps, 1).
        context = tf.matmul(tf.transpose(encoder_output, perm=[0, 2, 1]), attention_weights)
        context = tf.squeeze(context, axis=2)
        return context, attention_weights

In [None]:
class OneStepDecoder(tf.keras.Model):
    """Runs the decoder for a single timestep with attention over the encoder outputs.

    Args:
        tar_vocab_size: Target vocabulary size (embedding rows and logits width).
        embedding_dim: Width of each target embedding vector.
        input_length: Forwarded to the ``Embedding`` layer as ``input_length``.
        dec_units: Number of decoder LSTM units.
        att_units: Forwarded to the ``Attention`` layer.
    """

    def __init__(self, tar_vocab_size, embedding_dim, input_length, dec_units, att_units):
        super().__init__()
        self.tar_vocab_size = tar_vocab_size
        self.dec_units = dec_units
        self.att_units = att_units
        # Use the caller-supplied `embedding_dim`; it was previously ignored
        # in favour of a hard-coded 300.
        self.embedding = Embedding(input_dim=tar_vocab_size, output_dim=embedding_dim,
                                   input_length=input_length, mask_zero=True,
                                   name="embedding_layer_decoder")

        self.lstmcell = tf.keras.layers.LSTMCell(dec_units)
        self.decoder_lstm = RNN(self.lstmcell, return_sequences=True, return_state=True)
        # Projects the LSTM output to one logit per target-vocabulary entry.
        self.dense = Dense(tar_vocab_size)
        self.attention = Attention(self.att_units)

    def call(self, input_to_decoder, encoder_output, state_h, state_c):
        """Decode one timestep.

        Args:
            input_to_decoder: ``(batch, 1)`` token ids for the current step.
            encoder_output: Encoder outputs for every source timestep.
            state_h: Decoder hidden state from the previous step.
            state_c: Decoder cell state from the previous step.

        Returns:
            ``(logits, dec_state_h, dec_state_c, attention_weights, context_vector)``
            where ``logits`` has the length-1 time axis squeezed away.
        """
        embedding_layer = self.embedding(input_to_decoder)
        # Drop the length-1 time axis so the embedding can be concatenated
        # with the context vector along the feature axis.
        embedding_layer = tf.squeeze(embedding_layer, axis=1)
        context_vector, attention_weights = self.attention(state_h, encoder_output)
        context_vector_for_concat = tf.concat([context_vector, embedding_layer], 1)
        # Restore a length-1 time axis: the RNN wrapper expects a sequence.
        context_vector_for_concat = tf.expand_dims(context_vector_for_concat, 1)
        dec_output, dec_state_h, dec_state_c = self.decoder_lstm(
            context_vector_for_concat, initial_state=[state_h, state_c])
        output_after_dense_layer = self.dense(dec_output)
        output_after_dense_layer = tf.squeeze(output_after_dense_layer, axis=1)
        return output_after_dense_layer, dec_state_h, dec_state_c, attention_weights, context_vector

In [None]:
class Decoder(tf.keras.Model):
    """Unrolls ``OneStepDecoder`` across every timestep of the decoder input."""

    def __init__(self, out_vocab_size, embedding_dim, input_length, dec_units, att_units):
        super().__init__()
        self.out_vocab_size = out_vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.att_units = att_units
        self.input_length = input_length
        self.onestepdecoder = OneStepDecoder(
            self.out_vocab_size,
            self.embedding_dim,
            self.input_length,
            self.dec_units,
            self.att_units,
        )

    @tf.function
    def call(self, input_to_decoder, encoder_output, decoder_hidden_state, decoder_cell_state):
        """Decode all timesteps, feeding each step's states into the next.

        Returns the per-step outputs of the one-step decoder stacked and
        transposed so the batch axis comes first and the timestep axis second.
        """
        num_steps = input_to_decoder.shape[1]
        step_outputs = tf.TensorArray(tf.float32, size=num_steps)
        state_h, state_c = decoder_hidden_state, decoder_cell_state
        for step in range(num_steps):
            # Slice with step:step+1 to keep a length-1 time axis.
            current_tokens = input_to_decoder[:, step:step + 1]
            # Attention weights and context vector are not needed here.
            logits, state_h, state_c, _, _ = self.onestepdecoder(
                current_tokens, encoder_output, state_h, state_c)
            step_outputs = step_outputs.write(step, logits)
        # stack() is time-major; swap the first two axes to batch-major.
        return tf.transpose(step_outputs.stack(), [1, 0, 2])

In [None]:
class encoder_decoder(tf.keras.Model):
    """End-to-end model: ``Encoder`` followed by the attention ``Decoder``.

    Args:
        inp_vocab_size: Encoder vocabulary size.
        out_vocab_size: Decoder vocabulary size.
        embedding_size: Embedding width passed to both encoder and decoder.
        lstm_size: Number of LSTM units for both encoder and decoder.
        input_length: Sequence length passed to both encoder and decoder.
        batch_size: Stored as ``self.batch`` for backward compatibility; the
            forward pass now sizes the initial states from the actual input.
        att_units: Forwarded to the decoder's attention layer.
    """

    def __init__(self, inp_vocab_size, out_vocab_size, embedding_size, lstm_size, input_length, batch_size, att_units, *args):
        super().__init__()
        self.encoder = Encoder(inp_vocab_size, embedding_size, lstm_size, input_length)
        self.decoder = Decoder(out_vocab_size, embedding_size, input_length, lstm_size, att_units)
        self.batch = batch_size

    def call(self, data):
        """Run a forward pass.

        Args:
            data: Pair ``(encoder_tokens, decoder_tokens)``.

        Returns:
            The decoder output for every decoder timestep.
        """
        encoder_tokens, decoder_tokens = data[0], data[1]
        # Size the zero initial states from the batch actually received rather
        # than the constructor's fixed batch size, so partial batches and
        # single-example inference work too.
        current_batch = tf.shape(encoder_tokens)[0]
        initial_states = self.encoder.initialize_states(current_batch)
        encoder_output, encoder_final_state_h, encoder_final_state_c = self.encoder(encoder_tokens, initial_states)
        decoder_output = self.decoder(decoder_tokens, encoder_output, encoder_final_state_h, encoder_final_state_c)
        return decoder_output

In [None]:
# Per-element cross-entropy on raw logits; reduction is done in loss_function.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(reduction='none', from_logits=True)


def loss_function(real, pred):
    """Cross-entropy with positions whose target id is 0 zeroed out.

    Note that the final mean is taken over every position, including the
    zeroed ones, not only over the non-zero targets.
    """
    per_position_loss = loss_object(real, pred)
    keep = tf.cast(tf.math.not_equal(real, 0), per_position_loss.dtype)
    return tf.reduce_mean(per_position_loss * keep)

In [None]:
# Model dimensions.
embedding_dim = 300
lstm_size = 192
att_units = 192

# Sequence length and training batch size.
input_length = 12
batch_size = 1024

# Vocabulary sizes: one more than the number of entries in each tokenizer's word_index.
enc_vocab_size = 1 + len(tokenizer_enc.word_index)
dec_vocab_size = 1 + len(tokenizer_dec.word_index)

In [None]:
# Largest row counts that are exact multiples of batch_size.
train_trunc_idx = batch_size * (train_enc_inp.shape[0] // batch_size)
val_trunc_idx = batch_size * (val_enc_inp.shape[0] // batch_size)

# Drop the trailing rows that would form an incomplete batch.
train_enc_inp_truncated, train_dec_inp_truncated, train_dec_out_truncated = (
    split[:train_trunc_idx]
    for split in (train_enc_inp, train_dec_inp, train_dec_out)
)

val_enc_inp_truncated, val_dec_inp_truncated, val_dec_out_truncated = (
    split[:val_trunc_idx]
    for split in (val_enc_inp, val_dec_inp, val_dec_out)
)

In [None]:
# Build the encoder-decoder model from the hyper-parameters defined above.
model = encoder_decoder(
    inp_vocab_size=enc_vocab_size,
    out_vocab_size=dec_vocab_size,
    embedding_size=embedding_dim,
    lstm_size=lstm_size,
    input_length=input_length,
    batch_size=batch_size,
    att_units=att_units,
)

In [None]:
log_dir = f'attention_mechanism/models/attention_mechanism/Logs/'

# Stop after 10 epochs without a val_loss improvement.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=1,
)

# Multiply the learning rate by 0.9 after 1 epoch without a val_loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.9,
    patience=1,
    verbose=1,
)

# Keep only the weights of the best model seen so far (lowest val_loss).
check_point = ModelCheckpoint(
    'attention_mechanism/models/attention_mechanism/',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=0,
)

In [None]:
# Nadam optimizer with the masked cross-entropy defined in loss_function.
model.compile(
    loss=loss_function,
    optimizer=Nadam(learning_rate=0.001),
)

In [None]:
# Train on the truncated splits; the callbacks handle early stopping,
# learning-rate decay and checkpointing of the best weights.
model_history = model.fit(
    x=[train_enc_inp_truncated, train_dec_inp_truncated],
    y=train_dec_out_truncated,
    batch_size=batch_size,
    epochs=30,
    validation_data=(
        [val_enc_inp_truncated, val_dec_inp_truncated],
        val_dec_out_truncated,
    ),
    callbacks=[early_stop, reduce_lr, check_point],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 24: ReduceLROnPlateau reducing learning rate to 0.0009000000427477062.
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Print the layer list and parameter counts of the model.
model.summary()

Model: "encoder_decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  28032624  
                                                                 
 decoder (Decoder)           multiple                  38973178  
                                                                 
Total params: 67005802 (255.61 MB)
Trainable params: 67005802 (255.61 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Decoder-side vocabulary lookups: word -> id and id -> word.
corrc_wrd_idx_dict = tokenizer_dec.word_index
corrc_idx_wrd_dict = {v: k for k, v in corrc_wrd_idx_dict.items()}


def predict(input_sentence):
    """Greedy-decode the model's output for one input sentence.

    Args:
        input_sentence: Raw input text; it is tokenized with ``tokenizer_enc``
            and post-padded to ``input_length`` tokens.

    Returns:
        ``(sentence, attention_array)``: the predicted words joined by spaces
        (without the ``'<end>'`` token) and an
        ``(input_length, input_length)`` array whose row ``i`` holds the
        attention weights of decoding step ``i``. Rows for steps that were
        never reached stay zero.
    """
    # Use the configured sequence length instead of a hard-coded 12.
    max_len = input_length
    token_ids = tokenizer_enc.texts_to_sequences([input_sentence])
    # Shape (1, max_len): a batch holding the single padded sentence.
    encoder_input = tf.keras.preprocessing.sequence.pad_sequences(
        token_ids, maxlen=max_len, padding='post')

    # Address sub-models by attribute rather than by position in model.layers,
    # and let the encoder size its own zero states instead of hard-coding 192.
    encoder = model.encoder
    one_step_decoder = model.decoder.onestepdecoder
    enc_output, state_h, state_c = encoder(encoder_input, encoder.initialize_states(1))

    # NOTE(review): assumes the decoder's start token is spelled '<start>';
    # falls back to id 1 (the previous hard-coded value) when it is absent.
    start_id = corrc_wrd_idx_dict.get('<start>', 1)
    cur_vec = np.full((1, 1), start_id, dtype='int')

    pred = []
    attention_array = np.zeros([max_len, max_len])
    for step in range(max_len):
        output, state_h, state_c, att_weights, _ = one_step_decoder(
            cur_vec, enc_output, state_h, state_c)
        next_id = int(np.argmax(output))
        # An id with no vocabulary entry (e.g. padding id 0) used to raise
        # KeyError; treat it like '<end>' and stop decoding.
        if corrc_idx_wrd_dict.get(next_id, '<end>') == '<end>':
            break
        pred.append(next_id)
        attention_array[step] = tf.squeeze(att_weights).numpy()
        # Feed the predicted token back in as the next decoder input.
        cur_vec = np.full((1, 1), next_id, dtype='int')

    sentence = [corrc_idx_wrd_dict[word_id] for word_id in pred]
    return " ".join(sentence), attention_array

In [None]:
# Read the three splits with identical options: the first column becomes the
# index, and na_filter=False turns off NaN detection for missing values.
train, validation, test = (
    pd.read_csv(csv_path, na_filter=False, index_col=0)
    for csv_path in ('gec_train.csv', 'gec_validation.csv', 'gec_test.csv')
)

In [None]:
# Save the trained model to disk. This is the same path ModelCheckpoint was
# given as its filepath earlier in the notebook.
model.save('attention_mechanism/models/attention_mechanism/')



In [None]:
# Additionally save only the model's weights, under the "attention_with_weight" prefix.
model.save_weights('attention_mechanism/models/attention_mechanism/attention_with_weight')