In [None]:
import numpy as np
import pandas as pd 
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Bidirectional, LSTM,  Dense, Softmax,Attention,Input,RepeatVector, Concatenate, Permute, Dot,Multiply,Activation
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import string

In [None]:
# Load the sentence pairs. The CSV is a slightly modified version of the
# data published at http://www.manythings.org.
data=pd.read_csv("../input/englisharabic/eng-ara.csv")

In [None]:
# Preview the first rows of the freshly loaded frame.
data.head()

In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Print the per-column summary. `info` is a method: without the call
# parentheses the cell only displays the bound-method repr, not the summary.
data.info()

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
def text_preprocessing(txt):
    """Remove ASCII punctuation, lowercase, and trim surrounding whitespace.

    Only the characters listed in ``string.punctuation`` are removed; any
    other character (including non-ASCII punctuation) is kept.

    Args:
        txt: The sentence to clean, as a ``str``.

    Returns:
        The cleaned sentence as a new ``str``.
    """
    # The previous version also called ``txt.encode('utf8', 'ignore')`` and
    # discarded the result. Strings are immutable, so that call had no effect
    # and has been dropped.
    drop_punctuation = str.maketrans("", "", string.punctuation)
    return txt.translate(drop_punctuation).lower().strip()

In [None]:
# Run `text_preprocessing` over every cell, one column at a time.
for column_name in data.columns:
    data[column_name] = data[column_name].apply(text_preprocessing)

In [None]:
import unicodedata

import re

# Convert the unicode sequence to ascii
def unicode_to_ascii(s):
    """Return `s` with its non-spacing combining marks removed.

    The string is decomposed with NFD normalisation and every character whose
    Unicode category is ``Mn`` is dropped. Despite the name, all other
    non-ASCII characters are kept unchanged.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
def preprocess_sentence(w):
    """Normalise one sentence before tokenisation.

    Steps, in order:
      1. lowercase, trim, and strip combining marks via `unicode_to_ascii`;
      2. surround each of ``? . ! , ¿`` with spaces;
      3. collapse every run of spaces and double quotes into one space;
      4. replace every run of characters that are not ASCII letters,
         ``? . ! , ¿`` or in the range U+0600-U+06FF with one space;
      5. trim the result.

    No start/end markers are added to the sentence.
    """
    text = unicode_to_ascii(w.lower().strip())

    # (pattern, replacement) pairs, applied in this exact order.
    substitutions = (
        (r"([?.!,¿])", r" \1 "),
        (r'[" "]+', " "),
        (r"[^a-zA-Z?.!,¿\u0600-\u06FF]+", " "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return text.strip()

In [None]:
# Run `preprocess_sentence` over every cell, one column at a time.
for column_name in data.columns:
    data[column_name] = data[column_name].apply(preprocess_sentence)

In [None]:
# Show the type of `data` after the column-wise cleaning.
type(data)

In [None]:
# Preview the first rows again, now that both cleaning passes have run.
data.head()

In [None]:
# Independent copies of the input ("English") and target ("Arabic") columns.
inp_txt, targ_txt = (data[column].copy() for column in ("English", "Arabic"))

In [None]:
# Fixed sequence length used when padding/truncating both inputs and targets.
max_inp_size = 15
max_targ_size = max_inp_size

In [None]:
# Tokenizer for the input (English) sentences, created with default settings.
inp_tokenizer=Tokenizer()

In [None]:
# Build the input vocabulary from the input sentences and keep the
# tokenizer's `word_index` mapping under a shorter name.
inp_tokenizer.fit_on_texts(inp_txt)
inp_vocab=inp_tokenizer.word_index

In [None]:
# Peek at the first ten keys of the input vocabulary.
list(inp_vocab)[:10]

In [None]:
# Convert every input sentence into its sequence of vocabulary indices.
inp_seq=inp_tokenizer.texts_to_sequences(inp_txt)

In [None]:
# Force every input sequence to exactly max_inp_size entries: shorter ones
# are padded at the front ("pre"), longer ones are cut at the end ("post").
inp_seq=pad_sequences(inp_seq,maxlen=max_inp_size,padding="pre",truncating='post')

In [None]:
# Show the first two padded input sequences.
list(inp_seq[:2])

In [None]:
# Tokenizer for the target (Arabic) sentences, created with default settings.
targ_tokenizer=Tokenizer()

In [None]:
# Build the target vocabulary from the target sentences and keep the
# tokenizer's `word_index` mapping under a shorter name.
targ_tokenizer.fit_on_texts(targ_txt)
targ_vocab=targ_tokenizer.word_index

In [None]:
# Peek at the first ten keys of the target vocabulary.
list(targ_vocab)[:10]

In [None]:
# Number of distinct entries in the target vocabulary.
len(targ_vocab)

In [None]:
# Reverse lookup for the target side: vocabulary index -> word.
inv_targ_vocab = dict(zip(targ_vocab.values(), targ_vocab.keys()))

In [None]:
# Peek at the first ten keys of the reverse lookup.
list(inv_targ_vocab)[:10]

In [None]:
# Convert every target sentence into its sequence of vocabulary indices.
targ_seq=targ_tokenizer.texts_to_sequences(targ_txt)

In [None]:
# Show the first ten target sequences before padding.
list(targ_seq[:10])

In [None]:
# Force every target sequence to exactly max_targ_size entries: shorter ones
# are padded at the front ("pre"), longer ones are cut at the end ("post").
targ_seq =pad_sequences(targ_seq,maxlen=max_targ_size,padding="pre",truncating='post')

In [None]:
# Show the first ten target sequences after padding.
list(targ_seq[:10])

In [None]:
# One-hot encode the padded input indices.
# NOTE(review): the +1 presumably reserves a class for index 0, which the
# padding inserts -- confirm against the tokenizer's indexing.
oh_inp_seq=to_categorical(inp_seq,num_classes=len(inp_vocab)+1)

In [None]:
# Show the one-hot encoding of the first two input sequences.
list(oh_inp_seq[:2])

In [None]:
# One-hot encode the padded target indices.
# NOTE(review): the +1 presumably reserves a class for index 0, which the
# padding inserts -- confirm against the tokenizer's indexing.
oh_targ_seq=to_categorical(targ_seq,num_classes=len(targ_vocab)+1)

In [None]:
# Show the one-hot encoding of the first two target sequences.
list(oh_targ_seq[:2])

In [None]:
# Encoder: the pre-attention bidirectional LSTM.
# return_sequences=True makes it emit one hidden state per input time step,
# which is what the attention step needs.
# No input_shape is passed: the layer is applied to a Keras Input tensor in
# the functional API, and that tensor determines the input shape. The old
# input_shape=(1, 15, 8080) described neither that tensor nor a valid
# per-sample LSTM shape, so it has been removed.
encoder = Bidirectional(LSTM(units=4040, return_sequences=True))

In [None]:
# Decoder: the post-attention LSTM.
# return_state=True makes each call return the layer's states in addition to
# its output, so they can be passed back in as `initial_state` on the next call.
decoder = LSTM(8080, return_state = True)

In [None]:
import tensorflow.keras.backend as K
def softmax(x, axis=1):
    """Softmax activation function.

    # Arguments
        x : Tensor with at least two dimensions.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `x` has fewer than two dimensions.
    """
    ndim = K.ndim(x)
    if ndim < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
    if ndim == 2:
        # Pass `axis` through: previously the 2-D branch ignored it and always
        # normalised over the last axis, whatever the caller asked for.
        return K.softmax(x, axis=axis)
    # Subtract the per-slice maximum before exponentiating for numerical
    # stability; the result of the normalisation is unchanged.
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))
    s = K.sum(e, axis=axis, keepdims=True)
    return e / s
        

In [None]:
# Shared layers used by every attention step.

# The decoder's previous hidden state is compared with every encoder time
# step, so it is repeated once per (padded) input position.
repeator = RepeatVector(max_inp_size)
# Joins each encoder state with the repeated decoder state on the feature axis.
concatenator = Concatenate(axis=-1)
# Two dense layers reduce each concatenated pair to a single energy value;
# the final ReLU keeps the energies non-negative.
densor1 = Dense(10, activation = "tanh")
densor2 = Dense(1, activation = "relu")
# Turns the energies into attention weights using the custom `softmax`
# (which normalises over axis 1 by default).
activator = Activation(softmax, name='attention_weights')
# Output distribution over the target classes. The width is taken from the
# target vocabulary (same num_classes as the one-hot targets) instead of a
# hard-coded number, so it always matches the training labels.
output_layer = Dense(len(targ_vocab) + 1, activation=softmax)
# Combines the attention weights with the encoder states along axis 1.
dotor = Dot(axes = 1)

In [None]:
def one_step_attention(a, s_prev):
    """Compute the attention context for a single decoder step.

    Uses the module-level layers `repeator`, `concatenator`, `densor1`,
    `densor2`, `activator` and `dotor`.

    Args:
        a: Encoder outputs that the attention weights are applied to.
        s_prev: Previous decoder hidden state.

    Returns:
        The context produced by `dotor` from the attention weights and `a`.
    """
    # Repeat the decoder state and pair it with the encoder outputs.
    repeated_state = repeator(s_prev)
    paired = concatenator([a, repeated_state])
    # Two dense layers score each pair; `activator` normalises the scores.
    scores = densor2(densor1(paired))
    weights = activator(scores)
    return dotor([weights, a])

In [None]:
# Hidden sizes. n_s is the width of the initial-state inputs below and must
# equal the unit count the `decoder` LSTM was created with.
n_a = 4040
n_s = 8080
# One-hot encoded input sentence. The shape is derived from the padded length
# and the one-hot width used for `oh_inp_seq`, so it cannot drift away from
# the data that is fed to the model.
X = Input(shape=(max_inp_size, len(inp_vocab) + 1))
# Initial hidden and cell state of the decoder, supplied as model inputs.
s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
# Running decoder states, updated at every decoding step.
s = s0
c = c0
# Collects one output tensor per decoding step.
outputs = []

In [None]:
# Run the encoder on the input tensor; `a` is what the attention step reads.
a=encoder(X)

In [None]:
# Unroll the decoder once per target position. The step count follows
# max_targ_size (the padded target length) rather than a literal 15, so the
# model always has as many outputs as the training targets have time steps.
for t in range(max_targ_size):
    # Attention context for this step, based on the current decoder state.
    context = one_step_attention(a, s)
    # The decoder returns its output plus two states; the output is kept as
    # the new `s`, the last returned state as the new `c`.
    s, _, c = decoder(context, initial_state=[s, c])
    out = output_layer(s)
    outputs.append(out)

In [None]:
# Assemble the model: the sentence plus the two initial decoder states go in,
# the list of per-step outputs comes out.
model = Model(inputs=[X,s0,c0],outputs=outputs)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Adam with inverse-time learning-rate decay:
#     lr(step) = 0.005 / (1 + 0.01 * step)
# This is the same decay the legacy `decay=0.01` argument applied. It is
# written as an explicit schedule, and `learning_rate` replaces the old `lr`
# alias, because recent Keras optimizers no longer accept `lr`/`decay`.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.005, decay_steps=1, decay_rate=0.01)
opt = Adam(learning_rate=lr_schedule, beta_1=0.9, beta_2=0.999)
model.compile(loss='categorical_crossentropy', optimizer=opt,metrics=['accuracy'])

In [None]:
# Zero initial hidden/cell state for every training example. All arrays passed
# to `fit` must have the same number of samples, so the first dimension has to
# be the dataset size; a single row (the previous shape) does not match
# `oh_inp_seq`.
# NOTE: from here on s0, c0 and outputs are NumPy training arrays and no
# longer the Keras tensors that were used to build the model.
num_samples = oh_inp_seq.shape[0]
s0 = np.zeros((num_samples, n_s))
c0 = np.zeros((num_samples, n_s))
# The model has one output per target time step, so the first two axes of the
# one-hot targets are swapped and the result is split into a list along the
# new first axis.
outputs = list(oh_targ_seq.swapaxes(0,1))

In [None]:
# Train for one epoch with a batch size of 100.
model.fit([oh_inp_seq, s0, c0], outputs, epochs=1, batch_size=100)