In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,LSTM,Dense,Bidirectional,Concatenate,Attention
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

In [28]:
# Corpus file, given relative to the notebook's working directory.
data_path = 'fra-eng/fra.txt'

# Only the first `num_sample` lines of the corpus file are used.
num_sample = 10000

# Base hidden-state width of the LSTM layers (a tunable hyper-parameter).
latent_dim = 256


In [29]:
# Load the parallel corpus into two aligned lists:
#   input_texts  -- sentences to translate from (first tab-separated field)
#   output_texts -- sentences to translate to (second tab-separated field),
#                   wrapped in the 'Start' / 'End' markers
input_texts = []
output_texts = []

# Distinct characters seen on each side; a set de-duplicates on its own.
input_char = set()
output_char = set()

with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')

# Only the first `num_sample` lines are used.
for line in lines[:num_sample]:
    # Splitting on '\n' leaves an empty string after a trailing newline (and
    # for any blank line); skip it instead of failing on the unpack below.
    if not line:
        continue

    # Keep the first two fields and ignore any further columns, so a line
    # parses whether or not it carries extra trailing fields. A non-empty
    # line with fewer than two fields still raises ValueError.
    input_word, output_word = line.split('\t')[:2]

    # Mark where the decoder should start and where it should stop.
    # NOTE(review): the markers are multi-character strings and the texts are
    # processed character by character, so they contribute the individual
    # letters of 'Start' and 'End' to the output vocabulary rather than two
    # dedicated tokens -- confirm this is intended.
    output_word = 'Start' + output_word + 'End'

    input_texts.append(input_word)
    output_texts.append(output_word)

    # Iterating a string yields its characters, so update() adds each one.
    input_char.update(input_word)
    output_char.update(output_word)

In [30]:
# Turn each character set into a list sorted in ascending (code point) order,
# so that every character gets a stable position.
input_char = sorted(input_char)
output_char = sorted(output_char)

# Number of distinct characters on the encoder and decoder side.
encoder_length, decoder_length = len(input_char), len(output_char)

# Length, in characters, of the longest sentence on each side.
max_encoder_length = max(map(len, input_texts))
max_decoder_length = max(map(len, output_texts))



In [32]:
# Lookup tables from character to integer index; the index is the character's
# position in the corresponding sorted character list.
input_token = {char: i for i, char in enumerate(input_char)}
output_token = {char: i for i, char in enumerate(output_char)}

In [33]:
# Allocate the three 3D tensors, initially all zeros.
# Axes are (sentence index, character position, character index):
#   axis 0 -- one row per entry of input_texts
#   axis 1 -- length of the longest sentence on that side
#   axis 2 -- number of distinct characters on that side
encoder_input_data = np.zeros(
    shape=(len(input_texts), max_encoder_length, encoder_length),
    dtype=np.float32,
)
decoder_input_data = np.zeros(
    shape=(len(input_texts), max_decoder_length, decoder_length),
    dtype=np.float32,
)
# Same shape and dtype as the decoder input tensor.
decoder_output_data = np.zeros_like(decoder_input_data)

In [34]:
# Resize the encoder tensor's time axis (axis 1) to `max_decoder_length`.
# When the notebook is run top to bottom the tensor still holds only zeros
# here, so the side the padding is added on ('pre') does not change its
# contents; the call only changes the length of the time axis.
#
# `dtype` is passed explicitly because pad_sequences otherwise returns an
# int32 array, which would silently drop the float32 dtype the tensor was
# allocated with.
#
# NOTE(review): if max_encoder_length is larger than max_decoder_length this
# call shortens the time axis instead of extending it, and filling in the
# longest input sentences would then index out of bounds -- confirm the
# corpus never hits that case.
encoder_input_data = pad_sequences(
    encoder_input_data,
    padding='pre',
    maxlen=max_decoder_length,
    dtype='float32',
)

In [35]:
# One-hot encode every sentence pair into the pre-allocated tensors.
#   encoder_input_data[i, t]  -- character t of input sentence i
#   decoder_input_data[i, t]  -- character t of target sentence i
#   decoder_output_data[i, t] -- character t + 1 of target sentence i, i.e.
#                                the decoder input shifted one step ahead
# Positions past the end of a sentence are filled with the space character,
# so ' ' must be present in both vocabularies (KeyError otherwise).
input_pad_index = input_token[' ']
output_pad_index = output_token[' ']

for i, (input_text, target_text) in enumerate(zip(input_texts, output_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token[char]] = 1
    # Pad from the first unused position. The explicit length keeps this
    # correct for an empty sentence, where the loop above never assigns `t`.
    encoder_input_data[i, len(input_text):, input_pad_index] = 1

    for t, char_t in enumerate(target_text):
        decoder_input_data[i, t, output_token[char_t]] = 1
        if t > 0:
            # The output tensor has no slot for the first target character.
            decoder_output_data[i, t - 1, output_token[char_t]] = 1
    decoder_input_data[i, len(target_text):, output_pad_index] = 1
    # The output sequence is one step shorter, so its padding starts one
    # position earlier; max() guards the empty-sentence case against a
    # negative start index.
    decoder_output_data[i, max(len(target_text) - 1, 0):, output_pad_index] = 1

In [36]:
# Encoder half of the attention model.
# Input: sequences of one-hot character vectors of width `encoder_length`;
# the sequence length is left unspecified (None).
input_encoder = Input(shape=(None, encoder_length))

# Bidirectional LSTM that returns its per-step outputs together with the
# final hidden (h) and cell (c) state of the forward and backward passes.
bidirectional_lstm = Bidirectional(
    LSTM(latent_dim, return_sequences=True, return_state=True)
)

(
    encoder_outputs1,
    forw_state_h,
    forw_state_c,
    back_state_h,
    back_state_c,
) = bidirectional_lstm(input_encoder)

# Join the forward and backward final states into one h and one c vector.
final_enc_h = Concatenate()([forw_state_h, back_state_h])
final_enc_c = Concatenate()([forw_state_c, back_state_c])

# Context vector: the [h, c] pair used as the decoder's initial state.
encoder_states_1 = [final_enc_h, final_enc_c]

In [37]:
# Decoder input: sequences of one-hot character vectors of width
# `decoder_length`, with the sequence length left unspecified (None).
input_decoder = Input(shape=(None, decoder_length))

# The decoder LSTM is twice as wide as each encoder direction so that it can
# be initialised with the concatenated forward/backward encoder states.
decoder_lstm = LSTM(
    latent_dim * 2,
    return_sequences=True,
    return_state=True,
)

# Only the per-step outputs are kept; the two returned states are discarded.
decoder_outputs_1, _, _ = decoder_lstm(
    input_decoder,
    initial_state=encoder_states_1,
)

In [38]:
# Attention over the encoder outputs, driven by the decoder outputs.
#
# The Keras Attention layer takes its inputs as [query, value] and returns
# one vector per *query* time step. The decoder outputs must therefore be
# the query and the encoder outputs the value: each decoder step then gets a
# weighted summary of the encoder sequence, and the result has the decoder's
# time length, which the concatenation below requires.
# (Passing them the other way round mixes decoder states across all time
# steps and yields a result with the encoder's time length.)
attention_layer = Attention()
attention_results = attention_layer([decoder_outputs_1, encoder_outputs1])

# Concatenate each decoder output with its attention summary along the
# feature axis.
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs_1, attention_results])

# Per-step softmax over the output character vocabulary.
decoder_dense = Dense(decoder_length, activation='softmax')
decoder_output_dense = decoder_dense(decoder_concat_input)


In [40]:
# Training model: maps (encoder input, decoder input) to the per-step
# character distribution produced by the dense softmax layer.
model = Model(
    inputs=[input_encoder, input_decoder],
    outputs=decoder_output_dense,
)

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train for 17 epochs, holding out 30% of the samples for validation.
# NOTE(review): Keras takes the validation fraction from the end of the
# arrays, before any shuffling -- confirm that the tail of the corpus is
# representative of the rest.
model.fit(
    x=[encoder_input_data, decoder_input_data],
    y=decoder_output_data,
    epochs=17,
    validation_split=0.3,
)