In [17]:
import keras

In [18]:
import tensorflow as tf

In [19]:
from tensorflow.keras.layers import Input,Embedding,GRU,Bidirectional,Dense

In [20]:
from tensorflow.keras import Model

In [21]:
import numpy as np

In [22]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        
    def call(self, inputs):
        query,values=inputs
        query_with_time_axis = tf.expand_dims(query, 1)
        score1=self.W1(query_with_time_axis)
        score2=self.W2(values)
        combined_score=tf.nn.tanh(score1 + score2)
        score = self.V(combined_score)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector,attention_weights

In [27]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, enc_units):
        super(Encoder, self).__init__()
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x):
        x = self.embedding(x)
        output, state = self.gru(x)
        return output, state


In [28]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, dec_units):
        super(Decoder, self).__init__()
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)

        self.attention = BahdanauAttention(self.dec_units)

    def call(self,inputs):
        x, hidden, enc_output=inputs
        context_vector, attention_weights = self.attention([hidden, enc_output])
        x=tf.expand_dims(x,0)
        x = self.embedding(x)
        x=tf.transpose(x,perm=(1,0,2))
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        output, state = self.gru(x,initial_state=hidden)

        output = tf.reshape(output, (-1, output.shape[2]))

        x = self.fc(output)

        return x, state, attention_weights

In [29]:
class Seq2Seq(tf.keras.Model):
    def __init__(self, i_vocab_size,o_vocab_size,embedding_dim, gru_units):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(i_vocab_size, embedding_dim, gru_units)
        self.decoder = Decoder(o_vocab_size, embedding_dim, gru_units)

    def call(self, inputs):
        input_seq, target_seq= inputs
        enc_output, enc_hidden = self.encoder(input_seq)

        dec_hidden = enc_hidden

        loss = 0

        for t in range(target_seq.shape[1]):
            predictions, dec_hidden, _ = self.decoder([target_seq[:, t], dec_hidden, enc_output])

            loss += tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(target_seq[:, t], predictions, 
                                                                                   from_logits=True))

        batch_loss = (loss / int(target_seq.shape[1]))

        return batch_loss


In [30]:
model = Seq2Seq(src_vocab_size,trg_vocab_size,embed_size,gru_units)

In [31]:
optimizer=keras.optimizers.Adam(learning_rate=0.7,clipnorm=4)

In [32]:
import numpy

In [40]:
NUM_EPOCHS =2

for epoch in range(NUM_EPOCHS):
    total_loss = 0.0
    steps_per_epoch = max_inp_len // batch_size
    with tf.GradientTape() as tape:
        loss = model([src_sequences,tar_sequences])
        total_loss += loss

    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {total_loss.numpy():.4f}")

Epoch 1/2, Loss: 1.3728
Epoch 2/2, Loss: 1.1201


In [41]:
model.summary()

Model: "seq2_seq"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  187000    
                                                                 
 decoder (Decoder)           multiple                  419426    
                                                                 
Total params: 606426 (2.31 MB)
Trainable params: 606426 (2.31 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [4]:
import keras

In [5]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [8]:
inputs = []
outputs = []

data_file = open('spa.txt', encoding='utf-8')

count = 0
for line in data_file:
    count += 1
    if count > 100:
        break
    if '\t' not in line:
        continue
    ip, temp_op, extra = line.rstrip().split('\t')
    op = temp_op
    inputs.append(ip)
    outputs.append(op)

In [9]:
inputs=[sentence.lower() for sentence in inputs]
outputs=[sentence.lower() for sentence in outputs]

In [10]:
outputs = ['<start> '+sentence+' <end>' for sentence in outputs]

In [11]:
EnglishTokenizer=Tokenizer(oov_token="<UNK>")
EnglishTokenizer.fit_on_texts(inputs)
inp_sequences=EnglishTokenizer.texts_to_sequences(inputs)
max_inp_len=max(len(i) for i in inp_sequences)
src_sequences=pad_sequences(inp_sequences,maxlen=max_inp_len,padding="post")
Englishword2index=EnglishTokenizer.word_index
Englishindex2word=EnglishTokenizer.index_word

In [12]:
SpanishTokenizer=Tokenizer(oov_token="<UNK>")
SpanishTokenizer.fit_on_texts(outputs)
op_sequences=SpanishTokenizer.texts_to_sequences(outputs)
max_tar_len=max(len(i) for i in op_sequences)
tar_sequences=pad_sequences(op_sequences,maxlen=max_tar_len,padding="post")
Spanishword2index=SpanishTokenizer.word_index
Spanishindex2word=SpanishTokenizer.index_word

In [13]:
src_vocab_size=len(Englishword2index)+1
trg_vocab_size=len(Spanishword2index)+1
print("src_vocab_size:",src_vocab_size)
print("tar_vocab_size:",trg_vocab_size)

src_vocab_size: 58
tar_vocab_size: 125


In [14]:
print("max_inp_len:",max_inp_len)
print("max_tar_len:",max_tar_len)

max_inp_len: 2
max_tar_len: 5


In [15]:
gru_units=200
embed_size=100

In [16]:
tar_sequences.shape

(100, 5)

In [12]:
import numpy as np

In [13]:
x=np.random.random((2,3))

In [47]:
dataset = tf.data.Dataset.from_generator(
    data_generator,
    args=(src_sequences,tar_sequences, batch_size),
    output_types=(tf.int32, tf.int32),
    output_shapes=(
        tf.TensorShape((batch_size, src_sequences.shape[1])),
        tf.TensorShape((batch_size, tar_sequences.shape[1]))
    )
)

In [14]:
x[:, 0].shape

(2,)

In [69]:
def data_generator(encoder_input_data, decoder_input_data, batch_size):
    dataset_length = len(encoder_input_data)
    steps_per_epoch = dataset_length // batch_size

    while True:
        for i in range(steps_per_epoch):
            encoder_batch = encoder_input_data[i * batch_size: (i + 1) * batch_size]
            decoder_batch = decoder_input_data[i * batch_size: (i + 1) * batch_size]

            yield (encoder_batch, decoder_batch)

In [46]:
batch_size=1