In [26]:
from tensorflow.keras import Model, Input, Sequential

from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Layer, Add, GRU, \
                                    Activation, Softmax, Concatenate, TimeDistributed

from tensorflow.keras.losses import SparseCategoricalCrossentropy

import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import time

import pandas as pd
import numpy as np

import unicodedata
import re

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import random
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [28]:

# Define the Seq2Seq model with attention
class Seq2Seq(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, output_dim, dropout):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hidden_dim)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

        self.attention = nn.Linear(hidden_dim * 2, 1)

    def forward(self, src):
        # src: (src_len, batch_size)

        embedded = self.dropout(self.embedding(src))
        # embedded: (src_len, batch_size, emb_dim)

        outputs, hidden = self.rnn(embedded)
        # outputs: (src_len, batch_size, hidden_dim)
        # hidden: (1, batch_size, hidden_dim)

        # Calculate attention weights
        attn_weights = self.attention(torch.cat((hidden.repeat(src.shape[0], 1, 1), outputs), dim=2))
        attn_weights = torch.softmax(attn_weights, dim=0)

        # Apply attention to the outputs
        context = torch.sum(attn_weights * outputs, dim=0)
        context = context.unsqueeze(0)

        # context: (1, batch_size, hidden_dim)

        prediction = self.fc_out(context)
        # prediction: (1, batch_size, output_dim)

        return prediction.squeeze(0)

In [29]:
df=pd.read_csv("/content/drive/MyDrive/iisc/nlp/Assignment2aDataset (1).txt")

In [30]:
df.columns=['input','output']

In [31]:
df

Unnamed: 0,input,output
0,'9 may 1630','1630-05-09'
1,'15/03/2014','2014-03-15'
2,'mar 16 1675','1675-03-16'
3,'jun 16 1640','1640-06-16'
4,'friday 1791 2 09','1791-09-02'
...,...,...
39994,'december 26 1900','1900-12-26'
39995,'15 may 1828','1828-05-15'
39996,'friday april 18 1851','1851-04-18'
39997,'june 11 2070','2070-06-11'


In [32]:
from nltk.tokenize import word_tokenize

In [33]:
def preprocessing(df):
    # Initialize an empty list to store tokenized sentences

    df= df.str.replace('\'',' ').astype(str)
    df = df.str.replace('/',' ').astype(str)
    df = df.str.replace('-',' ').astype(str)
    tokenized_sentences = []

    # Tokenize and add start and end tokens to each sentence
    for sentence in df:
        tokens = word_tokenize(sentence)
        tokens = ['start'] + tokens + ['end']
        tokenized_sentences.append(tokens)



    return tokenized_sentences


In [34]:
tokenizer = Tokenizer()

In [35]:
in_df=preprocessing(df['input'])
out_df=preprocessing(df['output'])

In [36]:
print(in_df,out_df)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [37]:
df['input']=in_df
df['output']=out_df

In [38]:
df

Unnamed: 0,input,output
0,"[start, 9, may, 1630, end]","[start, 1630, 05, 09, end]"
1,"[start, 15, 03, 2014, end]","[start, 2014, 03, 15, end]"
2,"[start, mar, 16, 1675, end]","[start, 1675, 03, 16, end]"
3,"[start, jun, 16, 1640, end]","[start, 1640, 06, 16, end]"
4,"[start, friday, 1791, 2, 09, end]","[start, 1791, 09, 02, end]"
...,...,...
39994,"[start, december, 26, 1900, end]","[start, 1900, 12, 26, end]"
39995,"[start, 15, may, 1828, end]","[start, 1828, 05, 15, end]"
39996,"[start, friday, april, 18, 1851, end]","[start, 1851, 04, 18, end]"
39997,"[start, june, 11, 2070, end]","[start, 2070, 06, 11, end]"


In [39]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [40]:
def create_tokenizer(lines):

    # Initialize tokenizer
    tokenizer = Tokenizer()

    # Fit the tokenizer on the text provided
    tokenizer.fit_on_texts(lines)

    # Return the tokenizer
    return tokenizer
# Creating tokenizer instances for English text
tokenizer_input = create_tokenizer(df['input'])

# Creating tokenizer instances for French text
tokenizer_output = create_tokenizer(df['output'])

In [41]:
vocab_size_input = len(tokenizer_input.word_index) + 1

# For French
vocab_size_output = len(tokenizer_output.word_index) + 1

In [42]:
vocab_size_input

700

In [43]:
# Function to compute maximum length of tokenized forms
def max_length(lines):

    # Compute the maximum length amongst all sentences in text
    return max([len(s.split()) for s in lines])

# Storing the maximum length for English
length_input = 10

# Storing the maximum length for French
length_output = 5

In [44]:
# Function that will tokenize and pad the text
def encode_text(tokenizer, lines, length):

    # integer encode
    encoded = tokenizer.texts_to_sequences(lines)

    # pad encoded sequences
    padded = pad_sequences(encoded, maxlen=length, padding='post')

    return padded

# Apply the tokenization and padding on English Text
input = encode_text(tokenizer_input, df['input'], length_input)

# Apply the tokenization and padding on French Text
output = encode_text(tokenizer_output, df['output'], length_output)

In [45]:
print(input,input.shape)

[[ 1 28  3 ...  0  0  0]
 [ 1 37 71 ...  0  0  0]
 [ 1 53 27 ...  0  0  0]
 ...
 [ 1 51  9 ...  0  0  0]
 [ 1 13 17 ...  0  0  0]
 [ 1  4 32 ...  0  0  0]] (39999, 10)


In [46]:
from sklearn.model_selection import train_test_split

In [47]:
X_train, X_test, y_train, y_test=train_test_split(input,output,train_size=0.8,random_state=42)


In [48]:
input=X_train
output=y_train

In [49]:
print(input,output.size)

[[ 1 20  7 ...  0  0  0]
 [ 1 31  4 ...  0  0  0]
 [ 1 46  7 ...  0  0  0]
 ...
 [ 1 48 14 ...  0  0  0]
 [ 1 28 44 ...  0  0  0]
 [ 1  8 16 ...  0  0  0]] 159995


In [50]:
class Encoder(Model):

    # Intialize Model's Instance's parameters
    # the encoder parameters intialized above are taken as the default parameters for encoder

    def __init__(self, inp_vocab_size, inp_embed_size, inp_lstm_cells, batch_size, inp_len):

        super(Encoder, self).__init__()

        self.inp_embed_size = inp_embed_size # Initializing size that the Embedding layer outputs

        self.inp_vocab_size = inp_vocab_size # Initializing the vocabulary size of source text

        self.inp_lstm_cells = inp_lstm_cells # Initializing the number of hidden cells in the lstm layer(no of lstm)

        self.batch_size = batch_size         # Initializing the batch size
        self.inp_len = inp_len               # Initializing the maximum input length of any tokenized source text


        # Initializing an Embedding layer here

        self.enc_embedding = Embedding(self.inp_vocab_size, self.inp_embed_size, trainable=True)

        # Initializing an LSTM layer here

        self.lstm = LSTM(self.inp_lstm_cells, return_sequences=True, return_state=True)


    # Use 'call' function to make the Encoder Model produce actions that you would want it to

    # Takes the source input sequence as first input and a hidden state as the second input
    def call(self, inp_sequence, hidden_sequence):

        # First, call the embedding layer defined above to embed the source input sequence
        emb_output = self.enc_embedding(inp_sequence)
        # emb_output shape = [batch_size, max_source_inp_length, embedding_dim]

        inp_lstm_output, state_h, state_c = self.lstm(emb_output, initial_state = hidden_sequence)
        # lstm_output shape = [batch_size, max_source_inp_length, lstm_cells_dim]
        # state_h shape = [batch_size, lstm_cells_dim]

        # 1. lstm_output contains output of the LSTM from all time steps
        # 2. state_h contains output from the last time step
        # 3. state_c contains the information of the state of the LSTM at the last time step

        # return all the outputs at each time step (lstm_output) and the output at the last time step (state_h)
        return inp_lstm_output, state_h


    # This function is meant to initialize the hidden weights of the lstm layer

    def initialize_hidden_states(self):

        # There are two sets of initializations made - for the hidden cell input (h at t-1) and hidden cell state (c at t-1)
        return [tf.zeros([self.batch_size, self.inp_lstm_cells]), tf.zeros([self.batch_size, self.inp_lstm_cells])]
# Initialize an instance of the Encoder mo

In [51]:
batch_size = 32

# The source language's tokenized text will be embedded with this length
inp_embed_size = 128
# Number of hidden cells in the LSTM layer that learns sequences of source language
inp_lstm_cells = 256

# The target language's tokenized text will be embedded with this length
tar_embed_size = 128
# Number of hidden cells in the LSTM layer that learns sequences of target language
tar_lstm_cells = 256

# Number of attention units (will discuss about it now)
attention_units = 256

In [52]:
# Initialize an instance of the Encoder model above with the defined parameters

enc_model = Encoder(inp_vocab_size=vocab_size_input, inp_embed_size=inp_embed_size, inp_lstm_cells=inp_lstm_cells,
                    batch_size=batch_size, inp_len=length_input)


# Test the working below

# Initialize hidden states
initialized_hidden_states = enc_model.initialize_hidden_states()

# Feed an input and see if the model produces an output
enc_out, enc_state = enc_model(input[:batch_size], initialized_hidden_states)

inp_embed_size 128
inp_vocab_size 700
inp_lstm_cells 256


In [53]:

class Attention(Layer):

                                        # Default parameters supplied (same as encoder)
    def __init__(self, attention_units, bs = batch_size, inp_len = length_input):

        super(Attention, self).__init__()

        self.W1 = Dense(attention_units) # Initialize a Dense layer with neurons = number of attention_units
        self.W2 = Dense(attention_units) # Initialize a Dense layer with neurons = number of attention_units
        self.V = Dense(1)                 # Initialize a Dense layer with neurons = 1

        self.batch_size = bs
        self.inp_len = length_input


    # This function is executed when an instance of this class is called

    # hidden = output of the current target state
    # output = all outputs of the encoder model
    def call(self, hidden, output):

        # the shape of the hidden input is extended i.e. from [batch_size, tar_lstm_cells] to [batch_size, 1, tar_lstm_cells]
        hidden_extended = tf.expand_dims(hidden, axis=1)

        # hidden_extended shape = [batch_size, 1, target_lstm_cells_dim]
        # output shape = [batch_size, max_source_inp_length, source_lstm_cells_dim]

        # Formula 4 i.e. Bahdanau Attention applied here to generate a score
        score = self.V(tf.nn.tanh(self.W1(hidden_extended) + self.W2(output)))
        # score shape = [batch_size, max_source_inp_length, 1]                                     #### check this

        # Formula 1 applied here to get the attention weights (notice axis = 1)
        # since we have to compute a weight to each source input, the softmax is applied at axis=1 of score
        atn_weights = tf.nn.softmax(score, axis=1)
        # attention_weights shape = [batch_size, max_source_inp_length(128), 1]                         #### check this

        # A part of Formula 2 is applied where we multiple the attention weights with the outputs from the encoder model
        context_vector = atn_weights*output
        # context_vector shape = [batch_size, max_source_inp_length, source_lstm_cells]

        # The second part of Formula 2 where the multiplied product is summed against each source input
        # the axis=1 here has the same reasoning wherein we want to sum the values to get one vector for each source input
        context_vector = tf.reduce_sum(context_vector, axis=1)
        # context_vector shape = [batch_size, source_lstm_cells]

        # return the context_vector and the attention_weights generated from the above operations
        return context_vector, atn_weights

In [54]:
class Decoder(Model):

                        # Providing parameters to decoder to generate layers
    def __init__(self, tar_embed_size, tar_vocab_size, tar_lstm_cells, attention_units,
                 batch_size=batch_size, tar_len=length_output):

        super(Decoder, self).__init__()

        self.tar_embed_size = tar_embed_size # Initializing size that the Embedding layer outputs
        self.tar_vocab_size = tar_vocab_size # Initializing the vocabulary size of target text
        self.tar_lstm_cells = tar_lstm_cells # Initializing the number of hidden cells in the lstm layer
        self.batch_size = batch_size         # Initializing the batch size
        self.tar_len = tar_len               # Initializing the maximum input length of any tokenized target text
        self.attention_units = attention_units  # Initializing the number of units for the attention layer


        # Initializing an Embedding layer here

        # vocab_size given as the range within which it has to work
        # embed_size denoting the embedding output size
        self.dec_embedding = Embedding(self.tar_vocab_size, self.tar_embed_size, trainable=True)


        # Initializing an LSTM layer here

        # lstm_cells denotes the number of hidden cells inside the layer
        # return_sequences and return_state give out output of LSTM cell and its state at each time step
        self.lstm = LSTM(self.tar_lstm_cells, return_sequences=True, return_state=True)


        # Initializing an Attention layer here

        # attention_units denotes the number of neurons inside the layer
        self.attention = Attention(self.attention_units)


        # Initializing a Final Dense layer here

        # takes the same number or neurons as the vocabulary size of target text (producing a probability value for each word)
        self.final_layer = Dense(self.tar_vocab_size)


    # This call function is executed when an instance of this class is called

    # x = input to the decoder
    # hidden = output of the current target state
    # enc_output = all outputs of the encoder model
    def call(self, x, hidden, enc_output):

        # Applying the attention on hidden state of current target state and output from the encoder
        context_vector, attention_weights = self.attention(hidden, enc_output)
        # context_vector shape = [batch_size, lstm_cells]
        # attention_weights shape = [batch_size, max_source_inp_length, 1]                         #### check this

        # Apply embeddings on the decoder input
        emb_output = self.dec_embedding(x)
        # emb_output shape = [batch_size, 1, embedding_size]

        # Performing part 1 of Formula 3 i.e. concatenation of context_vector and embedding output
        # context_vector shape is expanded first from [batch_size, source_lstm_cells] to [batch_size, 1, source_lstm_cells]
        x_context = tf.concat([tf.expand_dims(context_vector, axis=1), emb_output], axis=-1)
        # x_context shape = [batch_size, 1, lstm_cells+embedding_size]

        # Performing part 2 of Formula 3 i.e. activating it using as LSTM layer
        # Running the x_context through as LSTM layer that learns the sequences
        tar_lstm_output, tar_state_h, tar_state_c = self.lstm(x_context)
        # lstm_output shape = [batch_size, 1, lstm_cells]
        # state_h shape = [batch_size, lstm_cells]
        # state_c shape = [batch_size, lstm_cells]

        # The output is reshaped here in order to be fed to the final Dense layer
        tar_lstm_output_reshaped = tf.reshape(tar_lstm_output, shape=(-1, tar_lstm_output.shape[2]))
        # tar_lstm_output_reshaped shape = [batch_size, lstm_cells]

        # the final dense layer outputs a score for each word that may be considered as a prediction
        # for this reason, the complete target vocabulary is provided as number of neurons to this layer
        word_prob = self.final_layer(tar_lstm_output_reshaped)
        # word_prob shape = [batch_size, tar_vocab_size]

        # return the word predictions, last output from the decoder LSTM and the attention_weights
        return word_prob, tar_state_h, attention_weights


In [55]:
dec_model = Decoder(tar_embed_size=tar_embed_size, tar_vocab_size=vocab_size_output, tar_lstm_cells=tar_lstm_cells,
                    attention_units=attention_units)

# Test the working below

dec_out, dec_state, atn_w = dec_model(tf.random.uniform((batch_size, 1)), enc_state, enc_out)

In [56]:
# Adam optimizer will be implemented
optimizer = tf.keras.optimizers.Adam()

# An instance of the SparseCategoricalCrossentropy Loss with some required arguments that define how it has to perform
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

In [57]:
# Initializing Loss function here
def loss_function(real, pred):

    # mask shape = [batch_size, ]
    mask = tf.math.logical_not(tf.math.equal(real, 0)) # tf.math.equal returns True when values are equal to 0
                                                #tf.math.logical_not inverses the above (True<==>False)

    # The mask is meant to only take losses where there is an actual word that exists i.e.
    # not for cases where the padding has been placed to put 0 as a token

    # Compute loss using real and pred values
    loss_ = loss_object(real, pred)
    # loss_ shape = [batch_size, ]

    # casts mask variable as a tf loss type variable
    mask = tf.cast(mask, dtype=loss_.dtype)

    # multiplies loss with mask (this operation will nullify any loss values where the padding has been put in place)
    loss_ *= mask
    # loss_ shape = [batch_size, ]

            # tf.reduce_mean computes the mean loss
    return tf.reduce_mean(loss_)

In [58]:


# Function for shuffling the dataset for fitting the model with (implemented every epoch)
def shuffler(lang_inp, lang_out):

    n_elem = lang_inp.shape[0]
    indices = np.random.choice(n_elem, size=n_elem, replace=False)

    return lang_inp[indices], lang_out[indices]


# Function for generating a batch of data from the dataset provided
def generator(batch_number, lang_input, lang_output):

    if len(lang_input) <= batch_number*batch_size+batch_size:

        return (lang_input[batch_number*batch_size:],
            lang_output[batch_number*batch_size:])

    return (lang_input[batch_number*batch_size: batch_number*batch_size+batch_size],
            lang_output[batch_number*batch_size: batch_number*batch_size+batch_size])

In [59]:
# Function to call for training every batch

@tf.function  # Decorator - Compiles the function below into a callable TensorFlow graph
def train_step(inp, targ, enc_hidden): # runs for a single batch

    # initialize loss as 0 for the batch
    loss = 0

    # uses gradient tape so that differentiation takes place accordingly to update gradient and weights
    with tf.GradientTape() as tape:

        # get the output from encoder model
        enc_output, enc_hidden = enc_model(inp, enc_hidden)
        # enc_output shape = [batch_size, max_source_inp_length, lstm_cells]
        # enc_hidden shape = [batch_size, lstm_cells]

        # assign enc_hidden state as dec_hidden state
        dec_hidden = enc_hidden
        # dec_hidden shape = [batch_size, lstm_cells]

        # for each sample in batch take starting index as the same as 'start' taken from target tokenizer
        # use the above as the first input to the decoder model
        dec_input = tf.expand_dims([tokenizer_output.word_index['start']] * batch_size, 1)
        # dec_input shape = [batch_size, 1]

        # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]): # run for the complete length of target outputs

          # passing enc_output to the decoder
          predictions, dec_hidden, _ = dec_model(dec_input, dec_hidden, enc_output)
          # predictions shape = [batch_size, tar_vocab_size]
          # dec_hidden shape = [batch_size, lstm_cells]
          # targ[:, t] shape = [batch_size, ]

          # compute loss for the word predicted (t) after the previous word supplied for all samples
          loss += loss_function(targ[:, t], predictions)

          # using teacher forcing (take the next word as an input to the decoder model)
          dec_input = tf.expand_dims(targ[:, t], 1)
          # dec_input shape = [batch_size, 1]

    # computing batch loss by dividing the total loss with the length of target
    batch_loss = (loss / int(targ.shape[1]))


    # As the loss has now been computed, let's notify the model to optimize the gradients

    # collecting all variables to compute gradient
    variables = enc_model.trainable_variables + dec_model.trainable_variables

    # compute the gradients. (loss is differentiated against variables)
    gradients = tape.gradient(loss, variables)

    # Applies gradients to variables
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [60]:
EPOCHS = 20 # Initialize the number of epochs to run

# To store loss of each epoch
loss_history = []

# Computing number of batches
number_of_batches = input.shape[0]//batch_size

# Splitting the dataset at the end to form full batches
lang_input_split = input[:number_of_batches*batch_size]
lang_output_split = output[:number_of_batches*batch_size]


for epoch in range(EPOCHS):

    start = time.time()

    # initialize hidden states of encoder for the epoch
    enc_hidden = enc_model.initialize_hidden_states()

    # total loss is 0 for the epoch
    total_loss = 0

    # Shuffle the dataset here
    lang_inp, lang_out = shuffler(lang_input_split, lang_output_split)

    # run for all batches of inputs and targets
    for batch_number in range(number_of_batches):

        # Generate the batch to be sent to the model for training
        inp, targ = generator(batch_number, lang_inp, lang_out)

        # apply the train step function declared earlier and get the loss for the batch
        batch_loss = train_step(inp, targ, enc_hidden)

        # add the batch loss to total loss for the epoch
        total_loss += batch_loss

        # printing some summaries inside the loop
        if batch_number % 200 == 0:
              print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                       batch_number,
                                                       batch_loss.numpy()))

    # Appending the total loss of the epoch here
    loss_history.append(total_loss / number_of_batches)


    # printing epoch summaries here
    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / number_of_batches))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


    # The below set of checks are made to ensure that loss is reducing with each epoch. It it is not, break out of the loop
    if epoch >= 2:
        if loss_history[-1] >= loss_history[-2]: #patience is 0
            print("breaking now as loss isn't reducing")
            break

Epoch 1 Batch 0 Loss 5.0972
Epoch 1 Batch 200 Loss 2.4632
Epoch 1 Batch 400 Loss 2.1736
Epoch 1 Batch 600 Loss 1.8890
Epoch 1 Batch 800 Loss 1.7666
Epoch 1 Loss 2.1774
Time taken for 1 epoch 29.944136381149292 sec

Epoch 2 Batch 0 Loss 1.6039
Epoch 2 Batch 200 Loss 1.5213
Epoch 2 Batch 400 Loss 1.4076
Epoch 2 Batch 600 Loss 1.3844
Epoch 2 Batch 800 Loss 1.3406
Epoch 2 Loss 1.4474
Time taken for 1 epoch 11.238277912139893 sec

Epoch 3 Batch 0 Loss 1.3092
Epoch 3 Batch 200 Loss 1.3917
Epoch 3 Batch 400 Loss 1.3230
Epoch 3 Batch 600 Loss 1.2934
Epoch 3 Batch 800 Loss 1.3181
Epoch 3 Loss 1.3306
Time taken for 1 epoch 11.668607473373413 sec

Epoch 4 Batch 0 Loss 1.2680
Epoch 4 Batch 200 Loss 1.2783
Epoch 4 Batch 400 Loss 1.2649
Epoch 4 Batch 600 Loss 1.2661
Epoch 4 Batch 800 Loss 1.2602
Epoch 4 Loss 1.2884
Time taken for 1 epoch 11.437989473342896 sec

Epoch 5 Batch 0 Loss 1.2651
Epoch 5 Batch 200 Loss 1.2512
Epoch 5 Batch 400 Loss 1.2826
Epoch 5 Batch 600 Loss 1.2284
Epoch 5 Batch 800 Loss

In [61]:
# Function to translate the sentence

def evaluate(sentence):


    # Convert the datatype to tensorflow-tensor
    inputs = tf.convert_to_tensor(sentence)

    # initialize to store the result
    result = ''

    # initialize hidden weights of encoder
    hidden = [tf.zeros([1, inp_lstm_cells]), tf.zeros([1, inp_lstm_cells])]

    # get the output of the encoder using initialized weights
    enc_out, enc_hidden = enc_model(inputs, hidden)

    # initialize decoder hidden weights same as encoder hidden weights
    dec_hidden = enc_hidden

    # intialize the decoder input as the tokenized form of 'start'
    dec_input = tf.expand_dims([tokenizer_output.word_index['start']], 0)

    # run for the maximum length of target language
    for t in range(length_output):

        # get the output of the encoder based on the outputs provided from encoder
        predictions, dec_hidden, attention_weights = dec_model(dec_input,
                                                             dec_hidden,
                                                             enc_out)

        # get the argument(token) for the word that is given maximum probability score by the model
        predicted_id = tf.argmax(predictions[0]).numpy()

        # check if 'end' is hit on the outputted word from decoder model
        if tokenizer_output.index_word[predicted_id] == 'end':
            return result, sentence

        # Get the word based on the token and add to result

        result += tokenizer_output.index_word[predicted_id]+ " "

        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    return result, sentence

In [62]:
y_act=[]
for input in y_test:
  t=""
  t=t+tokenizer_output.index_word[input[1]]+" "
  t=t+tokenizer_output.index_word[input[2]]+" "
  t=t+tokenizer_output.index_word[input[3]]+" "
  y_act.append(t)


In [63]:
y_act

['1710 09 15 ',
 '1841 06 08 ',
 '1565 04 14 ',
 '2071 08 03 ',
 '1529 02 06 ',
 '1594 07 30 ',
 '1581 05 08 ',
 '1856 01 24 ',
 '2012 04 29 ',
 '1538 06 02 ',
 '1527 05 31 ',
 '1986 03 25 ',
 '1542 02 10 ',
 '2023 09 17 ',
 '1980 03 23 ',
 '1877 12 29 ',
 '1794 03 10 ',
 '1819 08 15 ',
 '1766 12 20 ',
 '1537 07 20 ',
 '1622 01 11 ',
 '1946 10 13 ',
 '1626 11 01 ',
 '2013 02 02 ',
 '1807 02 22 ',
 '2035 11 12 ',
 '1571 02 22 ',
 '1881 07 08 ',
 '1921 04 12 ',
 '1648 02 08 ',
 '1780 05 10 ',
 '1896 04 18 ',
 '2067 12 20 ',
 '1990 12 15 ',
 '1873 05 26 ',
 '1926 10 28 ',
 '1596 03 21 ',
 '1800 07 26 ',
 '1696 11 22 ',
 '1524 05 11 ',
 '1851 02 26 ',
 '1578 11 14 ',
 '1763 04 09 ',
 '1707 09 15 ',
 '1534 01 04 ',
 '1695 12 06 ',
 '1572 08 21 ',
 '1908 08 12 ',
 '1741 09 17 ',
 '1810 02 25 ',
 '1571 03 01 ',
 '1956 07 17 ',
 '1715 06 23 ',
 '1627 01 02 ',
 '1626 03 18 ',
 '1884 11 13 ',
 '2015 07 01 ',
 '2068 02 01 ',
 '2056 12 31 ',
 '1663 07 17 ',
 '1767 11 03 ',
 '1640 08 24 ',
 '1697 0

In [None]:
import tensorflow as tf

# Check if GPU is available
if tf.test.gpu_device_name():
    print('GPU is available')
else:
    print('GPU is NOT available')

# Assuming you've defined the evaluate function as mentioned earlier

result = []  # Initialize a list to store the results

# Assuming X_test is your input data
for i in range(len(X_test)):
    print(i)

    # Convert your input to a TensorFlow tensor and move it to the GPU if available
    input_tensor = tf.convert_to_tensor(X_test[[i]], dtype=tf.float32)
    if tf.test.is_gpu_available():
        input_tensor = input_tensor.gpu()

    out, sentence = evaluate(input_tensor)
    result.append(out)


In [69]:
result[0]

'1710 09 15 '

In [66]:
from sqlalchemy.engine.result import ResultInternal
# Define two arrays of strings
array1 = y_act
array2 = result

# Initialize a variable to keep track of matching elements
matching_count = 0

# Iterate through the elements of both arrays
for i in range(len(array1)):
    # Compare the elements and check for a match
    if array1[i] == array2[i]:
        matching_count += 1

# Calculate accuracy as the ratio of matching elements to the total number of elements
accuracy = (matching_count / len(array1)) * 100

# Print the accuracy
print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 95.93%


In [67]:
error=100-accuracy
print(error)

4.074999999999989


In [68]:
pos=[]
# Initialize a variable to keep track of matching elements
for k in range(10):
  matching_count = 0

  # Iterate through the elements of both arrays
  for i in range(len(array1)):
      # Compare the elements and check for a match
      if array1[i][k] == array2[i][k]:
          matching_count += 1

  # Calculate accuracy as the ratio of matching elements to the total number of elements
  pos.append(matching_count)

# Print the accuracy
print(pos)

[7912, 7731, 7907, 7868, 8000, 7989, 7981, 8000, 7986, 7979]
