<a href="https://colab.research.google.com/github/SwarajLandge/Deep-Learning-AIC/blob/main/MachineTranslation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_table('mar.txt',usecols=[0,1],names=['eng', 'mar'])

In [5]:
df.sample(5)

Unnamed: 0,eng,mar
7981,I can do anything.,मी काहीही करू शकते.
28362,Tom came to Boston in 2013.,टॉम २०१३ साली बॉस्टनला आला.
33646,Who said we were going to die?,आपण मरणारच होतो असं कोण म्हणालं?
9107,Tom wants to play.,टॉमला खेळायचं आहे.
6403,I love Halloween.,मला हॅलोवीन खूप आवडतं.


In [6]:
# Lowercase all characters
df.eng = df.eng.apply(lambda x: x.lower())
df.mar = df.mar.apply(lambda x: x.lower())

In [7]:
print(df.eng[1000])
print(df.mar[1000])

i smell gas.
मला गॅसचा वास येतोय.


In [8]:
# Remove quotes
import re
df.eng=df.eng.apply(lambda x: re.sub("'", '', x))
df.mar=df.mar.apply(lambda x: re.sub("'", '', x))

In [9]:
print(df.eng[100])
print(df.mar[100])

im fat.
मी जाडा आहे.


In [10]:
 # Set of all special characters
 # Remove all the special characters
import string
exclude = set(string.punctuation)
df.eng = df.eng.apply(lambda x : ''.join(c for c in x if c not in exclude))
df.mar = df.mar.apply(lambda x : ''.join(c for c in x if c not in exclude))

In [11]:
# Remove all numbers from text
from string import digits
remove_digits = str.maketrans('', '', digits)
df.eng = df.eng.apply(lambda x: x.translate(remove_digits))
df.mar = df.mar.apply(lambda x: re.sub("[२३०८१५७९४६]", "", x))

In [12]:
# Remove extra spaces
df.eng = df.eng.apply(lambda x: x.strip())
df.mar = df.mar.apply(lambda x: x.strip())
df.eng = df.eng.apply(lambda x: re.sub(" +", " ", x))
df.mar = df.mar.apply(lambda x: re.sub(" +", " ", x))

In [13]:
# Add start and end tokens to target sequences
df.mar = df.mar.apply(lambda x : 'START_ '+ x + ' _END')

In [14]:
df.sample(5)

Unnamed: 0,eng,mar
6878,thats his house,START_ ते त्याचं घर आहे _END
142,back off,START_ हट _END
42893,this computer has a pentium microprocessor,START_ या कम्प्युटरमध्ये पेन्टियम मायक्रोप्रोस...
20544,turn the flashlight on,START_ बॅटरी चालू कर _END
135,who ran,START_ कोण पळालं _END


### We compute the vocabulary, vocabulary sizes and the length of maximum sequence for both English and Marathi

In [15]:
# Eng vocab
all_eng_words = set()
for eng in df.eng:
  for word in eng.split():
    if word not in all_eng_words:
      all_eng_words.add(word)

# Mar vocab
all_mar_words = set()
for mar in df.mar:
  for word in mar.split():
    if word not in all_mar_words:
      all_mar_words.add(word)

print(all_eng_words)
print(all_mar_words)

{'झालंय', 'बायकोचे', 'घडूच', 'आयुष्याने', 'जुनं', 'पुस्तकाबरोबर', 'वाजवतोय', 'जोकर', 'बकिंगहॅम', 'महिन्यांपूर्वी', 'पावसात', 'रिपब्लिकन', 'रोमनांने', 'जमिनीवरून', 'कसं', 'कसली', 'गरिबांविषयी', 'कठोर', 'नकार', 'हॅपी', 'डिसेंबरमध्ये', 'हलवता', 'ढोंग', 'ऑनलाईन', 'वाढदिवसाला', 'शिंकला', 'तलवार', 'असणे', 'फिरतो', 'कॉन्फरन्स', 'रिचार्जेबल', 'जेवायचो', 'गॅरेज', 'बाबांपेक्षा', 'मतं', 'सुशी', 'चमत्कारांवर', 'उठायची', 'प्रोग्रामर', 'सोडलेली', 'डाव्या', 'बॅल्टिक', 'प्लेटमधून', 'स्कॉटलंडमध्ये', 'चहाचा', 'लेखकांमधील', 'लपवतोय', 'गिर्\u200dहाईकच', 'बोलतं', 'सहन', 'ग्राफिक', 'बॉस्टनबाबत', 'सर्वर', 'लावली', 'लिनक्सबद्दल', 'तरुणपणेतच', 'ऑल्बनीपासून', 'थांबावं', 'कर्सर', 'उचलला', 'शेजार्\u200dयांना', 'साखरेबरोबर', 'चांसेलर्सविलची', 'कोणतेही', 'बुडवली', 'हिट', 'ब्लाऊज', 'पोहाल', 'जपानी', 'वर्णमालेतलं', 'आधुनिक', 'खोलीतील', 'थायलंडमध्ये', 'होतास', 'तलावाच्या', 'मूल', 'नाहीसा', 'तुझ्याइतकी', 'पोल', 'फाशी', 'पेसो', 'युरोपचा', 'आगीपाशी', 'आजाराचा', 'घड्याळाप्रमाणे', 'सापडलात', 'गोमांस', 'जमला', 'पाठवला', 'सम

In [16]:
# Max length of Eng seq
eng_length = []
for l in df.eng:
  eng_length.append(len(l.split(' ')))
max_length_src = np.max(eng_length)
print(max_length_src)

34


In [17]:
# Max length of Mar seq
mar_length = []
for l in df.mar:
  mar_length.append(len(l.split(' ')))
max_length_tar = np.max(mar_length)
print(max_length_tar)

37


In [18]:
input_words = sorted(list(all_eng_words))
target_words = sorted(list(all_mar_words))

In [19]:
# vocab size
num_encoder_tokens = len(all_eng_words)
num_decoder_tokens = len(all_mar_words)
print("eng vocab size",num_encoder_tokens)
print("mar vocab size",num_decoder_tokens)

eng vocab size 5814
mar vocab size 14212


In [20]:
# For zero padding
num_decoder_tokens += 1 

In [21]:
# Create word to token dictionary for both source and target

input_token_index = dict([(word, i+1) for i, word in enumerate(input_words)])
target_token_index = dict([(word, i+1) for i, word in enumerate(target_words)])
print(input_token_index)
print(target_token_index)

{'10': 1, '100': 2, '300': 3, 'START_': 4, '_END': 5, 'a': 6, 'b': 7, 'h₂o': 8, 'ntt': 9, 'uk': 10, 'अ': 11, 'अँकरेजमार्गे': 12, 'अँजिलीस': 13, 'अँटीव्हायरस': 14, 'अँड': 15, 'अँडी': 16, 'अँडीजमधल्या': 17, 'अँब्युलन्स': 18, 'अंक': 19, 'अंकांनी': 20, 'अंकारा': 21, 'अंकारात': 22, 'अंग': 23, 'अंगठ्या': 24, 'अंगरक्षकांची': 25, 'अंगाला': 26, 'अंगोला': 27, 'अंघोळ': 28, 'अंठ्यांपेक्षा': 29, 'अंड': 30, 'अंडरवेअर': 31, 'अंडी': 32, 'अंडींचा': 33, 'अंडीसुद्धा': 34, 'अंड्यांवर': 35, 'अंड्यांसहित': 36, 'अंत': 37, 'अंतर': 38, 'अंतराळयात्री': 39, 'अंतराळयान': 40, 'अंतराळयानाचं': 41, 'अंतराळयानातून': 42, 'अंतराळवीर': 43, 'अंतराळात': 44, 'अंतरित': 45, 'अंतिम': 46, 'अंदाज': 47, 'अंदाजे': 48, 'अंधार': 49, 'अंधाराची': 50, 'अंधारात': 51, 'अंमलात': 52, 'अंशांकित': 53, 'अकरा': 54, 'अकरावीत': 55, 'अकल्पनीय': 56, 'अकाउंट': 57, 'अकाउंटमध्ये': 58, 'अकॉर्डियन': 59, 'अक्कलदाढ': 60, 'अक्वॅरियममध्ये': 61, 'अक्षर': 62, 'अक्षरं': 63, 'अक्षरांनी': 64, 'अक्षरांमध्ये': 65, 'अक्षरे': 66, 'अखाद्य': 67, 'अखिलेउस': 68, 'अख्खं

In [22]:
# Create token to word dictionary for both source and target
reverse_input_char_index = dict((i, word) for word, i in input_token_index.items())
reverse_target_char_index = dict((i, word) for word, i in target_token_index.items())
print(reverse_input_char_index)
print(reverse_target_char_index)

{1: '10', 2: '100', 3: '300', 4: 'START_', 5: '_END', 6: 'a', 7: 'b', 8: 'h₂o', 9: 'ntt', 10: 'uk', 11: 'अ', 12: 'अँकरेजमार्गे', 13: 'अँजिलीस', 14: 'अँटीव्हायरस', 15: 'अँड', 16: 'अँडी', 17: 'अँडीजमधल्या', 18: 'अँब्युलन्स', 19: 'अंक', 20: 'अंकांनी', 21: 'अंकारा', 22: 'अंकारात', 23: 'अंग', 24: 'अंगठ्या', 25: 'अंगरक्षकांची', 26: 'अंगाला', 27: 'अंगोला', 28: 'अंघोळ', 29: 'अंठ्यांपेक्षा', 30: 'अंड', 31: 'अंडरवेअर', 32: 'अंडी', 33: 'अंडींचा', 34: 'अंडीसुद्धा', 35: 'अंड्यांवर', 36: 'अंड्यांसहित', 37: 'अंत', 38: 'अंतर', 39: 'अंतराळयात्री', 40: 'अंतराळयान', 41: 'अंतराळयानाचं', 42: 'अंतराळयानातून', 43: 'अंतराळवीर', 44: 'अंतराळात', 45: 'अंतरित', 46: 'अंतिम', 47: 'अंदाज', 48: 'अंदाजे', 49: 'अंधार', 50: 'अंधाराची', 51: 'अंधारात', 52: 'अंमलात', 53: 'अंशांकित', 54: 'अकरा', 55: 'अकरावीत', 56: 'अकल्पनीय', 57: 'अकाउंट', 58: 'अकाउंटमध्ये', 59: 'अकॉर्डियन', 60: 'अक्कलदाढ', 61: 'अक्वॅरियममध्ये', 62: 'अक्षर', 63: 'अक्षरं', 64: 'अक्षरांनी', 65: 'अक्षरांमध्ये', 66: 'अक्षरे', 67: 'अखाद्य', 68: 'अखिलेउस', 69: 'अ

In [23]:
from sklearn.utils import shuffle
df = shuffle(df)
df.head(10)

Unnamed: 0,eng,mar
36694,tom is dancing with another girl,START_ टॉम एका दुसर्‍या मुलीबरोबर नाचतोय _END
42896,to tell the truth he is not a human being,START_ खरं सांगू तर तो मानव नाही आहे _END
9645,youre so perfect,START_ तुम्ही किती परिपूर्ण आहात _END
7594,are you going out,START_ तू बाहेर जात आहेस का _END
24615,tom thinks mary will win,START_ टॉमला वाटतं की मेरी जिंकेल _END
28167,thats not your fault tom,START_ टॉम ती तुझी चूक नाहीये _END
933,how are you,START_ कसा आहेस _END
20713,what time can you come,START_ तुला किती वाजता येता येईल _END
23914,im going to be an uncle,START_ मी काका बनणार आहे _END
29382,i dont know where you work,START_ तू कुठे काम करतोस मला माहीत नाही _END


In [24]:
# Train - Test Split

from sklearn.model_selection import train_test_split
X, y = df.eng, df.mar
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1)
X_train.shape, X_test.shape

((40709,), (4524,))

In [25]:
# Save the train and test dataframes for reproducing the results later, as they are shuffled.

X_train.to_pickle('X_train.pkl')
X_test.to_pickle('X_test.pkl')

In [26]:
def generate_batch(X = X_train, y = y_train, batch_size = 128):
    ''' Generate a batch of data '''
    while True:
        for j in range(0, len(X), batch_size):
            encoder_input_data = np.zeros((batch_size, max_length_src),dtype='float32')
            decoder_input_data = np.zeros((batch_size, max_length_tar),dtype='float32')
            decoder_target_data = np.zeros((batch_size, max_length_tar, num_decoder_tokens),dtype='float32')
            for i, (input_text, target_text) in enumerate(zip(X[j:j+batch_size], y[j:j+batch_size])):
                for t, word in enumerate(input_text.split()):
                    encoder_input_data[i, t] = input_token_index[word] # encoder input seq
                for t, word in enumerate(target_text.split()):
                    if t<len(target_text.split())-1:
                        decoder_input_data[i, t] = target_token_index[word] # decoder input seq
                    if t>0:
                        # decoder target sequence (one hot encoded)
                        # does not include the START_ token
                        # Offset by one timestep
                        decoder_target_data[i, t - 1, target_token_index[word]] = 1.
            yield([encoder_input_data, decoder_input_data], decoder_target_data)

In [27]:
# Encoder - Decoder Model Architecture
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model

latent_dim = 50

#Encoder
encoder_inputs = Input(shape=(None,))
enc_emb =  Embedding(num_encoder_tokens, latent_dim, mask_zero = True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Decoder
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
dec_emb_layer = Embedding(num_decoder_tokens, latent_dim, mask_zero = True)
dec_emb = dec_emb_layer(decoder_inputs)
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                     initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [28]:
# compile
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

In [29]:
train_samples = len(X_train)
val_samples = len(X_test)
batch_size = 128
epochs = 50

model.fit_generator(generator = generate_batch(X_train, y_train, batch_size = batch_size),
                    steps_per_epoch = train_samples//batch_size,
                    epochs=epochs,
                    validation_data = generate_batch(X_test, y_test, batch_size = batch_size),
                    validation_steps = val_samples//batch_size)

  # Remove the CWD from sys.path while we load stuff.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f3982ac5990>

In [30]:
# save weigths
model.save_weights('nmt_weights.h5')
# Load the weights, if you close the application
model.load_weights('nmt_weights.h5')

In [31]:
# Inference Setup

# Encode the input sequence to get the "thought vectors"
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Get the embeddings of the decoder sequence
dec_emb2= dec_emb_layer(decoder_inputs)

# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]

# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_outputs2 = decoder_dense(decoder_outputs2)

# Final decoder model
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs2] + decoder_states2)

In [32]:
# Inference Loop
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))
    
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0] = target_token_index['START_']
    
    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += ' '+sampled_char
        
        # Exit condition: either hit max length or find stop token.
        if (sampled_char == '_END' or len(decoded_sentence) > 50):
            stop_condition = True
        
        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index
        
        # Update states
        states_value = [h, c]
    
    return decoded_sentence

In [33]:
# Evaluation on Train Dataset

train_gen = generate_batch(X_train, y_train, batch_size = 1)
k=-1

k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Marathi Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: you and i want the same things
Actual Marathi Translation:  तुला आणि मला त्याच गोष्टी हव्या आहेत 
Predicted Marathi Translation:  तू मला आणि हवं असतं 


In [34]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Marathi Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: i feel like im forgetting something
Actual Marathi Translation:  मी काहीतरी विसरतोय असं मला वाटत आहे 
Predicted Marathi Translation:  मी काहीतरी काहीतरी ओळखतो 


In [35]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Marathi Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: ill do it now
Actual Marathi Translation:  मी ते आता करेन 
Predicted Marathi Translation:  मी आता ते करू 


In [36]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Marathi Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: tom started packing
Actual Marathi Translation:  टॉमने पॅकिंगला सुरुवात केली 
Predicted Marathi Translation:  टॉमने सुरुवात केली 


In [37]:
# Evaluation on test data

val_gen = generate_batch(X_test, y_test, batch_size = 1)
k=-1

In [38]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Marathi Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: that was your mistake
Actual Marathi Translation:  ती तुमची चूक होती 
Predicted Marathi Translation:  ती चूक होती 


In [39]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Marathi Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: tom bought some potatoes
Actual Marathi Translation:  टॉमने काही बटाटे विकत घेतले 
Predicted Marathi Translation:  टॉमने बटाटे विकत घेतलं 


In [40]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Marathi Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: what is she doing in his office
Actual Marathi Translation:  ती त्यांच्या ऑफिसमध्ये काय करत आहे 
Predicted Marathi Translation:  त्या आपल्या ऑफिसमध्ये काय करत आहेत 


In [41]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Marathi Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: tom is wearing a bathrobe
Actual Marathi Translation:  टॉमने बाथरोब घातलेला आहे 
Predicted Marathi Translation:  टॉमने एक ड्रेस घातला आहे 


In [42]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Marathi Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: tell tom i wont be there
Actual Marathi Translation:  टॉमला सांग की मी तिथे नसेन 
Predicted Marathi Translation:  टॉम इथे काय झालं हे मला माहीत नव्हतं 


In [43]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Marathi Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Marathi Translation:', decoded_sentence[:-4])

Input English sentence: tell tom to wait
Actual Marathi Translation:  टॉमला थांबायला सांगा 
Predicted Marathi Translation:  टॉमला सांग 


What can we conclude?
Even though the results are not the best, they are not that bad as well. Certainly much better than what a randomly generated sequence would result in. In some sentences we can even note that the words predicted are not correct but they are semantically quite close to the correct words.

Also, another point to be noticed is that the results on training set are a bit better than the results on test set, which indicates that the model might be over-fitting a bit.

9. Future Work
If you are interested to improve the quality, you can try out below measures:

a. Get much more data. Top quality translators are trained on millions of sentence pairs.

b. Build more complex models like Attention.

c. Use dropout and other forms of regularization techniques to mitigate over-fitting.

d. Perform Hyper-parameter tuning. Play with learning rate, batch size, dropout rate, etc. Try using bidirectional Encoder LSTM. Try using multi-layered LSTMs.

e. Try using beam search instead of a greedy approach.

f. Try BLEU score to evaluate your model.

g. The list is never ending and goes on.