# NEURAL MACHINE TRANSLATION - GRU with Attention

## Required Modules & Config File

In [1]:
import src.RNN_GRUAttention as gruANMT
from src.Tokenizer import Corpus, LangData, dataLoader
from src.utils import load_config, get_device, train_model, sentence_bleu, corpus_bleu
from src.TranslatorAtt import TranslatorAtt
from torch.nn import CrossEntropyLoss
from torch.optim import NAdam
import evaluate
import numpy as np
from torchinfo import summary

# Load the project configuration; `config.TRAIN_DATA` and `config.VAL_DATA`
# are read from it further down in this notebook.
config = load_config()
# Select the compute device: GPU / MPS back-end / CPU
device = get_device()
print(f"Using device: {device}")

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


Using device: mps


## Load the dataset

In [2]:
# Source side (fed to the encoder): English training corpus
english_data = Corpus(f"{config.TRAIN_DATA}/english.txt", "English")
# Target side (fed to the decoder): Afrikaans training corpus
afrikaans_data = Corpus(f"{config.TRAIN_DATA}/afrikaans.txt", "Afrikaans")

## Set Hyperparameters

In [3]:
# Encoder - source
# Input size of the encoder = size of the English vocabulary
IN_ENCODER = english_data.vocab_size
# Embedding size passed to the encoder
ENCODER_EMB = 256

# Decoder - target
# Decoder input and output sizes both equal the Afrikaans vocabulary size
IN_DECODER = afrikaans_data.vocab_size
OUT_DECODER = afrikaans_data.vocab_size
# Embedding size passed to the decoder
DECODER_EMB = 256

# Shared
# Same hidden size and layer count are passed to both encoder and decoder
HIDDEN_SIZE = 1024
NUM_LAYERS = 2

# Learning rate handed to the NAdam optimizer
LR = 1e-3
# Batch size handed to dataLoader
BATCH_SIZE = 128

## Build the model

In [4]:
# Build the GRU encoder and decoder and move each of them to the selected device.
encoder_net = gruANMT.Encoder(IN_ENCODER, ENCODER_EMB, HIDDEN_SIZE, NUM_LAYERS, type='GRU').to(device)
decoder_net = gruANMT.Decoder(IN_DECODER, DECODER_EMB, HIDDEN_SIZE, NUM_LAYERS, type='GRU').to(device)
# Wrap both networks in the attention model.
# NOTE(review): `model` itself is never passed through `.to(device)`; that is only
# safe if RNNAtt owns no parameters beyond the encoder/decoder -- confirm.
model = gruANMT.RNNAtt(encoder_net, decoder_net, OUT_DECODER)

# Display the per-layer parameter counts.
summary(model)

Layer (type:depth-idx)                   Param #
RNNAtt                                   --
├─Encoder: 1-1                           --
│    └─GRU: 2-1                          10,235,904
│    └─Embedding: 2-2                    756,480
├─Decoder: 1-2                           --
│    └─GRU: 2-3                          10,235,904
│    └─Embedding: 2-4                    743,424
│    └─Linear: 2-5                       5,950,296
Total params: 27,922,008
Trainable params: 27,922,008
Non-trainable params: 0

In [5]:
# Build the training dataset from the two corpora and wrap it in a loader
# that is given BATCH_SIZE.
train_data = LangData(english_data, afrikaans_data)
train_loader = dataLoader(train_data, BATCH_SIZE)

# Take the padding index from the target vocabulary rather than hard-coding 0,
# so padded target positions are ignored by the loss whatever index '<pad>' has.
pad_idx = afrikaans_data.stoi['<pad>']
criterion = CrossEntropyLoss(ignore_index=pad_idx)

optimizer = NAdam(model.parameters(), lr=LR)
# Helper used below to translate sentences with the current model weights.
translator = TranslatorAtt(model, english_data, afrikaans_data, device)

In [6]:
# Sentence pair used for follow-up during training.
# Raw string literals are used because the LaTeX fragments contain backslashes
# (\in, \{, \}) that are not valid Python escape sequences; in a normal literal
# they trigger a SyntaxWarning on recent Python versions. The resulting string
# values are unchanged.
mytext = r"<sos> given that we represent the target output as $y\in\{0,1\}$ and we have $n$ training points , we can write the negative log likelihood of the parameters as follows : <eos>"
ground = r"<sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>"

# Baseline: translate with the model before any training and score it against the reference.
predicted = translator.translate_sentence(mytext)
bleu = sentence_bleu(prediction=[predicted], reference=[ground])
print(f"Pred: {predicted}")
print(f"Refe: {ground}")
print(f"BLEU SCORES: {bleu}")

Pred: <sos> (all-pass) <ltx> <ltx> d.w.s.\ d.w.s.\ daaraan hoes hoe hoe breek breek "al "al "al sonbrille hulle $c$ $c$ oop vredesverdrag zero-gemiddelde zero-gemiddelde haal geeis bespreek geeis verdieping verdieping trui vele vele
Refe: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU SCORES: [0.186, 0.08, 0.048, 0.0]


## Train the model

In [7]:
# Number of epochs handed to train_model.
EPOCHS = 15

# Keyword arguments forwarded to train_model; the follow-up sentence pair and
# the translator are passed along with the usual training objects.
params = dict(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
    source_test=mytext,
    reference=ground,
    translator=translator,
)

train_loss = train_model(**params)
# Persist the value returned by train_model as a NumPy array on disk.
np.save('gru_att_train_loss.npy', np.array(train_loss))

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.
Epoch 1/15: 100%|██████████| 20/20 [00:11<00:00,  1.67batch/s, loss=1.806]


Predicted: <sos> die die van die die van die , , , , , en die , , , , en die , , , , en die , , , , en
BLEU Score: [0.178, 0.096, 0.063, 0.0]


Epoch 2/15: 100%|██████████| 20/20 [00:11<00:00,  1.76batch/s, loss=1.494]


Predicted: <sos> ons het die volgende datastel en die volgende -gemiddelde , en ons het die volgende datastel en die volgende -gemiddelde in die volgende bladsy , en die volgende -gemiddelde , en
BLEU Score: [0.279, 0.148, 0.084, 0.0]


Epoch 3/15: 100%|██████████| 20/20 [00:11<00:00,  1.76batch/s, loss=1.251]


Predicted: <sos> ons het die data op die volgende verandering verandering , en die \% verandering in die usd/euro verandering , die \% verandering verandering , en die \% verandering in die usd/euro
BLEU Score: [0.317, 0.126, 0.074, 0.0]


Epoch 4/15: 100%|██████████| 20/20 [00:11<00:00,  1.78batch/s, loss=0.979]


Predicted: <sos> ons het die data van die oorspronklike oorspronklike oorspronklike , en ons kan die negatiewe as as : en die $n$ van die oorspronklike sein , kan as die outokorrelasie as
BLEU Score: [0.486, 0.299, 0.169, 0.0]


Epoch 5/15: 100%|██████████| 20/20 [00:11<00:00,  1.78batch/s, loss=0.771]


Predicted: <sos> ons het die teikenuittree voorstel en ons het afrigpunte , dan die negatiewe log-waarskynlikheidskostefunksie log-waarskynlikheidskostefunksie log-waarskynlikheidskostefunksie log-waarskynlikheidskostefunksie die $2n$ log-waarskynlikheidskostefunksie log-waarskynlikheidskostefunksie as ons afrigpunte afrigpunte afrigpunte : , en ons afrigpunte
BLEU Score: [0.537, 0.356, 0.218, 0.0]


Epoch 6/15: 100%|██████████| 20/20 [00:11<00:00,  1.76batch/s, loss=0.592]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , het ons die negatiewe van die oorsprong al , en ons kan as ons die negatiewe voorstel as
BLEU Score: [0.711, 0.673, 0.649, 0.629]


Epoch 7/15: 100%|██████████| 20/20 [00:10<00:00,  1.82batch/s, loss=0.447]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf <eos>
BLEU Score: [0.947, 0.934, 0.92, 0.906]


Epoch 8/15: 100%|██████████| 20/20 [00:11<00:00,  1.78batch/s, loss=0.369]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ $n$ van $n$ monsters , en dan weer die $k$ -gemiddelde algoritme hardloop , kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as
BLEU Score: [0.647, 0.613, 0.591, 0.573]


Epoch 9/15: 100%|██████████| 20/20 [00:11<00:00,  1.78batch/s, loss=0.315]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [0.975, 0.975, 0.965, 0.954]


Epoch 10/15: 100%|██████████| 20/20 [00:11<00:00,  1.74batch/s, loss=0.290]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 11/15: 100%|██████████| 20/20 [00:11<00:00,  1.78batch/s, loss=0.271]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 12/15: 100%|██████████| 20/20 [00:11<00:00,  1.78batch/s, loss=0.259]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 13/15: 100%|██████████| 20/20 [00:11<00:00,  1.79batch/s, loss=0.251]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 14/15: 100%|██████████| 20/20 [00:10<00:00,  1.91batch/s, loss=0.237]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 15/15: 100%|██████████| 20/20 [00:11<00:00,  1.73batch/s, loss=0.257]

Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]





## Evaluate on the Training set

In [8]:
# Join each entry of the corpora's `data_str` into a single space-separated sentence.
EN_SRC = list(map(' '.join, english_data.data_str))
# Each reference is wrapped in its own one-element list.
AF_REF = [[' '.join(tokens)] for tokens in afrikaans_data.data_str]
# Translate every training sentence with the default decoding method.
TRANSLATED = list(map(translator.translate_sentence, EN_SRC))
corpus_bleu(TRANSLATED, AF_REF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.8182694072306845
precisions          : [0.8182694072306845]
brevity_penalty     : 1.0
length_ratio        : 1.1823803840729088
translation_length  : 43592
reference_length    : 36868
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.8026146592852926
precisions          : [0.8182694072306845, 0.7872594106625788]
brevity_penalty     : 1.0
length_ratio        : 1.1823803840729088
translation_length  : 43592
reference_length    : 36868
******************************************************************************************
                                     BLE

## Evaluate on the Validation set

In [9]:
# Read the validation sentences, one per line.
# The encoding is given explicitly so that non-ASCII characters are decoded
# the same way on every platform instead of relying on the locale default.
with open(f"{config.VAL_DATA}/english.txt", encoding="utf-8") as data:
    english_val = data.read().strip().split("\n")
with open(f"{config.VAL_DATA}/afrikaans.txt", encoding="utf-8") as data:
    afrikaans_val = data.read().strip().split("\n")

### Greedy Search

In [10]:
# Wrap every reference sentence in its own one-element list.
VAL_AF_REF = [[reference] for reference in afrikaans_val]

# Translate each validation sentence with the translator's default decoding method.
VAL_TRANSLATED = list(map(translator.translate_sentence, english_val))

corpus_bleu(VAL_TRANSLATED, VAL_AF_REF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.5443267433297456
precisions          : [0.5443267433297456]
brevity_penalty     : 1.0
length_ratio        : 1.2048184548184548
translation_length  : 17653
reference_length    : 14652
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.453736991709145
precisions          : [0.5443267433297456, 0.3782236683538202]
brevity_penalty     : 1.0
length_ratio        : 1.2048184548184548
translation_length  : 17653
reference_length    : 14652
******************************************************************************************
                                     BLEU

### Beam search

In [11]:
# Translate the validation sentences again, this time with beam search (width 3).
# This rebinds VAL_TRANSLATED, so the greedy translations are no longer available afterwards.
VAL_TRANSLATED = [translator.translate_sentence(sent, method="beam", beam_width=3) for sent in english_val]
# Score against the same references as the greedy run.
corpus_bleu(VAL_TRANSLATED, VAL_AF_REF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.6174465227403191
precisions          : [0.6751672862453532]
brevity_penalty     : 0.9145089451445096
length_ratio        : 0.917963417963418
translation_length  : 13450
reference_length    : 14652
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.5209778134794423
precisions          : [0.6751672862453532, 0.4806745743564916]
brevity_penalty     : 0.9145089451445096
length_ratio        : 0.917963417963418
translation_length  : 13450
reference_length    : 14652
******************************************************************************************
            

## Evaluate on the SUN validation set only

In [12]:
# Read the SUN validation sentences, one per line.
# The encoding is given explicitly so that non-ASCII characters are decoded
# the same way on every platform instead of relying on the locale default.
with open(f"{config.VAL_DATA}/sun_english.txt", encoding="utf-8") as data:
    sun_english_val = data.read().strip().split("\n")
with open(f"{config.VAL_DATA}/sun_afrikaans.txt", encoding="utf-8") as data:
    sun_afrikaans_val = data.read().strip().split("\n")

### Greedy Search

In [13]:
# Wrap every SUN reference sentence in its own one-element list.
SUN_VAL_AF = [[reference] for reference in sun_afrikaans_val]
# Translate each SUN validation sentence with the translator's default decoding method.
SUN_VAL_TRANSLATED = list(map(translator.translate_sentence, sun_english_val))
corpus_bleu(SUN_VAL_TRANSLATED, SUN_VAL_AF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.36047037539574855
precisions          : [0.36047037539574855]
brevity_penalty     : 1.0
length_ratio        : 1.1594126900891453
translation_length  : 4422
reference_length    : 3814
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.260630508150112
precisions          : [0.36047037539574855, 0.1884433962264151]
brevity_penalty     : 1.0
length_ratio        : 1.1594126900891453
translation_length  : 4422
reference_length    : 3814
******************************************************************************************
                                     BLEU-

### Beam Search

In [14]:
# Translate the SUN validation sentences again, this time with beam search (width 3).
# This rebinds SUN_VAL_TRANSLATED, so the greedy translations are no longer available afterwards.
SUN_VAL_TRANSLATED = [translator.translate_sentence(sent, method="beam", beam_width=3) for sent in sun_english_val]
# Score against the same references as the greedy run.
corpus_bleu(SUN_VAL_TRANSLATED, SUN_VAL_AF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.414413385573026
precisions          : [0.5066120906801007]
brevity_penalty     : 0.8180092682286704
length_ratio        : 0.8327215521761929
translation_length  : 3176
reference_length    : 3814
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.31240914223256827
precisions          : [0.5066120906801007, 0.2879091516366065]
brevity_penalty     : 0.8180092682286704
length_ratio        : 0.8327215521761929
translation_length  : 3176
reference_length    : 3814
******************************************************************************************
              

In [15]:
metric = evaluate.load("bleu")

# Inspect a fixed window of SUN validation sentences (items 10 to 19).
sample = slice(10, 20)
predictions = [
    translator.translate_sentence(sent, method="beam", beam_width=5)
    for sent in sun_english_val[sample]
]
labels = SUN_VAL_AF[sample]

# Print source, prediction and label (the latter two truncated to 150 characters)
# followed by the per-sentence BLEU value.
for source, pred, lab in zip(sun_english_val[sample], predictions, labels):
    print(f"Source    : {source}")
    print(f"Prediction: {pred[:150]}")
    print(f"Label     : {lab[0][:150]}")
    score = metric.compute(predictions=[pred], references=lab)['bleu']
    print(f"BLEU      : {score}")
    print()

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


Source    : <sos> component <eos>
Prediction: <sos> hallo hallo <eos>
Label     : <sos> komponent <eos>
BLEU      : 0.0

Source    : <sos> architecture <eos>
Prediction: <sos> hallo hallo <eos>
Label     : <sos> argitektuur <eos>
BLEU      : 0.0

Source    : <sos> specification <eos>
Prediction: <sos> hallo hallo <eos>
Label     : <sos> spesifikasies <eos>
BLEU      : 0.0

Source    : <sos> at which stage of the design process would we choose the communication protocol between subsystems <eos>
Prediction: <sos> watter van die oorsprong sal lei <eos>
Label     : <sos> by watter stap van die ontwerpsproses word die kommunikasie-kanaal tussen substelsels gekies <eos>
BLEU      : 0.0

Source    : <sos> motivate your answer <eos>
Prediction: <sos> motiveer jou vrou <eos>
Label     : <sos> motiveer jou antwoord <eos>
BLEU      : 0.5969491792019646

Source    : <sos> describe the meaning if a system is described as a cyber-physical system <eos>
Prediction: <sos> 'n stelsel word deur 'n monste