# NEURAL MACHINE TRANSLATION - GRU

## Required modules & config file

In [1]:
import random

import torch
from torch.nn import CrossEntropyLoss
from torch.optim import NAdam
from torchinfo import summary

import src.RNN_GRU as gruNMT
from src.Tokenizer import Corpus, LangData, dataLoader
from src.Translator import Translator
from src.utils import load_config, get_device, train_model, sentence_bleu, corpus_bleu

# Load the project configuration; later cells read the dataset locations
# (config.TRAIN_DATA, config.VAL_DATA) from it.
config = load_config()

# Fix the RNG seeds up front so that Restart & Run All repeats the same
# weight initialisation and sampling, as far as `random` and `torch` are concerned.
# NOTE(review): if the src package draws from another generator (e.g. numpy),
# seed it here as well.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Select the compute back-end (GPU / MPS / CPU) reported by get_device().
device = get_device()
print(f"Using device: {device}")

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


Using device: mps


## Load the dataset

In [2]:
# Parallel training corpora, loaded in source -> target order:
# English feeds the encoder, Afrikaans is what the decoder is trained to produce.
english_data, afrikaans_data = (
    Corpus(f"{config.TRAIN_DATA}/english.txt", "English"),
    Corpus(f"{config.TRAIN_DATA}/afrikaans.txt", "Afrikaans"),
)

## Set Hyperparameters

In [3]:
# Vocabulary sizes: the encoder reads English tokens, the decoder both reads
# and emits Afrikaans tokens, so its input and output sizes are the same.
IN_ENCODER = english_data.vocab_size
IN_DECODER = OUT_DECODER = afrikaans_data.vocab_size

# Embedding width, identical on the source and target side.
ENCODER_EMB = DECODER_EMB = 256

# Recurrent stack settings shared by encoder and decoder.
HIDDEN_SIZE = 1024
NUM_LAYERS = 2

# Optimisation settings.
LR = 1e-3
BATCH_SIZE = 128

## Set the model

In [4]:
# Build the two GRU halves on the selected device, then wrap them in the
# sequence-to-sequence model.
encoder_net = gruNMT.Encoder(
    IN_ENCODER,
    ENCODER_EMB,
    HIDDEN_SIZE,
    NUM_LAYERS,
    type="GRU",
).to(device)
decoder_net = gruNMT.Decoder(
    IN_DECODER,
    DECODER_EMB,
    HIDDEN_SIZE,
    NUM_LAYERS,
    type='GRU',
).to(device)
model = gruNMT.GRU_NMT(encoder_net, decoder_net, OUT_DECODER)

# Show the layer tree (down to depth 4) with per-layer parameter counts.
summary(model, depth=4)

Layer (type:depth-idx)                   Param #
GRU_NMT                                  --
├─Encoder: 1-1                           --
│    └─GRU: 2-1                          10,235,904
│    └─Embedding: 2-2                    743,936
├─Decoder: 1-2                           --
│    └─GRU: 2-3                          10,235,904
│    └─Embedding: 2-4                    737,024
│    └─Linear: 2-5                       2,950,975
Total params: 24,903,743
Trainable params: 24,903,743
Non-trainable params: 0

In [5]:
# Pair the source and target corpora into one dataset and batch it.
train_data = LangData(english_data, afrikaans_data)
train_loader = dataLoader(train_data, BATCH_SIZE)

# Take the padding index from the target vocabulary instead of hard-coding 0,
# so the loss keeps skipping padded positions even if the vocabulary layout changes.
# NOTE(review): assumes '<pad>' is the token the data loader pads targets with — confirm.
pad_idx = afrikaans_data.stoi['<pad>']
criterion = CrossEntropyLoss(ignore_index=pad_idx)

optimizer = NAdam(model.parameters(), lr=LR)

# Helper used below to translate English sentences with the current model weights.
translator = Translator(model, english_data, afrikaans_data, device)

In [6]:
# Sentence pair used to follow progress during training.
# Raw strings are used because the LaTeX fragments contain backslashes
# (\i, \{, \}) that are not valid Python escape sequences; the resulting
# string values are unchanged.
mytext = r"<sos> given that we represent the target output as $y\in\{0,1\}$ and we have $n$ training points , we can write the negative log likelihood of the parameters as follows: <eos>"
ground = r"<sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as: <eos>"

# Baseline before any training: translate with the freshly initialised model
# and score the result against the reference.
predicted = translator.translate_sentence(mytext)
bleu = sentence_bleu(prediction=[predicted], reference=[ground])
print(f"Pred: {predicted}")
print(f"Refe: {ground}")
print(f"BLEU SCORES: {bleu}")

Pred: <sos> "aangesien gelieg maand bestaan rêrig rêrig kind na\"iewe moenie alby digter. wat assosiatief assosiatief besoek." okal okal arms spel haal $y$ politieke log-waarskynlikheidskostefunksie hoekom hoekom periodiese vurige vurige nou-nou nou-nou gemonsterde gemonsterde stout dom wat wat verwerk kenmerke verwerk kenmerke kanada $\mathbf{x}\in\mathbb{r}^d$ nie; verwerk ten bloed verwerk projeksie kanada pas voorafbetaalde kak ontspanningsaktiwiteit spaans." gebly gebly gebly binnegegaan meerveranderlike liefde reguit vergadering vergadering vergadering handvol handvol 2012 \texttt{student} springmielies geboren motorsleutels motorsleutels (statistiese liefde iemand gelees binnegegaan enigiets binnegegaan $z$ 200 200 $\omega=\frac{\pi}{6}$ partymal partymal partymal "alles opgemerk voorspel soek soek spesieke prototipe spesieke $f_s=5$ $f_s=5$ vou vou oorsaak oorsaak ek verversings meerveranderlike meerveranderlike sluit beste beteken optimaal blou. vitamien sein geboren zoek ""ek

## Train the model

In [7]:
EPOCHS = 20

# Keyword arguments for train_model, gathered in one place. `source_test` and
# `reference` are the follow-up sentence pair defined earlier; the recorded
# output shows a predicted translation and BLEU scores after each epoch.
params = dict(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
    source_test=mytext,
    reference=ground,
    translator=translator,
)

train_model(**params)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.
Epoch 1/20: 100%|██████████| 20/20 [00:08<00:00,  2.37batch/s, loss=1.734]


Predicted: <sos> die die die die die <eos>
BLEU Score: [0.057, 0.042, 0.031, 0.0]


Epoch 2/20: 100%|██████████| 20/20 [00:07<00:00,  2.60batch/s, loss=1.535]


Predicted: <sos> die die die die die die die die die die die die <eos>
BLEU Score: [0.138, 0.101, 0.073, 0.0]


Epoch 3/20: 100%|██████████| 20/20 [00:07<00:00,  2.51batch/s, loss=1.414]


Predicted: <sos> ons die die die die wat ons die die die te , , die die te <eos>
BLEU Score: [0.231, 0.175, 0.112, 0.0]


Epoch 4/20: 100%|██████████| 20/20 [00:07<00:00,  2.60batch/s, loss=1.271]


Predicted: <sos> ons wil die data om die die te te ons die die te minimeer , ons die die die die <eos>
BLEU Score: [0.28, 0.202, 0.127, 0.0]


Epoch 5/20: 100%|██████████| 20/20 [00:07<00:00,  2.56batch/s, loss=1.170]


Predicted: <sos> ons wil die data data , , ons die data , , , die die <eos>
BLEU Score: [0.222, 0.154, 0.102, 0.0]


Epoch 6/20: 100%|██████████| 20/20 [00:07<00:00,  2.60batch/s, loss=0.997]


Predicted: <sos> as ons die die die die die die die die die usd/euro , en ons die die verandering as ons die die verandering in die die <eos>
BLEU Score: [0.406, 0.309, 0.235, 0.181]


Epoch 7/20: 100%|██████████| 20/20 [00:08<00:00,  2.49batch/s, loss=0.901]


Predicted: <sos> veronderstel ons ons die eerder wat ons kan ons eerder , en ons eerder , die dan , sal ons eerder <eos>
BLEU Score: [0.356, 0.248, 0.147, 0.0]


Epoch 8/20: 100%|██████████| 20/20 [00:07<00:00,  2.54batch/s, loss=0.775]


Predicted: <sos> as ons die teikenuittree van die gevolglike klassifikasie-gebiede ons ons ons ons data as ons basismodel <eos>
BLEU Score: [0.294, 0.227, 0.193, 0.162]


Epoch 9/20: 100%|██████████| 20/20 [00:07<00:00,  2.57batch/s, loss=0.655]


Predicted: <sos> as ons die teikenuittree voorstel as ons ons ons kan dat ons die \% verandering dat ons die \% verandering dat ons die \% verandering in die \% verandering dat ons die \% verandering in die duitse mark <eos>
BLEU Score: [0.408, 0.306, 0.252, 0.213]


Epoch 10/20: 100%|██████████| 20/20 [00:08<00:00,  2.46batch/s, loss=0.572]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons afrigpunte afrigpunte ons afrigpunte ons afrigpunte ons (pca) het <eos>
BLEU Score: [0.688, 0.63, 0.599, 0.573]


Epoch 11/20: 100%|██████████| 20/20 [00:07<00:00,  2.58batch/s, loss=0.454]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons kan as ons eerder as basismodel wat ons wil doen <eos>
BLEU Score: [0.686, 0.628, 0.597, 0.572]


Epoch 12/20: 100%|██████████| 20/20 [00:08<00:00,  2.44batch/s, loss=0.389]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons ons afrigpunte ons afrigpunte ons dan , dan , die negatiewe log-waarskynlikheidskostefunksie log-waarskynlikheidskostefunksie ons <eos>
BLEU Score: [0.795, 0.723, 0.667, 0.621]


Epoch 13/20: 100%|██████████| 20/20 [00:08<00:00,  2.44batch/s, loss=0.339]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte ons kan ons die negatiewe negatiewe as as <eos>
BLEU Score: [0.843, 0.802, 0.768, 0.734]


Epoch 14/20: 100%|██████████| 20/20 [00:07<00:00,  2.55batch/s, loss=0.296]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan ons die negatiewe log-waarskynlikheidskostefunksie as ons stompies as basismodel gebruik <eos>
BLEU Score: [0.878, 0.838, 0.805, 0.773]


Epoch 15/20: 100%|██████████| 20/20 [00:08<00:00,  2.38batch/s, loss=0.287]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons ons negatiewe log-waarskynlikheidskostefunksie skryf as ons <eos>
BLEU Score: [0.949, 0.921, 0.893, 0.862]


Epoch 16/20: 100%|██████████| 20/20 [00:07<00:00,  2.54batch/s, loss=0.256]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 17/20: 100%|██████████| 20/20 [00:08<00:00,  2.49batch/s, loss=0.261]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as ons <eos>
BLEU Score: [0.974, 0.961, 0.947, 0.932]


Epoch 18/20: 100%|██████████| 20/20 [00:07<00:00,  2.65batch/s, loss=0.243]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 19/20: 100%|██████████| 20/20 [00:07<00:00,  2.50batch/s, loss=0.254]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 20/20: 100%|██████████| 20/20 [00:07<00:00,  2.53batch/s, loss=0.248]

Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]





## Evaluate on the training set

In [8]:
# Rebuild plain-text sentences from the tokenised training corpora.
EN_SRC = list(map(' '.join, english_data.data_str))
# Each hypothesis gets a list of references (a single one here).
AF_REF = [[' '.join(tokens)] for tokens in afrikaans_data.data_str]

# Translate every training sentence (default decoding) and score the whole corpus.
TRANSLATED = list(map(translator.translate_sentence, EN_SRC))
corpus_bleu(TRANSLATED, AF_REF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.9954484331688467
precisions          : [0.996393611540443]
brevity_penalty     : 0.9990514006105127
length_ratio        : 0.999051850246519
translation_length  : 36879
reference_length    : 36914
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.9934852671204528
precisions          : [0.996393611540443, 0.9924674267100977]
brevity_penalty     : 0.9990514006105127
length_ratio        : 0.999051850246519
translation_length  : 36879
reference_length    : 36914
******************************************************************************************
              

## Evaluate on the validation set

In [9]:
# Read the validation sentences, one per line.
# The encoding is given explicitly because Afrikaans text contains non-ASCII
# characters (e.g. "rêrig") and open() otherwise uses a platform-dependent default.
# NOTE(review): assumes the files are stored as UTF-8 — confirm.
with open(f"{config.VAL_DATA}/english.txt", encoding="utf-8") as val_file:
    english_val = val_file.read().strip().split("\n")
with open(f"{config.VAL_DATA}/afrikaans.txt", encoding="utf-8") as val_file:
    afrikaans_val = val_file.read().strip().split("\n")

### Greedy Search

In [10]:
# Each hypothesis is scored against a list of references (one per sentence here).
VAL_AF_REF = [[reference] for reference in afrikaans_val]

# Default decoding: translate every validation sentence.
VAL_TRANSLATED = list(map(translator.translate_sentence, english_val))

corpus_bleu(VAL_TRANSLATED, VAL_AF_REF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.6697274030464273
precisions          : [0.7205201675115716]
brevity_penalty     : 0.929505422949416
length_ratio        : 0.9318773106942353
translation_length  : 13611
reference_length    : 14606
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.5767405182610258
precisions          : [0.7205201675115716, 0.5343316871116776]
brevity_penalty     : 0.929505422949416
length_ratio        : 0.9318773106942353
translation_length  : 13611
reference_length    : 14606
******************************************************************************************
            

### Beam Search

In [11]:
# Same validation sentences, decoded with beam search (width 2) instead of the default.
VAL_TRANSLATED = [
    translator.translate_sentence(source_sentence, method="beam", beam_width=2)
    for source_sentence in english_val
]

corpus_bleu(VAL_TRANSLATED, VAL_AF_REF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.6711800947007869
precisions          : [0.7054046284317953]
brevity_penalty     : 0.951482408320606
length_ratio        : 0.9526222100506642
translation_length  : 13914
reference_length    : 14606
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.5780009139628087
precisions          : [0.7054046284317953, 0.523139145990511]
brevity_penalty     : 0.951482408320606
length_ratio        : 0.9526222100506642
translation_length  : 13914
reference_length    : 14606
******************************************************************************************
             

## Evaluate on the SUN validation set only

In [12]:
# Read the SUN validation sentences, one per line.
# The encoding is given explicitly because Afrikaans text contains non-ASCII
# characters and open() otherwise uses a platform-dependent default.
# NOTE(review): assumes the files are stored as UTF-8 — confirm.
with open(f"{config.VAL_DATA}/sun_english.txt", encoding="utf-8") as val_file:
    sun_english_val = val_file.read().strip().split("\n")
with open(f"{config.VAL_DATA}/sun_afrikaans.txt", encoding="utf-8") as val_file:
    sun_afrikaans_val = val_file.read().strip().split("\n")

### Greedy Search

In [13]:
# One reference list per SUN validation sentence.
SUN_VAL_AF = [[reference] for reference in sun_afrikaans_val]
# Default decoding over the SUN English sentences, then corpus-level BLEU.
SUN_VAL_TRANSLATED = list(map(translator.translate_sentence, sun_english_val))
corpus_bleu(SUN_VAL_TRANSLATED, SUN_VAL_AF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.4447011991151803
precisions          : [0.5130356607731495]
brevity_penalty     : 0.8668036807519607
length_ratio        : 0.8749344520188779
translation_length  : 3337
reference_length    : 3814
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.33435312673768314
precisions          : [0.5130356607731495, 0.2900158478605388]
brevity_penalty     : 0.8668036807519607
length_ratio        : 0.8749344520188779
translation_length  : 3337
reference_length    : 3814
******************************************************************************************
             

### Beam Search

In [14]:
# SUN validation sentences decoded with beam search (width 2) instead of the default.
SUN_VAL_TRANSLATED = [
    translator.translate_sentence(source_sentence, method="beam", beam_width=2)
    for source_sentence in sun_english_val
]
corpus_bleu(SUN_VAL_TRANSLATED, SUN_VAL_AF)

Using the latest cached version of the module from /Users/lucien/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bleu/9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Thu Jul 18 16:29:52 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.447827350834006
precisions          : [0.4802244039270687]
brevity_penalty     : 0.9325376785766538
length_ratio        : 0.9347142108023073
translation_length  : 3565
reference_length    : 3814
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.3379156989466911
precisions          : [0.4802244039270687, 0.2734259532958912]
brevity_penalty     : 0.9325376785766538
length_ratio        : 0.9347142108023073
translation_length  : 3565
reference_length    : 3814
******************************************************************************************
               