# NEURAL MACHINE TRANSLATION - GRU

## Required modules and configuration

In [1]:
import evaluate
import numpy as np
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import NAdam
from torchinfo import summary

import src.RNN_GRU as gruNMT
from src.Tokenizer import Corpus, LangData, dataLoader
from src.Translator import Translator
from src.utils import load_config, get_device, train_model, sentence_bleu, corpus_bleu

# Load the project configuration; later cells read config.TRAIN_DATA and
# config.VAL_DATA from this object to locate the corpora.
config = load_config()
# Pick the compute device via the project helper (GPU / MPS back-end / CPU);
# the encoder, decoder and translator below are all given this device.
device = get_device()
print(f"Using device: {device}")

Using device: mps


## Load the dataset

In [2]:
# Training corpora read from config.TRAIN_DATA.
# English is the source side (its vocab size feeds the encoder) and
# Afrikaans is the target side (its vocab size feeds the decoder).
english_data = Corpus(f"{config.TRAIN_DATA}/english.txt", "English")
afrikaans_data = Corpus(f"{config.TRAIN_DATA}/afrikaans.txt", "Afrikaans")

## Set Hyperparameters

In [3]:
# Reproducibility: seed the RNGs before the model is built and trained so that
# a fresh "Restart Kernel -> Run All" starts from the same random state.
# NOTE(review): some accelerator back-ends may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Encoder - source (English): input vocabulary size and embedding width
IN_ENCODER = english_data.vocab_size
ENCODER_EMB = 256

# Decoder - target (Afrikaans): the decoder both consumes and predicts
# target-vocabulary tokens, so input and output sizes are the same value
IN_DECODER = afrikaans_data.vocab_size
OUT_DECODER = afrikaans_data.vocab_size
DECODER_EMB = 256

# Shared by encoder and decoder
HIDDEN_SIZE = 1024
NUM_LAYERS = 2

# Optimisation
LR = 1e-3
BATCH_SIZE = 128

## Build the model

In [4]:
# Encoder and decoder use the same hidden size and layer count; each is moved
# to the selected device before being wrapped in the seq2seq model.
encoder_net = gruNMT.Encoder(
    IN_ENCODER, ENCODER_EMB, HIDDEN_SIZE, NUM_LAYERS, type="GRU"
).to(device)
decoder_net = gruNMT.Decoder(
    IN_DECODER, DECODER_EMB, HIDDEN_SIZE, NUM_LAYERS, type="GRU"
).to(device)
model = gruNMT.GRU_NMT(encoder_net, decoder_net, OUT_DECODER)

# Per-layer parameter counts, expanded four levels deep
summary(model, depth=4)

Layer (type:depth-idx)                   Param #
GRU_NMT                                  --
├─Encoder: 1-1                           --
│    └─GRU: 2-1                          10,235,904
│    └─Embedding: 2-2                    744,448
├─Decoder: 1-2                           --
│    └─GRU: 2-3                          10,235,904
│    └─Embedding: 2-4                    737,280
│    └─Linear: 2-5                       2,952,000
Total params: 24,905,536
Trainable params: 24,905,536
Non-trainable params: 0

In [5]:
# Paired (English, Afrikaans) dataset and its batched loader
train_data = LangData(english_data, afrikaans_data)
train_loader = dataLoader(train_data, BATCH_SIZE)

# Look up the padding token's index in the target vocabulary and hand it to
# the loss, instead of hard-coding 0, so padded positions are excluded from
# the loss even if the vocabulary layout changes.
pad_idx = afrikaans_data.stoi['<pad>']
criterion = CrossEntropyLoss(ignore_index=pad_idx)

optimizer = NAdam(model.parameters(), lr=LR)
# Inference helper used for the follow-up sentence and all BLEU evaluations
translator = Translator(model, english_data, afrikaans_data, device)

In [6]:
# Sentence pair used to follow translation quality during training.
# Raw strings are required here: the LaTeX fragments contain backslashes
# (\in, \{, \}) that are invalid escape sequences in a normal string literal
# and trigger a DeprecationWarning/SyntaxWarning. The runtime value is unchanged.
mytext = r"<sos> given that we represent the target output as $y\in\{0,1\}$ and we have $n$ training points , we can write the negative log likelihood of the parameters as follows: <eos>"
ground = r"<sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as: <eos>"

# Baseline: translate with the untrained model and score it against the reference
predicted = translator.translate_sentence(mytext)
bleu = sentence_bleu(prediction=[predicted], reference=[ground])
print(f"Pred: {predicted}")
print(f"Refe: {ground}")
print(f"BLEU SCORES: {bleu}")

Pred: <sos> \textit{wenk water (nll) lyn herken." $x(f)$ $x(f)$ $x(f)$ dof deeglik gedoen gedoen $[-1,0]^\top$ pole geheim positiewe aflewerings (aliasing) bang netsowel nog getoon getoon halwe beslissingsgrens oorkant beslissingsgrens skrik bely oorkant soortgelyke $f_s=2000\textrm{hz}$ $f_s=2000\textrm{hz}$ (nll) drankie $x(f)$ $x(f)$ deeglik deeglik aanbod vermenigvuldigings vermenigvuldigings vermenigvuldigings $\omega_c$ spontaan lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak lukraak l

## Train the model

In [7]:
EPOCHS = 15
# Keyword arguments forwarded to train_model. source_test / reference carry the
# follow-up sentence pair defined earlier in the notebook.
params = dict(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
    source_test=mytext,
    reference=ground,
    translator=translator,
)

train_loss = train_model(**params)
# Persist the returned training-loss history to disk as a NumPy array
np.save('gru_train_loss.npy', np.array(train_loss))

Epoch 1/15: 100%|██████████| 20/20 [00:08<00:00,  2.38batch/s, loss=1.692]


Predicted: <sos> tom het 'n <eos>
BLEU Score: [0.028, 0.022, 0.017, 0.0]


Epoch 2/15: 100%|██████████| 20/20 [00:07<00:00,  2.51batch/s, loss=1.469]


Predicted: <sos> ons het die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die volgende van die vo

Epoch 3/15: 100%|██████████| 20/20 [00:07<00:00,  2.55batch/s, loss=1.261]


Predicted: <sos> ons het die data van die stelsel gegee <eos>
BLEU Score: [0.12, 0.079, 0.056, 0.0]


Epoch 4/15: 100%|██████████| 20/20 [00:08<00:00,  2.47batch/s, loss=1.113]


Predicted: <sos> ons wil die data van die dac oorbetaal <eos>
BLEU Score: [0.108, 0.075, 0.054, 0.0]


Epoch 5/15: 100%|██████████| 20/20 [00:07<00:00,  2.55batch/s, loss=0.906]


Predicted: <sos> ons wil die data van die klassifiseerder in die tyd-gebied van die klassifiseerder is swart en die hond is wit <eos>
BLEU Score: [0.257, 0.158, 0.108, 0.0]


Epoch 6/15: 100%|██████████| 20/20 [00:07<00:00,  2.65batch/s, loss=0.739]


Predicted: <sos> as ons die data in die usd/euro van die data in die usd/euro <eos>
BLEU Score: [0.239, 0.195, 0.165, 0.135]


Epoch 7/15: 100%|██████████| 20/20 [00:07<00:00,  2.72batch/s, loss=0.586]


Predicted: <sos> as ons die teikenuittree initialisering voorstel en ons kan as basismodel gebruik word <eos>
BLEU Score: [0.276, 0.219, 0.181, 0.151]


Epoch 8/15: 100%|██████████| 20/20 [00:07<00:00,  2.60batch/s, loss=0.499]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons eerder het die \% verandering in die usd/euro mark , die \% verandering in die britse mark , en die \% verandering in die duitse mark <eos>
BLEU Score: [0.491, 0.447, 0.423, 0.402]


Epoch 9/15: 100%|██████████| 20/20 [00:07<00:00,  2.71batch/s, loss=0.395]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan ons kan die negatiewe log-waarskynlikheidskostefunksie gebruik as volg : <eos>
BLEU Score: [0.95, 0.883, 0.841, 0.804]


Epoch 10/15: 100%|██████████| 20/20 [00:07<00:00,  2.52batch/s, loss=0.353]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 11/15: 100%|██████████| 20/20 [00:07<00:00,  2.64batch/s, loss=0.296]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 12/15: 100%|██████████| 20/20 [00:07<00:00,  2.61batch/s, loss=0.272]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 13/15: 100%|██████████| 20/20 [00:07<00:00,  2.59batch/s, loss=0.257]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 14/15: 100%|██████████| 20/20 [00:07<00:00,  2.57batch/s, loss=0.255]


Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]


Epoch 15/15: 100%|██████████| 20/20 [00:07<00:00,  2.64batch/s, loss=0.246]

Predicted: <sos> as ons die teikenuittree voorstel as $y\in\{0,1\}$ en ons $n$ afrigpunte het , dan kan ons die negatiewe log-waarskynlikheidskostefunksie skryf as : <eos>
BLEU Score: [1.0, 1.0, 1.0, 1.0]





## Evaluate on the training set

In [8]:
# Join each stored training sentence back into one space-separated string
# (presumably data_str holds token sequences - verify against Corpus).
EN_SRC = [" ".join(tokens) for tokens in english_data.data_str]
# Each reference is wrapped in its own single-element list: one reference per sentence
AF_REF = [[" ".join(tokens)] for tokens in afrikaans_data.data_str]
# Translate every training source with the translator's default decoding
TRANSLATED = list(map(translator.translate_sentence, EN_SRC))
corpus_bleu(TRANSLATED, AF_REF)

                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.9953410081611113
precisions          : [0.9955297878681152]
brevity_penalty     : 0.9998103726183742
length_ratio        : 0.9998103905953736
translation_length  : 36911
reference_length    : 36918
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.9927935832844483
precisions          : [0.9955297878681152, 0.9904404927940493]
brevity_penalty     : 0.9998103726183742
length_ratio        : 0.9998103905953736
translation_length  : 36911
reference_length    : 36918
******************************************************************************************
          

## Evaluate on the validation set

In [9]:
# Read the validation sentences, one per line. The encoding is passed
# explicitly so decoding of non-ASCII Afrikaans characters does not depend on
# the platform's locale default.
# NOTE(review): assumes the files are UTF-8 - confirm against the data source.
with open(f"{config.VAL_DATA}/english.txt", encoding="utf-8") as data:
    english_val = data.read().strip().split("\n")
with open(f"{config.VAL_DATA}/afrikaans.txt", encoding="utf-8") as data:
    afrikaans_val = data.read().strip().split("\n")

### Greedy Search

In [10]:
# One single-element reference list per validation sentence
VAL_AF_REF = [[ref_line] for ref_line in afrikaans_val]

# Translate with the translator's default decoding (the greedy-search run)
VAL_TRANSLATED = list(map(translator.translate_sentence, english_val))

corpus_bleu(VAL_TRANSLATED, VAL_AF_REF)

                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.6390875042560435
precisions          : [0.6390875042560435]
brevity_penalty     : 1.0
length_ratio        : 1.0056841528557732
translation_length  : 14685
reference_length    : 14602
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.5501041650275929
precisions          : [0.6390875042560435, 0.47351041972409746]
brevity_penalty     : 1.0
length_ratio        : 1.0056841528557732
translation_length  : 14685
reference_length    : 14602
******************************************************************************************
                                     BL

### Beam Search

In [11]:
# Re-translate the validation sources with beam search (width 3).
# This rebinds VAL_TRANSLATED, replacing the greedy-search translations.
VAL_TRANSLATED = [
    translator.translate_sentence(src_line, method="beam", beam_width=3)
    for src_line in english_val
]

corpus_bleu(VAL_TRANSLATED, VAL_AF_REF)

                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.6024474628395694
precisions          : [0.6024474628395694]
brevity_penalty     : 1.0
length_ratio        : 1.0688946719627448
translation_length  : 15608
reference_length    : 14602
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.5159631149156254
precisions          : [0.6024474628395694, 0.44189402790186244]
brevity_penalty     : 1.0
length_ratio        : 1.0688946719627448
translation_length  : 15608
reference_length    : 14602
******************************************************************************************
                                     BL

## Evaluate on the SUN validation set only

In [12]:
# Read the SUN-only validation sentences, one per line. The encoding is passed
# explicitly so decoding of non-ASCII Afrikaans characters does not depend on
# the platform's locale default.
# NOTE(review): assumes the files are UTF-8 - confirm against the data source.
with open(f"{config.VAL_DATA}/sun_english.txt", encoding="utf-8") as data:
    sun_english_val = data.read().strip().split("\n")
with open(f"{config.VAL_DATA}/sun_afrikaans.txt", encoding="utf-8") as data:
    sun_afrikaans_val = data.read().strip().split("\n")

### Greedy Search

In [13]:
# One single-element reference list per SUN validation sentence
SUN_VAL_AF = [[ref_line] for ref_line in sun_afrikaans_val]
# Translate with the translator's default decoding (the greedy-search run)
SUN_VAL_TRANSLATED = list(map(translator.translate_sentence, sun_english_val))
corpus_bleu(SUN_VAL_TRANSLATED, SUN_VAL_AF)

                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.38022813688212925
precisions          : [0.38022813688212925]
brevity_penalty     : 1.0
length_ratio        : 1.103303618248558
translation_length  : 4208
reference_length    : 3814
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.29089700193423074
precisions          : [0.38022813688212925, 0.22255340288127173]
brevity_penalty     : 1.0
length_ratio        : 1.103303618248558
translation_length  : 4208
reference_length    : 3814
******************************************************************************************
                                     BLEU

### Beam Search

In [14]:
# Re-translate the SUN validation sources with beam search (width 3).
# This rebinds SUN_VAL_TRANSLATED, replacing the greedy-search translations.
SUN_VAL_TRANSLATED = [
    translator.translate_sentence(src_line, method="beam", beam_width=3)
    for src_line in sun_english_val
]
corpus_bleu(SUN_VAL_TRANSLATED, SUN_VAL_AF)

                                     BLEU-1                                     
------------------------------------------------------------------------------------------
bleu                : 0.33950236479539375
precisions          : [0.3395023647953938]
brevity_penalty     : 1.0
length_ratio        : 1.2750393287886732
translation_length  : 4863
reference_length    : 3814
******************************************************************************************
                                     BLEU-2                                     
------------------------------------------------------------------------------------------
bleu                : 0.25392353867133005
precisions          : [0.3395023647953938, 0.18991668446913051]
brevity_penalty     : 1.0
length_ratio        : 1.2750393287886732
translation_length  : 4863
reference_length    : 3814
******************************************************************************************
                                     BLEU

In [15]:
metric = evaluate.load("bleu")
# Spot-check ten SUN validation sentences (indices 10-19) decoded with beam
# width 5; the window is defined once so sources, predictions and labels stay aligned.
window = slice(10, 20)
sources = sun_english_val[window]
predictions = [
    translator.translate_sentence(sent, method="beam", beam_width=5)
    for sent in sources
]
labels = SUN_VAL_AF[window]
for source, pred, lab in zip(sources, predictions, labels):
    print(f"Source    : {source}")
    # Prediction and label are cut to 150 characters for display only;
    # BLEU is computed on the full strings.
    print(f"Prediction: {pred[:150]}")
    print(f"Label     : {lab[0][:150]}")
    print(f"BLEU      : {metric.compute(predictions=[pred], references=lab)['bleu']}")
    print()

Source    : <sos> component <eos>
Prediction: <sos> hallo <eos>
Label     : <sos> komponent <eos>
BLEU      : 0.0

Source    : <sos> architecture <eos>
Prediction: <sos> hallo <eos>
Label     : <sos> argitektuur <eos>
BLEU      : 0.0

Source    : <sos> specification <eos>
Prediction: <sos> hallo <eos>
Label     : <sos> spesifikasies <eos>
BLEU      : 0.0

Source    : <sos> at which stage of the design process would we choose the communication protocol between subsystems <eos>
Prediction: <sos> watter van die volgende kontinue-tyd seine $x(t)$ sou \textbf{nie} lei na die drywingsdigtheidspektrum hierbo nie as dit gemonster word teen $f_
Label     : <sos> by watter stap van die ontwerpsproses word die kommunikasie-kanaal tussen substelsels gekies <eos>
BLEU      : 0.0

Source    : <sos> motivate your answer <eos>
Prediction: <sos> motiveer jou antwoord <eos>
Label     : <sos> motiveer jou antwoord <eos>
BLEU      : 1.0

Source    : <sos> describe the meaning if a system is described as a