In [2]:
import torch
import torch.nn as nn
import torch.optim as optim


import spacy
import numpy as np

import pickle
import random
import math
import time

import nltk
from nltk.tokenize import sent_tokenize

import gzip
import xml.etree.ElementTree as ET

import re

from tqdm import tqdm

import wandb

import torch.utils.data as data


In [3]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/jpierre/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Get necessary data

In [4]:
# learning set
with open('trainDict.pkl', 'rb') as file:
    trainDict = pickle.load(file)

# small validation set (useless here btw)
with open('valDict.pkl', 'rb') as file:
    valDict = pickle.load(file)

# test set
with open('testDict.pkl', 'rb') as file:
    testDict = pickle.load(file)

## Perform pre-processing steps

In [5]:
MAX_LENGTH = 40
SOS = '[CLS] '
EOS = ' [SEP]'

The preprocessing is realized according to the following steps:
1) add spaces before punctuation in order to take them as "words" later
2) get rid of possibly rigged characters
3) convert everything to lower cases and get rid of initial and final spaces

NB: an option is available in order to get apply everything on each sentence within the instances

In [6]:
def preprocess(sentences1:list[str], sentences2:list[str], directTreat:bool = True, addExtremes:bool = True):
    res1 = []
    res2 = []
    for i in range(len(sentences1)):

        sentence1 = sentences1[i]
        sentence2 = sentences2[i]
        
        if directTreat:
            sentence1 = re.sub(r"([.!?])", r" \1", sentence1)                             #add a space before '.', '!', '?'
            sentence1 = re.sub(r"[^a-zA-Zà-úÀ-ÚéèëËÉûÛ!?]+", r" ", sentence1)               # get rid of bad characters
            # new
            # sentence1 = re.sub(r"[^\w\s\']", r" ", sentence1)
            sentence1 = re.sub(r" +", r" ", sentence1)                                  # case of multiple spaces

            sentence2 = re.sub(r"([.!?])", r" \1", sentence2)                             #add a space before '.', '!', '?'
            sentence2 = re.sub(r"[^a-zA-Zà-úÀ-ÚéèëËÉûÛ!?]+", r" ", sentence2)               # get rid of bad characters
            # new
            sentence2 = re.sub(r" +", r" ", sentence2)

            L1 = len(sentence1.split(' '))
            L2 = len(sentence2.split(' '))
            if L1 < MAX_LENGTH:
                if L2 < MAX_LENGTH:
                    if addExtremes:
                        res1.append(SOS + sentence1.lower().strip() + EOS)
                        res2.append(SOS + sentence2.lower().strip() + EOS)
                    else:
                        res1.append(sentence1.lower().strip())
                        res2.append(sentence2.lower().strip())
        else:
            sentence1 = sent_tokenize(sentence1)                                          # tackle each sentence individally and perform some steps
            sentence2 = sent_tokenize(sentence2)
            
            if len(sentence1) == len(sentence2):
                for i in range(len(sentence1)):
                    s1 = sentence1[i]
                    s2 = sentence2[i]

                    s1 = re.sub(r"([.!?])", r" \1", s1)                                       #add a space before '.', '!', '?'
                    s1 = re.sub(r"[^a-zA-Zà-úÀ-ÚéèëËÉûÛ!?]+", r" ", s1)                         # get rid of bad characters
                    # new
                    s1 = re.sub(r" +", r" ", s1)

                    s2 = re.sub(r"([.!?])", r" \1", s2)                                       #add a space before '.', '!', '?'
                    s2 = re.sub(r"[^a-zA-Zà-úÀ-ÚéèëËÉûÛ!?]+", r" ", s2)                         # get rid of bad characters
                    # new
                    s2 = re.sub(r" +", r" ", s2)


                    L1 = len(s1.split(' '))
                    L2 = len(s2.split(' '))
                    if L1 < MAX_LENGTH:
                        if L2 < MAX_LENGTH:
                            if addExtremes:
                                res1.append(SOS + s1.lower().strip() + EOS)
                                res2.append(SOS + s2.lower().strip() + EOS)
                            else:
                                res1.append(s1.lower().strip())
                                res2.append(s2.lower().strip())

    return res1, res2

In [7]:
resEng, resFr = preprocess(trainDict['eng'],trainDict['fr'], directTreat=False, addExtremes=False)


In [8]:
n = 40
print(resEng[n])
print(resFr[n])

a school on higham hill road was later named edward redhead junior school
une école sur higham hill road est nommée plus tard edward redhead junior school


## Get the txt file of the data

In [9]:
def writeCSV(filePatth:str, sentences:list[str])->bool:
    with open(filePatth, "w", encoding="utf-8") as file:
        # Write each sentence followed by a newline character
        for sentence in tqdm(sentences):
            file.write(sentence + "\n")

    return 1

In [10]:
writeCSV('engData.txt', resEng)

100%|██████████| 41474/41474 [00:00<00:00, 1644353.99it/s]


1

In [11]:
writeCSV('frData.txt', resFr)

100%|██████████| 41474/41474 [00:00<00:00, 1259563.70it/s]


1

## Create vocabulary

In [12]:
# maintain a str having all the words


def getVocAndDicts(data:list[str]):
    wordsVoc = []

    for text in tqdm(resEng):
        wordsVoc.extend(sorted(list(set(text.split(' ')))))
        wordsVoc = list(set(wordsVoc))


    word2Index = {ch:i for i, ch in enumerate(wordsVoc)}
    index2Word = {i:ch for i, ch in enumerate(wordsVoc)}

    return wordsVoc, word2Index, index2Word


In [13]:
wordsVocEng, word2IndexEng, index2WordEng = getVocAndDicts(resEng)
wordsVocFr, word2IndexFr, index2WordFr = getVocAndDicts(resFr)

100%|██████████| 41474/41474 [01:29<00:00, 465.03it/s]
100%|██████████| 41474/41474 [01:28<00:00, 466.58it/s]


In [14]:
print(wordsVocEng[-100:])

['mulla', 'meacher', 'geographer', 'marche', 'gromit', 'ethic', 'impacts', 'pebibyte', 'apocalyptic', 'accomplished', 'deputation', 'bruch', 'fuse', 'wissenschaften', 'pa', 'amendment', 'miró', 'wholesalers', 'rejoice', 'heffener', 'karen', 'invader', 'skier', 'courtaulds', 'harris', 'cern', 'crittenden', 'administrate', 'adamstown', 'cherokees', 'ashigaru', 'athletic', 'mercantile', 'bloodlines', 'belsize', 'lgpl', 'chimera', 'alto', 'breakout', 'idries', 'ikenob', 'oboe', 'infrared', 'julie', 'winnsboro', 'stand', 'parsifal', 'athlete', 'utopians', 'revenue', 'biomimicry', 'ahfs', 'endeavors', 'ganga', 'refuted', 'larsen', 'acknowledgement', 'madhyamgram', 'wb', 'finlandssvenska', 'moyo', 'gris', 'accesses', 'stormaktstiden', 'philracom', 'criterion', 'return', 'ketchum', 'hernán', 'zipper', 'imitations', 'charisma', 'stamford', 'keybase', 'nightjet', 'borderlands', 'womersley', 'bower', 'specialties', 'citrate', 'meroitic', 'kangaroo', 'almería', 'hadrian', 'mushroom', 'acreage', 's

In [16]:
print(word2IndexEng['conformist'])
print(index2WordEng[21116])

21116
conformist


## Define the different embeddings

Different kinds of embeddings considered:
1) Word2Vec
2) GloVe
3) FastText
4) Bert

### Word2Vec
### https://radimrehurek.com/gensim/models/word2vec.html

In [16]:
import gensim

In [23]:
engCorp = [nltk.word_tokenize(sent) for sent in resEng]
frCorp = [nltk.word_tokenize(sent) for sent in resFr]

In [27]:
w2vEng1 = gensim.models.Word2Vec(engCorp, min_count = 1, vector_size=100, window = 5)
w2vEng1.save('word2Vec_english_cbow')

In [32]:
w2vEng2 = gensim.models.Word2Vec(engCorp, min_count = 1, vector_size=100, window = 5, sg = 1)
w2vEng2.save('word2Vec_english_skipgram')

In [30]:
w2vFr1 = gensim.models.Word2Vec(frCorp, min_count = 1, vector_size=100, window = 5)
w2vEng1.save('word2Vec_french_cbow')

In [33]:
w2vFr2 = gensim.models.Word2Vec(frCorp, min_count = 1, vector_size=100, window = 5, sg = 1)
w2vFr2.save('word2Vec_french_skipgram')

In [46]:
w2vEng2.wv.most_similar('art')

[('contemporary', 0.9020349383354187),
 ('visual', 0.8855995535850525),
 ('literary', 0.8743351101875305),
 ('renaissance', 0.8739112615585327),
 ('exhibition', 0.8719915151596069),
 ('science', 0.869580090045929),
 ('literature', 0.867002546787262),
 ('specializing', 0.8665003776550293),
 ('opera', 0.8648836612701416),
 ('photography', 0.8637229204177856)]

## GloVe

### We used glove-py but we could directly train the glove model from the Standford website (https://nlp.stanford.edu/projects/glove/)

ImportError: /home/jpierre/anaconda3/envs/myenvPy/lib/python3.9/site-packages/glove_pybind.cpython-39-x86_64-linux-gnu.so: undefined symbol: _ZN5Glove5trainENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE

## FastText
### https://fasttext.cc/docs/en/python-module.html

In [8]:
import fasttext

In [9]:
file1 = 'engData.txt'
file2 = 'frData.txt'

In [12]:
Engmodel1 = fasttext.train_unsupervised(file1, model='skipgram', dim = 100)
Engmodel1.save_model("Engmodel_skipgram.bin")

Read 0M words
Number of words:  11181
Number of labels: 0
Progress: 100.0% words/sec/thread:    2182 lr:  0.000000 avg.loss:  2.401508 ETA:   0h 0m 0s


In [24]:
Engmodel2 = fasttext.train_unsupervised(file1, model="cbow", dim = 100)
Engmodel2.save_model("Engmodel_cbow.bin")

Read 0M words
Number of words:  11181
Number of labels: 0
Progress: 100.0% words/sec/thread:    3067 lr:  0.000000 avg.loss:  2.595868 ETA:   0h 0m 0s


In [25]:
frModel1 = fasttext.train_unsupervised(file1, model='skipgram', dim = 100)
frModel1.save_model("frModel_skipgram.bin")

Read 0M words
Number of words:  11181
Number of labels: 0
Progress: 100.0% words/sec/thread:    2250 lr:  0.000000 avg.loss:  2.403181 ETA:   0h 0m 0s


In [26]:
frModel2 = fasttext.train_unsupervised(file1, model='cbow', dim = 100)
frModel2.save_model("frModel_cbow.bin")

Read 0M words
Number of words:  11181
Number of labels: 0
Progress: 100.0% words/sec/thread:    3143 lr:  0.000000 avg.loss:  2.599534 ETA:   0h 0m 0s


## Bert
(see https://www.scaler.com/topics/nlp/huggingface-transformers/ or https://mccormickml.com/2019/05/14/BERT-word-embeddings-tutorial/)

In [17]:
from transformers import BertTokenizer, BertModel
from transformers import AutoModel, AutoTokenizer


  from .autonotebook import tqdm as notebook_tqdm


In [18]:
tokenizerEng = BertTokenizer.from_pretrained('bert-base-uncased')

In [19]:
tokenizerFr = BertTokenizer.from_pretrained("dbmdz/bert-base-french-europeana-cased")

In [20]:
def getBertTokens(sentences, tokenizer):
    res = []
    Lmax = float("-inf")
    for sentence in tqdm(sentences):
        tokens = tokenizer.tokenize('[CLS] ' + sentence + ' [SEP]')     # [SEP] and [CLS] already taken into account
        res.append(tokens)
        if (len(res[-1]) > Lmax):
            Lmax = len(res[-1])
            
    print(Lmax)
        
    return res

In [21]:
tokensEng = getBertTokens(resEng, tokenizerEng)

100%|██████████| 41474/41474 [00:15<00:00, 2673.69it/s]

91





In [22]:
tokensFr = getBertTokens(resFr, tokenizerFr)

100%|██████████| 41474/41474 [00:12<00:00, 3214.21it/s]

100





In [23]:
print(resFr[0])
print(tokensFr[0])

sváfa a donné son nom à helgi et pendant ses batailles elle est toujours là pour lui le protégeant du danger
['[CLS]', '[UNK]', 'a', 'donné', 'son', 'nom', 'à', 'hel', '##gi', 'et', 'pendant', 'ses', 'batailles', 'elle', 'est', 'toujours', 'là', 'pour', 'lui', 'le', 'proté', '##geant', 'du', 'danger', '[SEP]']


In [24]:
print(resEng[0])
print(tokensEng[0])

sváfa had given helgi his name and during his battles she was always there for him shielding him from danger
['[CLS]', 'sv', '##af', '##a', 'had', 'given', 'he', '##l', '##gi', 'his', 'name', 'and', 'during', 'his', 'battles', 'she', 'was', 'always', 'there', 'for', 'him', 'shielding', 'him', 'from', 'danger', '[SEP]']


In [25]:
# let's consider 128 as maximum size
Lmax = 128

def BertEncode(sentences, tokenizer):
    inputIdsList = []
    maskList = []
    for sentence in tqdm(sentences):
        encodedWords = tokenizer.encode_plus(
                                    sentence,
                                    add_special_tokens = True,
                                    max_length = Lmax,
                                    padding = 'max_length',
                                    return_attention_mask = True,
                                    return_tensors = 'pt')
        inputIdsList.append(encodedWords['input_ids'])
        maskList.append(encodedWords['attention_mask'])
    return inputIdsList, maskList

In [26]:
inputIdsEng, masksEng = BertEncode(resEng, tokenizerEng)

100%|██████████| 41474/41474 [00:22<00:00, 1855.55it/s]


In [27]:
inputIdsFr, masksFr = BertEncode(resFr, tokenizerFr)

100%|██████████| 41474/41474 [00:20<00:00, 2065.28it/s]


In [28]:
# check same size
print(tokensEng[0])
print(len(tokensEng[0]))
print(torch.sum(masksEng[0]))

['[CLS]', 'sv', '##af', '##a', 'had', 'given', 'he', '##l', '##gi', 'his', 'name', 'and', 'during', 'his', 'battles', 'she', 'was', 'always', 'there', 'for', 'him', 'shielding', 'him', 'from', 'danger', '[SEP]']
26
tensor(26)


In [29]:
print(tokensFr[0])
print(len(tokensFr[0]))
print(torch.sum(masksFr[0]))

['[CLS]', '[UNK]', 'a', 'donné', 'son', 'nom', 'à', 'hel', '##gi', 'et', 'pendant', 'ses', 'batailles', 'elle', 'est', 'toujours', 'là', 'pour', 'lui', 'le', 'proté', '##geant', 'du', 'danger', '[SEP]']
25
tensor(25)


In [30]:
BertEng = BertModel.from_pretrained('bert-base-uncased')
BertFr = BertModel.from_pretrained("dbmdz/bert-base-french-europeana-cased")


BertEng.eval()
BertFr.eval()
print('ok')


ok


In [31]:
def getBertEmbedding(sentences, BertModel, tokenizer):
    
    ## Get the input ids and the masks for all the sentences
    
    inputIds, masks = BertEncode(sentences, tokenizer)
    
    ## Compute the Bert embedding for each
    
    inputIdsTensor = torch.vstack(inputIds)
    masksTensor = torch.vstack(masks)
    
    out = BertModel(inputIdsTensor, attention_mask = masksTensor)[0]
    res = []
    
    for i in range(len(masks)):
        res.append(out[i, masks[i].squeeze().bool(), :])
    return res

In [32]:
inputIdsTensor = torch.vstack(inputIdsEng[:4])
masksTensor = torch.vstack(masksEng[:4])
print(BertEng(inputIdsTensor, attention_mask = masksTensor)[0].shape)

torch.Size([4, 128, 768])


In [33]:
inputIdsTensor = torch.vstack(inputIdsFr[:4])
masksTensor = torch.vstack(masksFr[:4])
print(BertFr(inputIdsTensor, attention_mask = masksTensor)[0].shape)

torch.Size([4, 128, 768])


In [34]:
embsEng = getBertEmbedding(resEng[:4], BertModel = BertEng, tokenizer = tokenizerEng)
embsFr = getBertEmbedding(resFr[:4], BertModel = BertFr, tokenizer = tokenizerFr)

100%|██████████| 4/4 [00:00<00:00, 989.22it/s]
100%|██████████| 4/4 [00:00<00:00, 1174.71it/s]


In [35]:
print(tokensEng[1])
print(len(tokensEng[1]))
print(embsEng[1].shape)

['[CLS]', 'wesley', 'was', 'promoted', 'to', 'brigadier', 'general', 'in', 'while', 'in', 'kabul', 'afghanistan', '[SEP]']
13
torch.Size([13, 768])


In [36]:
print(tokensFr[1])
print(len(tokensFr[1]))
print(embsFr[1].shape)

['[CLS]', 'we', '##sl', '##ey', 'a', 'été', 'promu', 'général', 'de', 'brigade', 'en', 'alors', 'qu', 'il', 'se', 'trouvait', 'à', 'k', '##abo', '##ul', 'en', 'af', '##gh', '##ani', '##stan', '[SEP]']
26
torch.Size([26, 768])


### Make a function to centralize everyhting

In [38]:
def getEmbedding(sentences: list[str], model = 'Bert', params:list = []):
    if model == 'fasttext':
        return model[word]
    if model == 'gloVe':
        pass
    if model == 'word2Vec':
        pass
    if model == 'Bert':
        return getBertEmbedding(sentences, BertModel = params[0], tokenizer = params[1])

### Dataset

In [39]:
class textDatasets(Dataset):
    """
    Class to generate data tuples for learning
    """

    def __init__(self, lang1List:list[str], lang2List:list[str]):
        """ 
        Args:
        -----
        - `lang1List`: list of sentences to translate
        - `lang2List`: list of translated sentences
        """

        self.length = len(lang1List)

        # should encode here
        
        self.lang1List = lang1List
        self.lang2List = lang2List


    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        
        return self.lang1List[idx], self.lang2List[idx], idx

In [40]:
trainEng, trainFr = preprocess(trainDict['eng'],trainDict['fr'], directTreat=False, addExtremes=False)
valEng, valFr = preprocess(valDict['eng'],valDict['fr'], directTreat=False, addExtremes=False)
testEng, testFr = preprocess(testDict['eng'],testDict['fr'], directTreat=False, addExtremes=False)



datasetLearning = textDatasets(lang1List=trainEng, lang2List = trainFr)
datasetValidation = textDatasets(lang1List=valEng, lang2List = valFr)
datasetTest = textDatasets(lang1List=testEng, lang2List = testFr)

In [41]:
loaderLearning = data.DataLoader(datasetLearning, batch_size = 100, shuffle= True, num_workers = 1)
loaderValidation = data.DataLoader(datasetValidation, batch_size = 100, shuffle= True, num_workers = 1)
loaderTest = data.DataLoader(datasetTest, batch_size = 100, shuffle= True, num_workers = 1)

In [42]:
for s1, s2, _ in loaderLearning:
    print(list(s1)[0:4])
    print("")
    print(list(s2)[0:4])
    
    emb1 = getEmbedding(s1, model = 'Bert', params = [BertEng, tokenizerEng])
    print(emb1[0].shape)
    emb2 = getEmbedding(s2, model = 'Bert', params = [BertFr, tokenizerFr])
    print(emb2[0].shape)
    break

['the pamacca resort is the northern part of tapanahony and mainly inhabited by the paramaccan people', 'within the reserve spruce typically dominates although some localized areas have major amount of pines', 'national register of historic places registration canal street station post office', 'while bloodshot was self aware to a degree the human machine hybrid was still deemed to inflict unacceptably high levels of collateral damage']

['le ressort de pamacca est la partie nord de tapanahony et est principalement habité par le peuple paramacca', 'au sein de la réserve l épicéa domine même si quelques zones circonscrites présentent d importantes quantité de pins a', 'national park service', 'alors que bloodshot était conscient de lui même dans une certaine mesure l hybride homme machine était toujours réputé infliger des niveaux de dommages collatéraux élevés inacceptables']


100%|██████████| 100/100 [00:00<00:00, 1762.84it/s]


torch.Size([24, 768])


100%|██████████| 100/100 [00:00<00:00, 1982.42it/s]


AttributeError: 'list' object has no attribute 'shape'

## Architecture

### encoder

In [110]:
class encoder(nn.Module):
    def __init__(self, inputShape:int, hiddenShape:int, dropout:float=0.2):
        super(encoder, self).__init__()

        self.inputShape = inputShape
        self.hiddenShape = hiddenShape
        self.dropoutProb = dropout

        self.gru = nn.GRU(inputShape, hiddenShape, batch_first=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x:torch.tensor):

        ## [B, L, D]
        x = self.dropout(x)
        output, hidden = self.gru(x)
        return output, hidden

### decoder

In [111]:
class decoder(nn.Module):
    def __init__(self, inputShape:int, outputShape:int, dropout = 0.2):
        super(decoder, self).__init__()

        self.inputShape = inputShape
        self.outputShape = outputShape
        self.dropoutProb = dropout

        self.dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(inputShape, inputShape, batch_first=True)
        self.out = nn.Linear(inputShape, outputShape)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden  = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: Feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()  # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        return decoder_outputs, decoder_hidden, None # We return `None` for consistency in the training loop

    def forward_step(self, input, hidden):
        output = self.embedding(input)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.out(output)
        return output, hidden

In [None]:
enc = encoder()

### Perform training

In [112]:
nbEpoch=40

criterion = nn.NLLLoss()

In [113]:
wandb.init(project = 'master_thesis', name = "NLP_init")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjepi1202[0m ([33muliege_action_spotting_2022_2023_context[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
optimizerEncoder = torch.optim.Adam(encoder.parameters(), lr=0.005, weight_decay=5e-4)
optimizerDecoder = torch.optim.Adam(decoder.parameters(), lr=0.005, weight_decay=5e-4)

wandb.watch(model, log = 'all', log_freq=100)
model.train()

In [None]:
def computeLoss(out, y, criterion):
    pass

In [None]:
model = 'Bert'

In [None]:
j = 0

for i in range(nbEpoch):

    model.train()

    for sent1, sent2, _ in tqdm(loaderLearning):
        
        ## get the embeddings of the first sentences (in english)
        
        sent1 = getEmbedding(sent1, model = model).to(device)
        
        x = x.to(device)
        y = y.to(device)

        out =  model(x)
        

        loss = criterion(out.reshape(-1), y.reshape(-1))
        
        optimizerEncoder.zero_grad()
        optimizerDecoder.zero_grad()
        loss.backward()
        optimizerEncoder.step()
        optimizerDecoder.step()
        
        if ((j+1) % 100) == 0:
            wandb.log({'epoch': i, 'Training Loss': loss})

        if ((j+1) % 5000) == 0:
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for x_val, y_val, _ in loaderValidation:
                    x_val = x_val.to(device)
                    y_val = y_val.to(device)
                    
                    out = model(x_val)
                    
                    val_loss += criterion(out, y_val)
                    
                wandb.log({'epoch': i, 'Validation Loss': val_loss})
            NN.train()

        j += 1

### Test the learned model

In [None]:
## compute bleu