In [1]:
downloaded = True
try:
  import datasets
except ImportError:
  downloaded = False
  !pip install -q tqdm
  !pip install -q rich
  !pip install -q datasets
  !pip install -q torchmetrics
  !pip install -q pytorch-lightning
  !pip install -q git+https://github.com/MagedSaeed/tkseem

In [2]:
# %%capture --no-stderr
# if not downloaded:
#   !python -m spacy download en_core_web_sm
#   !python -m spacy download de_core_news_sm

In [1]:
import os
import re
import math
import shutil
import string
from pathlib import Path


import numpy as np

import datasets

import torch
from torch import nn
from torch import Tensor
import torch.nn.functional as F


import pytorch_lightning as pl
from torchmetrics.text import BLEUScore
from pytorch_lightning.callbacks import Timer, ModelCheckpoint, EarlyStopping, LearningRateMonitor, RichProgressBar


from tqdm.auto import tqdm

import tkseem as tk

In [2]:
seed = 42
batch_size = 64
os.environ['CUDA_LAUNCH_BLOCKING']='1'

In [3]:
pl.seed_everything(seed)

Global seed set to 42


42

In [4]:
source_language_code = 'en'
target_language_code = 'de'

# Prepare the dataset

## Download

In [5]:
dataset = datasets.load_dataset('bentrevett/multi30k')
dataset

DatasetDict({
    train: Dataset({
        features: ['en', 'de'],
        num_rows: 29000
    })
    validation: Dataset({
        features: ['en', 'de'],
        num_rows: 1014
    })
    test: Dataset({
        features: ['en', 'de'],
        num_rows: 1000
    })
})

In [9]:
train_dataset = dataset['train'].to_pandas()
train_dataset

Unnamed: 0,en,de
0,"Two young, White males are outside near many b...",Zwei junge weiße Männer sind im Freien in der ...
1,Several men in hard hats are operating a giant...,Mehrere Männer mit Schutzhelmen bedienen ein A...
2,A little girl climbing into a wooden playhouse.,Ein kleines Mädchen klettert in ein Spielhaus ...
3,A man in a blue shirt is standing on a ladder ...,Ein Mann in einem blauen Hemd steht auf einer ...
4,Two men are at the stove preparing food.,Zwei Männer stehen am Herd und bereiten Essen zu.
...,...,...
28995,A woman behind a scrolled wall is writing,Eine Frau schreibt hinter einer verschnörkelte...
28996,A rock climber practices on a rock climbing wall.,Ein Bergsteiger übt an einer Kletterwand.
28997,Two male construction workers are working on a...,Zwei Bauarbeiter arbeiten auf einer Straße vor...
28998,An elderly man sits outside a storefront accom...,Ein älterer Mann sitzt mit einem Jungen mit ei...


In [6]:
val_dataset = dataset['validation'].to_pandas()
val_dataset

Unnamed: 0,en,de
0,A group of men are loading cotton onto a truck,Eine Gruppe von Männern lädt Baumwolle auf ein...
1,A man sleeping in a green room on a couch.,Ein Mann schläft in einem grünen Raum auf eine...
2,A boy wearing headphones sits on a woman's sho...,Ein Junge mit Kopfhörern sitzt auf den Schulte...
3,Two men setting up a blue ice fishing hut on a...,Zwei Männer bauen eine blaue Eisfischerhütte a...
4,A balding man wearing a red life jacket is sit...,"Ein Mann mit beginnender Glatze, der eine rote..."
...,...,...
1009,"At some sort of carnival, a man is making cott...",Bei einer Art Jahrmarkt stellt ein Mann Zucker...
1010,A bunch of police officers are standing outsid...,Eine Gruppe von Polizisten steht vor einem Bus.
1011,A elderly white-haired woman is looking inside...,Eine ältere weißhaarige Frau sieht in ihre Kas...
1012,Two men are standing at telephone booths outside.,Zwei Männer stehen an Telefonzellen im Freien.


In [7]:
test_dataset = dataset['test'].to_pandas()
test_dataset

Unnamed: 0,en,de
0,A man in an orange hat starring at something.,"Ein Mann mit einem orangefarbenen Hut, der etw..."
1,A Boston Terrier is running on lush green gras...,Ein Boston Terrier läuft über saftig-grünes Gr...
2,A girl in karate uniform breaking a stick with...,Ein Mädchen in einem Karateanzug bricht ein Br...
3,Five people wearing winter jackets and helmets...,Fünf Leute in Winterjacken und mit Helmen steh...
4,People are fixing the roof of a house.,Leute Reparieren das Dach eines Hauses.
...,...,...
995,"Marathon runners are racing on a city street, ...",Marathonläuferinnen laufen auf einer städtisch...
996,Asian woman wearing a sunhat while riding a bike.,Asiatische Frau trägt einen Sonnenhut beim Fah...
997,Some children are outside playing in the dirt ...,Ein paar Kinder sind im Freien und spielen auf...
998,An older man is playing a video arcade game.,Ein älterer Mann spielt ein Videospiel.


## Prepare

invistigating the sequence length

In [10]:
max_doc = max(train_dataset['en'], key=lambda document: len(document.split()))
len(max_doc.split()),max_doc

(37,
 'A soccer player wearing a red and white uniform catching the ball for a score as the opponent wearing a blue and white uniform is in the background along with a crowd of people watching the game')

In [11]:
max_doc = max(train_dataset['de'], key=lambda document: len(document.split()))
len(max_doc.split()),max_doc

(39,
 'Drei junge Erwachsenen sitzen herum, wobei eine junge Frau so tut, als ob sie einem der jungen Männer ins Gesicht tritt und dabei lacht, und der junge Mann hinter sieht aus, als ob er gerade mitten im Satz ist.')

In [12]:
seq_len = 35

Helper function to transform text to np dataset

English text processing

In [13]:
def process_source(text):
  strip_chars = string.punctuation
  clean_text = ''.join(c for c in text if c not in strip_chars)
  return clean_text.lower()

In [14]:
def process_target(text):
  strip_chars = string.punctuation
  strip_chars = strip_chars.replace('<','').replace('>','')
  return ''.join(c for c in text if c not in strip_chars)

In [15]:
def create_features_from_text_list(text_list,tokenizer,is_source=False):
  encoded = list()
  for doc in tqdm(text_list):
    if is_source:
      encoded_doc = tokenizer.encode(process_source(doc))
    else:
      encoded_doc = tokenizer.encode("<bos> "+process_target(doc)+" <eos>")
    encoded_doc = tokenizer.pad(encoded_doc,length=seq_len)
    encoded_doc = encoded_doc[:seq_len]
    if not is_source:
      if encoded_doc[-1] != tokenizer.token_to_id(tokenizer.pad_token):
        encoded_doc[-1] = tokenizer.token_to_id('<eos>')
    encoded.append(np.array(encoded_doc))
  return np.array(encoded)

## Tokenizers

create source tokenizer



In [16]:
source_tokenizer = tk.WordTokenizer(vocab_size=1_000_000)

In [17]:
source_tokenizer.train(text='\n'.join(map(process_source,train_dataset[source_language_code])))

Training WordTokenizer ...


In [19]:
source_tokenizer.vocab_size

10204

In [20]:

# vocab = {
#     vocab:freq for vocab,freq in source_tokenizer.vocab.items()
#     if freq > 1 or freq < 0
# }
# source_tokenizer.vocab = vocab
# source_tokenizer.vocab_size = len(vocab)
# source_tokenizer.vocab_size

create target tokenizer

In [18]:
target_tokenizer = tk.WordTokenizer(vocab_size=1_000_000,special_tokens=["<bos>","<eos>"])

In [19]:
target_tokenizer.train(text='\n'.join(map(process_target,train_dataset[target_language_code])))

Training WordTokenizer ...


In [20]:
target_tokenizer.vocab_size

19212

In [21]:
vocab = {
    vocab:freq for vocab,freq in target_tokenizer.vocab.items()
    if freq > 1 or freq < 0
}
target_tokenizer.vocab = vocab
target_tokenizer.vocab_size = len(vocab)
target_tokenizer.vocab_size
target_tokenizer.vocab_size

7967

In [22]:
list(target_tokenizer.vocab)[:10]

['<UNK>', '<PAD>', '<bos>', '<eos>', 'Ein', 'einem', 'in', 'und', 'mit', 'auf']

## Create Datasets and Dataloaders

prepare datasets as numpy objects

In [23]:
encoded_source_trainset = create_features_from_text_list(
    text_list=train_dataset[source_language_code],
    tokenizer=source_tokenizer,
    is_source = True,
  )
encoded_target_trainset = create_features_from_text_list(
    text_list=train_dataset[target_language_code],
    tokenizer=target_tokenizer,
  )
encoded_source_trainset,encoded_target_trainset

  0%|          | 0/29000 [00:00<?, ?it/s]

  0%|          | 0/29000 [00:00<?, ?it/s]

(array([[ 12,  19,  20, ...,   1,   1,   1],
        [104,  26,   3, ...,   1,   1,   1],
        [  2,  47,  28, ...,   1,   1,   1],
        ...,
        [ 12, 153, 205, ...,   1,   1,   1],
        [ 17, 224,   6, ...,   1,   1,   1],
        [  2,   6,   3, ...,   1,   1,   1]]),
 array([[  2,  19,  83, ...,   1,   1,   1],
        [  2,  82,  29, ...,   1,   1,   1],
        [  2,   4,  67, ...,   1,   1,   1],
        ...,
        [  2,  19, 305, ...,   1,   1,   1],
        [  2,   4, 132, ...,   1,   1,   1],
        [  2,   4,  10, ...,   1,   1,   1]]))

In [24]:
encoded_source_valset = create_features_from_text_list(
    text_list=val_dataset[source_language_code],
    tokenizer=source_tokenizer,
    is_source = True,
  )
encoded_target_valset = create_features_from_text_list(
    text_list=val_dataset[target_language_code],
    tokenizer=target_tokenizer,
  )
encoded_source_valset,encoded_target_valset

  0%|          | 0/1014 [00:00<?, ?it/s]

  0%|          | 0/1014 [00:00<?, ?it/s]

(array([[   2,   34,    9, ...,    1,    1,    1],
        [   2,    6,  364, ...,    1,    1,    1],
        [   2,   30,   18, ...,    1,    1,    1],
        ...,
        [   2,  224, 3120, ...,    1,    1,    1],
        [  12,   26,   13, ...,    1,    1,    1],
        [  12,   45,   18, ...,    1,    1,    1]]),
 array([[  2,  12,  36, ...,   1,   1,   1],
        [  2,   4,  10, ...,   1,   1,   1],
        [  2,   4,  33, ...,   1,   1,   1],
        ...,
        [  2,  12, 179, ...,   1,   1,   1],
        [  2,  19,  29, ...,   1,   1,   1],
        [  2,  19,  44, ...,   1,   1,   1]]))

In [25]:
encoded_source_testset = create_features_from_text_list(
    text_list=test_dataset[source_language_code],
    tokenizer=source_tokenizer,
    is_source = True,
  )
encoded_target_testset = create_features_from_text_list(
    text_list=test_dataset[target_language_code],
    tokenizer=target_tokenizer,
  )
encoded_source_testset,encoded_target_testset

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

(array([[   2,    6,    3, ...,    1,    1,    1],
        [   2, 3759, 2654, ...,    1,    1,    1],
        [   2,   28,    3, ...,    1,    1,    1],
        ...,
        [  69,   58,   13, ...,    1,    1,    1],
        [  17,  105,    6, ...,    1,    1,    1],
        [   2,   28,   16, ...,    1,    1,    1]]),
 array([[   2,    4,   10, ...,    1,    1,    1],
        [   2,    4, 4194, ...,    1,    1,    1],
        [   2,    4,   25, ...,    1,    1,    1],
        ...,
        [   2,    4,  300, ...,    1,    1,    1],
        [   2,    4,  132, ...,    1,    1,    1],
        [   2,    4,   25, ...,    1,    1,    1]]))

prepare datasets as torch datasets objects

In [26]:
# create tensor datasets
trainset = torch.utils.data.TensorDataset(
    torch.from_numpy(encoded_source_trainset),
    torch.from_numpy(encoded_target_trainset),
  )
valset = torch.utils.data.TensorDataset(
    torch.from_numpy(encoded_source_valset),
    torch.from_numpy(encoded_target_valset),
  )
testset = torch.utils.data.TensorDataset(
    torch.from_numpy(encoded_source_testset),
    torch.from_numpy(encoded_target_testset),
  )
len(trainset),len(valset),len(testset)

(29000, 1014, 1000)

build data loaders

In [27]:
train_loader = torch.utils.data.DataLoader(
    trainset,
    shuffle=True,
    num_workers=4,
    batch_size=batch_size,
  )
val_loader = torch.utils.data.DataLoader(
    valset,
    shuffle=False,
    num_workers=2,
    drop_last=False,
    batch_size=batch_size,
  )
test_loader = torch.utils.data.DataLoader(
    testset,
    shuffle=False,
    num_workers=2,
    drop_last=False,
    batch_size=batch_size,
  )

# Build the Transformer-Based Model

## Model Architecture

In [28]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [29]:
class PositionalEncoding(nn.Module):
    def __init__(
      self,
      emb_size: int,
      dropout: float,
      maxlen: int = 5000,
    ):
      super(PositionalEncoding, self).__init__()
      den = torch.exp(- torch.arange(0, emb_size, 2)* math.log(10000) / emb_size)
      pos = torch.arange(0, maxlen).reshape(maxlen, 1)
      pos_embedding = torch.zeros((maxlen, emb_size))
      pos_embedding[:, 0::2] = torch.sin(pos * den)
      pos_embedding[:, 1::2] = torch.cos(pos * den)
      pos_embedding = pos_embedding.unsqueeze(-2)

      self.dropout = nn.Dropout(dropout)
      self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding: torch.Tensor):
      return self.dropout(token_embedding + self.pos_embedding[:token_embedding.size(0), :])

In [30]:
# helper Module to convert tensor of input indices into corresponding tensor of token embeddings
class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size, emb_size, pad_token_id=1):
        super().__init__()
        # self.embedding = nn.Embedding(vocab_size, emb_size,padding_idx=pad_token_id)
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size

    def forward(self, tokens: Tensor):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)

In [31]:
def generate_square_subsequent_mask(sz):
  mask = (torch.triu(torch.ones((sz, sz), device=device)) == 1).transpose(0,1)
  mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
  return mask


def create_mask(src, tgt, pad_idx):
  src_seq_len = src.shape[1]
  tgt_seq_len = tgt.shape[1]
  tgt_mask = generate_square_subsequent_mask(tgt_seq_len)
  src_mask = torch.zeros((src_seq_len, src_seq_len),device=device).type(torch.bool)
  src_padding_mask = (src == pad_idx).clone().detach()
  tgt_padding_mask = (tgt == pad_idx).clone().detach()
  return src_mask, tgt_mask, src_padding_mask, tgt_padding_mask

In [32]:
# Seq2Seq Network
class Seq2SeqTransformer(pl.LightningModule):
    def __init__(
        self,
        nhead=8,
        emb_size=256,
        pad_token_id=1,
        num_decoder_layers=2,
        num_encoder_layers=2,
        dim_feedforward= 2048,
        dropout: float = 0.1,
        learning_rate = 0.0001,
        src_vocab_size=source_tokenizer.vocab_size,
        tgt_vocab_size=target_tokenizer.vocab_size,
    ):
      super().__init__()
      self.save_hyperparameters()

      self.pad_token_id = pad_token_id
      self.learning_rate = learning_rate
      self.source_vocab_size = src_vocab_size
      self.target_vocab_size = tgt_vocab_size

      self.transformer = nn.Transformer(
          d_model=emb_size,
          nhead=nhead,
          num_encoder_layers=num_encoder_layers,
          num_decoder_layers=num_decoder_layers,
          dim_feedforward=dim_feedforward,
          dropout=dropout,
          batch_first=True,
        )
      self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
      self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
      self.positional_encoding = PositionalEncoding(
          emb_size,
          dropout=dropout,
        )
      self.dense = nn.Linear(emb_size, tgt_vocab_size)

    def forward(self,src,trg):
      src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(
        src,
        trg,
        pad_idx=self.pad_token_id,
      )
      src_emb = self.positional_encoding(self.src_tok_emb(src))
      tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))
      outs = self.transformer(
          src=src_emb,
          tgt=tgt_emb,
          src_mask=src_mask,
          tgt_mask=tgt_mask,
          src_key_padding_mask=src_padding_mask,
          tgt_key_padding_mask=tgt_padding_mask,
          memory_key_padding_mask=src_padding_mask,
        )
      return self.dense(outs)

    def step(self, batch):
      inputs,targets = batch
      outputs = self(inputs,targets[:,:-1].contiguous())
      return outputs

    def training_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self.step(batch)
      loss = F.cross_entropy(
          outputs.view(-1, self.target_vocab_size),
          targets[:,1:].contiguous().view(-1),
          ignore_index=self.pad_token_id,
        )
      self.log('loss',loss,prog_bar=True)
      return loss

    def validation_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self.step(batch)
      loss = F.cross_entropy(
          outputs.view(-1, self.target_vocab_size),
          targets[:,1:].contiguous().view(-1),
          ignore_index=self.pad_token_id,
        )
      self.log('val_loss',loss,prog_bar=True)
      return loss

    def test_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self.step(batch)
      loss = F.cross_entropy(
          outputs.view(-1, self.target_vocab_size),
          targets[:,1:].contiguous().view(-1),
          ignore_index=self.pad_token_id,
        )
      self.log('test_loss',loss,prog_bar=True)
      return loss

    def configure_optimizers(self):
      optimizer = torch.optim.Adam(
          self.parameters(),
          lr=self.learning_rate,
          eps=1e-9
      )
      scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
          optimizer=optimizer,
          factor=0.25,
          patience=1,
          verbose=True,
      )
      return {
          "optimizer": optimizer,
          "lr_scheduler": scheduler,
          "monitor": "val_loss",
      }

    def translate(self, input_sentence, source_tokenizer,target_tokenizer, max_len=seq_len):
      was_training = self.training is True
      self.eval()
      encoded_input_sentence = torch.tensor(source_tokenizer.encode(input_sentence)).view(1,-1).to(device)
      target = '<bos> '
      for i in range(max_len):
        encoded_target = torch.tensor(target_tokenizer.encode(target)).view(1,-1).to(device)
        outputs = self(src=encoded_input_sentence,trg=encoded_target)
        next_word_id = torch.argmax(outputs[0,i,:])
        next_word = target_tokenizer.id_to_token(next_word_id)
        target += f' {next_word.strip()} '
        if next_word == '<eos>':
          break
      if was_training:
        self.train()
      return target


## trainer

In [35]:
def train_model(
    model,
    train_dataloader,
    val_dataloader,
    max_epochs=20,
  ):
  checkpoints_path = Path(f"./NMT/")
  shutil.rmtree(checkpoints_path, ignore_errors=True)
  checkpoint_callback = ModelCheckpoint(
      mode="min",
      save_top_k=1,
      verbose=False,
      save_last=True,
      monitor="val_loss",
      save_weights_only=False,
      auto_insert_metric_name=True,
      save_on_train_epoch_end=False,
      dirpath=f"{checkpoints_path}/checkpoints",
      filename="{epoch}-{val_loss:.3f}-{step}",
  )
  callbacks = list()
  # callbacks.append(RichProgressBar())
  callbacks.append(checkpoint_callback)
  early_stopping_callback = EarlyStopping(
      monitor="val_loss",
      # min_delta=0.025,
      min_delta=0,
      patience=6,
      check_finite=True,
  )
  callbacks.append(early_stopping_callback)
  callbacks.append(RichProgressBar())
  lr_monitor = LearningRateMonitor(
      logging_interval="step",
      log_momentum=True,
  )
  callbacks.append(lr_monitor)
  devices = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  trainer = pl.Trainer(
      devices=[0],
      deterministic=True,
      callbacks=callbacks,
      gradient_clip_val=5,
      fast_dev_run=False,
      max_epochs=max_epochs,
      val_check_interval=0.5,
      accelerator="auto",
      # log_every_n_steps=max(len(train_dataloader) // 25, 1),
      log_every_n_steps=max(len(train_dataloader) // 25, 1),
  )
  trainer.validate(
      model=model,
      dataloaders=val_dataloader,
  )
  trainer.fit(
      model,
      train_dataloader,
      val_dataloader,
  )
  return trainer

## train and run

In [36]:
model = Seq2SeqTransformer()

# model initialization
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)


trainer = train_model(
    model=model,
    train_dataloader=train_loader,
    val_dataloader=val_loader,
    max_epochs=50,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A4500') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
  rank_zero_warn(


Output()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Output()

In [38]:
trainer.test(ckpt_path='best',dataloaders=test_loader)

Restoring states from the checkpoint path at /mnt/projects_volume/MyProjectsLarge/DotlessArabic/NMT/checkpoints/epoch=20-val_loss=1.998-step=9534.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /mnt/projects_volume/MyProjectsLarge/DotlessArabic/NMT/checkpoints/epoch=20-val_loss=1.998-step=9534.ckpt
  rank_zero_warn(


Output()

[{'test_loss': 2.000175952911377}]

In [43]:
model = Seq2SeqTransformer.load_from_checkpoint(trainer.checkpoint_callback.best_model_path).to(device)

In [44]:
source_test_sentences = list(map(process_source,test_dataset[source_language_code]))

In [45]:
source_test_sentences[1]

'a boston terrier is running on lush green grass in front of a white fence'

In [46]:
model.translate(
    # input_sentence = process_english('a man is crossing a road'),
    input_sentence = source_test_sentences[1],
    source_tokenizer=source_tokenizer,
    target_tokenizer=target_tokenizer,
)



'<bos>  Ein  <UNK>  rennt  vor  einem  grünen  Zaun  im  Gras  <eos> '

## Predicting on test set

In [47]:
source_test_sentences = list(map(process_source,test_dataset[source_language_code]))
len(source_test_sentences)

1000

In [48]:
sources = [sentence for sentence in source_test_sentences]

In [49]:
sources[0]

'a man in an orange hat starring at something'

In [50]:
targets = [['<bos> '+process_target(sentence)+' <eos>'] for sentence in list(test_dataset[target_language_code])]
targets[0]

['<bos> Ein Mann mit einem orangefarbenen Hut der etwas anstarrt <eos>']

In [51]:
preds = [model.translate(
    sentence,
    source_tokenizer=source_tokenizer,
    target_tokenizer=target_tokenizer,
  )
  for sentence in tqdm(sources)
]

  0%|          | 0/1000 [00:00<?, ?it/s]

Address the issue of multi spaces of preds

In [None]:
preds = list(map(lambda text:re.sub('\s+',' ',text), tqdm(preds)))

  0%|          | 0/1000 [00:00<?, ?it/s]

In [None]:
preds[0],targets[0]

('<bos> Ein Mann mit einem orangefarbenen Hut starrt auf etwas <eos> ',
 ['<bos> Ein Mann mit einem orangefarbenen Hut der etwas anstarrt <eos>'])

In [None]:
bleu = BLEUScore(n_gram=4)
bleu(preds, targets)

tensor(0.3054)