In [None]:
!pip install sacrebleu

In [None]:
from torchtext.models import T5Transform

# Tokenizer model file (given as a URL) followed by the length limit and the
# special-token ids, listed in the same order they are handed to T5Transform.
t5_sp_model_path = "https://download.pytorch.org/models/text/t5_tokenizer_base.model"
max_seq_len = 512
eos_idx = 1
padding_idx = 0

# Build the text transform from the settings above.
transform = T5Transform(
    sp_model_path=t5_sp_model_path,
    max_seq_len=max_seq_len,
    eos_idx=eos_idx,
    padding_idx=padding_idx,
)

Alternatively, we can use the transform shipped with the pre-trained models, which does all of the above out of the box:

```
from torchtext.models import T5_BASE_GENERATION
transform = T5_BASE_GENERATION.transform()
```


## Model Preparation

torchtext provides SOTA pre-trained models that can be used directly for NLP tasks or fine-tuned on downstream tasks. Below
we use the pre-trained T5 model with standard base configuration to perform text summarization, sentiment classification, and
translation. For additional details on available pre-trained models, see [the torchtext documentation](https://pytorch.org/text/main/models.html).





In [None]:
from torchtext.models import T5_BASE_GENERATION


# Pre-trained bundle; both the transform and the model below are obtained from it.
t5_base = T5_BASE_GENERATION
# Rebinds `transform` to the one shipped with the pre-trained bundle.
transform = t5_base.transform()
model = t5_base.get_model()
# Put the model in evaluation mode for inference. This is the last expression of the
# cell, so whatever it returns is echoed as the cell output.
model.eval()

## GenerationUtils

We can use torchtext's ``GenerationUtils`` to produce an output sequence based on the input sequence provided. This calls on the
model's encoder and decoder, and iteratively expands the decoded sequences until the end-of-sequence token is generated
for all sequences in the batch. The ``generate`` method shown below uses greedy search to generate the sequences. Beam search and
other decoding strategies are also supported.





In [None]:
from torchtext.prototype.generate import GenerationUtils
from functools import partial
from torch.utils.data import DataLoader

# Wrap the T5 model loaded earlier so output sequences can be generated from it.
# NOTE(review): `partial` and `DataLoader` are not used in any cell visible here —
# presumably they belong to dataset/batching cells; confirm before removing.
sequence_generator = GenerationUtils(model)

Finally, we can also load the Multi30k dataset to demonstrate English to German translation using the T5 model.
This dataset has a train, validation, and test split. Below we demo on the test split.

The T5 model uses the prefix "translate English to German" for this task.



## Generate Translations

Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k
test set.




## Translation Output

Output for the first batch of test examples:

   Example 1:

   input_text: translate English to German: A man in an orange hat starring at something.

   prediction: Ein Mann in einem orangen Hut, der an etwas schaut.

   target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.


   Example 2:

   input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence.

   prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun.

   target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun.


   Example 3:

   input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick.

   prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick.

   target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt.


   Example 4:

   input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background.

   prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund.

   target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund.


   Example 5:

   input_text: translate English to German: People are fixing the roof of a house.

   prediction: Die Leute fixieren das Dach eines Hauses.

   target: Leute Reparieren das Dach eines Hauses.




In [None]:
from sacrebleu import corpus_bleu
def bleu_score(li_abs_hyp, li_abs_ref):
    """
    Score a list of hypotheses against a single list of references with BLEU.

    :param li_abs_hyp: list of hypothesis strings, one per example
    :param li_abs_ref: list of reference strings, aligned with ``li_abs_hyp``
    :return: the ``score`` attribute of the result returned by ``corpus_bleu``
    """
    # Only one reference set is available, so it is wrapped in a one-element list.
    reference_sets = [li_abs_ref]
    result = corpus_bleu(li_abs_hyp, reference_sets)
    return result.score


In [None]:
import pandas as pd

# Load the test CSV, using its first column as the index.
test = pd.read_csv("test_2016_flickr.csv", index_col=0)
# Prepend the T5 task prefix to every sentence in the "to" column.
test_eng = ["translate English to German: " + sentence for sentence in test["to"]]

In [None]:
# Number of rows in the loaded test DataFrame.
len(test)

In [None]:
!pip install accelerate

In [None]:
# pip install accelerate
from transformers import T5Tokenizer, T5ForConditionalGeneration

# NOTE: this rebinds `model`, which until now referred to the torchtext T5 model.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base", device_map="auto")

# Translate one prompt at a time. Each decoded string still carries the special-token
# markers (the next cell strips them for display).
# NOTE(review): generate() is called with its default length limit, so long
# translations may be cut short — consider passing max_new_tokens.
output_text = []
for input_text in test_eng:
    # Put the inputs on whatever device the model was placed on instead of assuming CUDA.
    input_ids = tokenizer(input_text, return_tensors="pt", truncation=True).input_ids.to(model.device)

    outputs = model.generate(input_ids)
    output_text.append(tokenizer.decode(outputs[0]))


In [None]:
# Strip the special-token markers from each decoded prediction. Removing the markers
# (instead of splitting on "<pad> " and slicing off the last four characters) keeps the
# text intact when a marker is absent, e.g. when generation stopped before "</s>".
[text.replace("<pad>", "").replace("</s>", "").strip() for text in output_text]

In [None]:
# Hypotheses (model output) go first and references (the German "from" column) second,
# matching the bleu_score signature. Special-token markers are removed so they are not
# scored as text. The previously noted 34.368 came from the swapped, unstripped call —
# re-run to refresh it.
predictions_de = [text.replace("<pad>", "").replace("</s>", "").strip() for text in output_text]
bleu_score(predictions_de, test["from"].tolist())

## Trying German to English

In [None]:
!pip install transformers
!pip install sentencepiece
!pip install sacremoses

In [None]:
import torch
from transformers import pipeline

# Use the GPU when one is present, otherwise fall back to the CPU instead of failing.
device = "cuda" if torch.cuda.is_available() else "cpu"
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-de-en", device=device)

# The "from" column is the input here and the "to" column the reference.
input_text = test["from"].tolist()
target = test["to"].tolist()
translated = translator(input_text)

# Each result is a dict; pull out the translated string by key rather than by position.
output_text = [item["translation_text"] for item in translated]

# Preview a few results instead of printing the whole test set.
print(translated[:5])
print(output_text[:5])


In [None]:
# Hypotheses first, references second. 34.687 is the score noted by the author.
bleu_score(output_text, target) # 34.687