In [None]:
# Install the necessary libraries
!pip install transformers datasets evaluate accelerate
!pip install torch
!pip install -U nltk

In [None]:
# Import the necessary libraries
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from datasets import load_dataset, Dataset
import csv
import evaluate

In [None]:
# Load the test split of the "de-en" configuration of the wmt16 dataset
data = load_dataset(
    "wmt16",
    name="de-en",
    split="test",
)

In [None]:
# Load the pretrained google-t5/t5-small weights and matching tokenizer, then
# wrap both in an English-to-German translation pipeline
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
translator = pipeline(
    "translation_en_to_de",
    tokenizer=tokenizer,
    model=model,
)

In [None]:
# Translate every English sentence in the dataset (around 2K) and store the
# source text, the model output and the German reference side by side in a CSV.
#
# The `with` block closes the file even if a translation raises midway, and
# newline='' is what the csv module asks for so the writer controls line
# endings itself (otherwise Windows output gets a blank row after each record).
# NOTE(review): no explicit encoding is passed, so the platform default is
# used; the cell that reads this file back relies on the same default.
with open('google_t5_translation.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Sample Input', 'Translation', 'Ground Truth'])
    # Iterate the rows directly instead of indexing data[i] twice per sentence.
    for example in data:
        pair = example["translation"]
        text = pair["en"]
        output = translator(text)
        # The pipeline result is indexed as a list; element 0 carries the
        # translated string under "translation_text".
        prediction = output[0]["translation_text"]
        ground_truth = pair["de"]
        writer.writerow([text, prediction, ground_truth])

In [None]:
# Read the predictions and references back from the translation CSV file.
# Column 1 holds the model translation and column 2 the ground truth.
filename = "google_t5_translation.csv"
predictions = []
references = []
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files passed to csv.reader.
with open(filename, 'r', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    # Skip the header row; the default keeps an empty file from raising
    # StopIteration.
    next(csvreader, None)
    for row in csvreader:
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to
            # evaluate on such a row, so skip it instead of failing on row[1].
            continue
        predictions.append(row[1])
        references.append(row[2])

In [None]:
# Score the predictions against the references with the BLEU metric
bleu = evaluate.load("bleu")
results = bleu.compute(
    references=references,
    predictions=predictions,
)
print("Bleu Metric:", results["bleu"])

In [None]:
# Score the predictions against the references with the METEOR metric
meteor = evaluate.load("meteor")
results = meteor.compute(
    references=references,
    predictions=predictions,
)
print("Meteor Metric", results["meteor"])