In [1]:
from nltk.translate.meteor_score import single_meteor_score
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
from bert_score import score
from bert_score import plot_example
from bleurt import score as bleurt_score
from tensorflow.keras.preprocessing.text import Tokenizer

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\boezi\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\boezi\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
def generate_sentence_tokenizer(corpus):
    """
    Returns a function which tokenizes a sentence according to a vocabularu built on corpus
    Parameters:
     corpus: list of sentences
    Returns:
     function to compute sentence tokenization
    """
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(corpus)
    def tokenize_sentence(sentence):
        """
        Returns a list of string token given a sentence
        Parameters:
         sentence: sentence to tokenize
        Returns:
         list of string tokens
        """
        sequences = tokenizer.texts_to_sequences([sentence])
        sequence = sequences[0]
        tokenized_sentence = [tokenizer.index_word[i] for i in sequence]
        return tokenized_sentence
    
    return tokenize_sentence

In [4]:
def meteor_score(reference, candidate, tokenize_fun):
    """
    METEOR score
    Parameters:
        reference: reference translation
        candidate: generated translation
        tokenize_fun: tokenize function to apply to each sentence before score computation
    Returns:
        METEOR score
    """
    tokenized_candidate = tokenize_fun(candidate)
    tokenized_reference = tokenize_fun(reference)
    return round(single_meteor_score(tokenized_reference, tokenized_candidate), 4)

reference = "the cat is on the table"

candidate_one = "the dog is on the shelf"
candidate_two = "the cat is on the mat on the table"

sentence_tok = generate_sentence_tokenizer([reference, candidate_one, candidate_two])
sentence_tok("the cat is on the shelf")

score_one = meteor_score(reference, candidate_one, sentence_tok)
score_two = meteor_score(reference, candidate_two, sentence_tok)

print(f"Reference sentence: {reference}")
print(f"Sentence: {candidate_one}")
print(f"Score: {score_one}")
print(f"Sentence: {candidate_two}")
print(f"Score: {score_two}")

Reference sentence: the cat is on the table
Sentence: the dog is on the shelf
Score: 0.625
Sentence: the cat is on the mat on the table
Score: 0.8929


In [8]:
def bertscore_score(reference, candidate):
    """
    BERTScore score, see https://github.com/Tiiiger/bert_score/blob/master/example/Demo.ipynb for API
    Parameters:
        reference: reference translation
        candidate: generated translation
    Returns:
        BERTScore score (precision, recall and F1)
    """
    return score([candidate], [reference], lang='en', verbose=True, rescale_with_baseline=True)
    

reference = "the cat is on the table"
candidate = "the dog is on the shelf"

p, r, f = bertscore_score(reference, candidate)

plot_example(candidate, reference, lang='en', rescale_with_baseline=True)


TypeError: 'module' object is not callable

In [None]:
def bleurt_score(reference, candidate):
    """
    BLEURT score, see https://github.com/google-research/bleurt for the installation of the package and the use
    Parameters:
        reference: reference translation
        candidate: generated translation
    Returns:
        BLEURT score
    """
    checkpoint = "BLEURT-20" # inaccurate, to change with BLEURT-20
    scorer = bleurt_score.BleurtScorer(checkpoint)
    scores = scorer.score(references=[reference], candidates=[candidate])
    return scores

