In [12]:
from nltk.translate.meteor_score import single_meteor_score
import nltk
# Download the 'wordnet' and 'omw-1.4' NLTK data packages at import time.
# NOTE(review): presumably required by single_meteor_score for WordNet-based
# matching -- confirm against the NLTK documentation.
nltk.download('wordnet')
nltk.download('omw-1.4')
# Keras text tokenizer, used below to build a vocabulary over the sentences.
from tensorflow.keras.preprocessing.text import Tokenizer

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\boezi\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\boezi\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\omw-1.4.zip.


In [6]:
def generate_sentence_tokenizer(corpus):
    """
    Builds a sentence tokenizer whose vocabulary is fitted on a corpus
    Parameters:
     corpus: list of sentences used to fit the vocabulary
    Returns:
     function mapping a sentence to its list of string tokens
    """
    # The fitted Keras tokenizer is captured by the closure returned below.
    vocabulary = Tokenizer()
    vocabulary.fit_on_texts(corpus)

    def tokenize_sentence(sentence):
        """
        Splits a sentence into string tokens using the fitted vocabulary
        Parameters:
         sentence: sentence to tokenize
        Returns:
         list of string tokens
        """
        # texts_to_sequences works on a batch: wrap the sentence in a
        # one-element list and take the single resulting id sequence.
        # NOTE(review): no oov_token is configured, so words missing from
        # the fitted vocabulary are presumably dropped -- confirm in Keras docs.
        word_ids = vocabulary.texts_to_sequences([sentence])[0]
        # Translate each integer id back to the word it stands for.
        return [vocabulary.index_word[word_id] for word_id in word_ids]

    return tokenize_sentence

In [19]:
def meteor_score(reference, candidate, tokenize_fun):
    """
    Computes the METEOR score of a candidate translation against a reference
    Parameters:
        reference: reference translation
        candidate: generated translation
        tokenize_fun: function applied to each sentence to obtain its tokens
                      before the score is computed
    Returns:
        METEOR score rounded to 4 decimal places
    """
    # Tokenize the candidate first and the reference second.
    candidate_tokens, reference_tokens = (
        tokenize_fun(text) for text in (candidate, reference)
    )
    # single_meteor_score takes the reference first, then the hypothesis.
    raw_score = single_meteor_score(reference_tokens, candidate_tokens)
    return round(raw_score, 4)

# Demo: score two candidate translations against a single reference sentence.
reference = "the cat is on the table"

candidate_one = "the dog is on the shelf"
candidate_two = "the cat is on the mat on the table"

# The tokenizer's vocabulary is fitted on exactly the sentences scored below.
sentence_tok = generate_sentence_tokenizer([reference, candidate_one, candidate_two])

# The stray mid-cell call to sentence_tok(...) was removed: its result was
# discarded, so it was never displayed or used.
score_one = meteor_score(reference, candidate_one, sentence_tok)
score_two = meteor_score(reference, candidate_two, sentence_tok)

print(f"Sentence: {candidate_one}")
print(f"Score: {score_one}")
print(f"Sentence: {candidate_two}")
print(f"Score: {score_two}")

Sentence: {candidate_one}
Score: {score_one}
Sentence: {candidate_two}
Score: {score_two}
