In [None]:
from nltk.translate.meteor_score import single_meteor_score
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
def generate_sentence_tokenizer(corpus):
    """
    Builds a sentence tokenizer whose vocabulary is fitted on corpus
    Parameters:
     corpus: text used to build the vocabulary (handed to the Keras
             Tokenizer wrapped in a one-element list, i.e. as a single text)
    Returns:
     function mapping a sentence to its list of string tokens
    """
    vocab = Tokenizer()
    # The corpus is wrapped in a list so it is fitted as one single text.
    vocab.fit_on_texts([corpus])

    def tokenize_sentence(sentence):
        """
        Splits a sentence into the string tokens known to the fitted vocabulary
        Parameters:
         sentence: sentence to tokenize
        Returns:
         list of string tokens
        """
        # Encode the sentence to vocabulary ids, then map each id back to its word.
        # NOTE(review): with a default Tokenizer (no oov_token) words unseen in
        # corpus are presumably dropped by texts_to_sequences - confirm.
        encoded = vocab.texts_to_sequences([sentence])[0]
        id_to_word = vocab.index_word
        return [id_to_word[word_id] for word_id in encoded]

    return tokenize_sentence

In [None]:
def meteor_score(reference, candidate, tokenize_fun):
    """
    Computes the METEOR score of a candidate translation against a reference
    Parameters:
        reference: reference translation
        candidate: generated translation
        tokenize_fun: function applied to each sentence to turn it into tokens
                      before the score is computed
    Returns:
        METEOR score rounded to 4 decimal places
    """
    # Candidate is tokenized first, then the reference (same order as before).
    hypothesis_tokens = tokenize_fun(candidate)
    reference_tokens = tokenize_fun(reference)

    # single_meteor_score takes the reference first and the hypothesis second.
    raw_score = single_meteor_score(reference_tokens, hypothesis_tokens)
    return round(raw_score, 4)