In [74]:
import math
import numpy as np
# Kernel-wide NumPy error configuration: floating-point divide-by-zero and
# invalid-operation conditions are ignored instead of emitting warnings.
# NOTE(review): this is global state, so it also hides 0/0 and log(0)
# results produced anywhere later in the notebook.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

In [145]:
"""Task 0: Unigram BLEU score"""


def uni_bleu(references, sentence):
    """Calculates the unigram BLEU score for a sentence.

    The score is the modified (clipped) unigram precision multiplied by the
    brevity penalty.

    Args:
        references: list of reference translations
            each ref translation is a list of the words in the translation
        sentence: list containing the model proposed sentence
    Returns:
        the unigram BLEU score (0.0 for an empty sentence)
    Raises:
        ValueError: if references is empty and sentence is not"""

    sent_len = len(sentence)
    if sent_len == 0:
        # No words proposed: nothing can match and the ratios are undefined.
        return 0.0

    # Modified precision: every distinct word is credited at most as many
    # times as it occurs in the single reference that contains it most often.
    clipped_matches = 0
    for word in dict.fromkeys(sentence):  # distinct words, original order
        max_ref_count = max(ref.count(word) for ref in references)
        clipped_matches += min(sentence.count(word), max_ref_count)
    P = clipped_matches / sent_len

    # Brevity penalty uses the reference whose length is closest to the
    # sentence length; ties are resolved in favour of the shorter reference.
    closest_length = min(
        (len(ref) for ref in references),
        key=lambda ref_len: (abs(ref_len - sent_len), ref_len))
    BP = 1
    if sent_len < closest_length:
        BP = np.exp(1 - (closest_length / sent_len))

    return P * BP

In [146]:
# Example: score one candidate sentence against two reference translations.
references = [
    ["the", "cat", "is", "on", "the", "mat"],
    ["there", "is", "a", "cat", "on", "the", "mat"],
]
sentence = ["there", "is", "a", "cat", "here"]
uni_bleu(references, sentence)

['there', 'is', 'a', 'cat', 'here']


0.6549846024623855

In [185]:
"""Task 1: N-gram BLEU score"""
import numpy as np
from collections import Counter


def ngram_bleu(references, sentence, n):
    """Calculates the BLEU score for a sentence using n-grams of one size.

    The score is the modified (clipped) n-gram precision multiplied by the
    brevity penalty.

    Args:
        references: list of reference translations
            each ref translation is a list of the words in the translation
        sentence: list containing the model proposed sentence
        n: size of the n-gram to use for evaluation
    Returns:
        the n-gram BLEU score (0.0 when the sentence has no n-grams of size n)
    Raises:
        ValueError: if references is empty and the sentence has n-grams"""

    # Count every n-gram of the sentence; tuples make them hashable.
    sent_counts = Counter(
        tuple(sentence[i:i + n]) for i in range(len(sentence) - n + 1))
    total = sum(sent_counts.values())
    if total == 0:
        # Sentence shorter than n (or empty): precision is undefined.
        return 0.0

    # For each sentence n-gram keep the highest count seen in one reference.
    max_ref_counts = Counter()
    for ref in references:
        ref_counts = Counter(
            tuple(ref[i:i + n]) for i in range(len(ref) - n + 1))
        for gram in sent_counts:
            max_ref_counts[gram] = max(max_ref_counts[gram], ref_counts[gram])

    # Clip: an n-gram cannot be credited more often than the sentence uses it.
    clipped_matches = sum(
        min(count, max_ref_counts[gram]) for gram, count in sent_counts.items())
    P = clipped_matches / total

    # Brevity penalty uses the reference whose length is closest to the
    # sentence length; ties are resolved in favour of the shorter reference.
    sent_len = len(sentence)
    closest_length = min(
        (len(ref) for ref in references),
        key=lambda ref_len: (abs(ref_len - sent_len), ref_len))
    BP = 1
    if sent_len < closest_length:
        BP = np.exp(1 - (closest_length / sent_len))
    return P * BP

def n_gram_generator(sentence, n):
    """Collects the distinct n-grams of a sentence in first-seen order
    Args:
        sentence: list containing the model proposed sentence
        n: size of the n-gram to generate
    Returns:
        list of the unique n-grams, each one a slice of sentence"""

    unique = []
    last_start = len(sentence) - n
    start = 0
    while start <= last_start:
        window = sentence[start:start + n]
        # Keep only the first occurrence of every n-gram
        if window not in unique:
            unique.append(window)
        start += 1

    return unique

def n_gram_appearance(n_gram, sentence):
    """Counts how often a n-gram occurs in a sentence
    Args:
        n_gram: n-gram to search for
        sentence: list of words to search in
    Returns:
        number of positions in sentence where n_gram starts"""

    width = len(n_gram)
    # Slide a window of the n-gram's size over the sentence
    return sum(
        1
        for start in range(len(sentence) - width + 1)
        if sentence[start:start + width] == n_gram
    )


In [186]:
# Example: bigram BLEU of one candidate against two reference translations.
references = [
    ["the", "cat", "is", "on", "the", "mat"],
    ["there", "is", "a", "cat", "on", "the", "mat"],
]
sentence = ["there", "is", "a", "cat", "here"]

print(ngram_bleu(references, sentence, 2))

0.6140480648084865


In [191]:
"""Task 2: Cumulative N-gram BLEU score"""


def cumulative_bleu(references, sentence, n):
    """Calculates the cumulative n-gram BLEU score for a sentence.

    The score is the geometric mean (uniform weights) of the precisions for
    n-gram sizes 1..n, multiplied by the brevity penalty.

    Args:
        references: list of reference translations
            each ref translation is a list of the words in the translation
        sentence: list containing the model proposed sentence
        n: size of the largest n-gram to use
    Returns:
        the cumulative n-gram BLEU score; 0.0 when n < 1 or when any of the
        per-size precisions is zero or undefined"""

    # Precision for each n-gram size from 1 up to n
    precisions = [precision(references, sentence, size)
                  for size in range(1, n + 1)]

    # A zero (or NaN) precision makes the geometric mean zero; return it
    # explicitly instead of relying on log(0) = -inf with silenced warnings.
    if not precisions or any(not p > 0 for p in precisions):
        return 0.0
    P = np.exp(np.sum(np.log(precisions)) / n)

    # Brevity penalty uses the reference whose length is closest to the
    # sentence length; ties are resolved in favour of the shorter reference.
    sent_len = len(sentence)
    closest_length = min(
        (len(ref) for ref in references),
        key=lambda ref_len: (abs(ref_len - sent_len), ref_len))
    BP = 1
    if sent_len < closest_length:
        BP = np.exp(1 - (closest_length / sent_len))
    return P * BP

def n_gram_generator(sentence, n):
    """Builds the list of distinct n-grams found in a sentence
    Args:
        sentence: list containing the model proposed sentence
        n: size of the n-gram to generate
    Returns:
        list of n-grams without repetitions, in order of first appearance"""

    # Every contiguous window of n words, left to right
    windows = (sentence[start:start + n]
               for start in range(len(sentence) - n + 1))

    distinct = []
    for window in windows:
        if window in distinct:
            continue
        distinct.append(window)

    return distinct

def n_gram_appearance(n_gram, sentence):
    """Counts the occurrences of a n-gram inside a sentence
    Args:
        n_gram: n-gram to search for
        sentence: list of words to search in
    Returns:
        number of appearances of n_gram in sentence"""

    size = len(n_gram)
    # Start indexes at which the window equals the n-gram
    matches = [start
               for start in range(len(sentence) - size + 1)
               if sentence[start:start + size] == n_gram]

    return len(matches)

def precision(references, sentence, n):
    """Calculates the modified (clipped) n-gram precision for a sentence.

    Each n-gram of the sentence is credited at most as many times as it
    occurs in the single reference that contains it most often.

    Args:
        references: list of reference translations
            each ref translation is a list of the words in the translation
        sentence: list containing the model proposed sentence
        n: size of the n-gram to use
    Returns:
        precision score in [0, 1]; 0.0 when the sentence has no n-grams of
        size n"""

    # Count every n-gram of the sentence; tuples make them hashable.
    sent_counts = Counter(
        tuple(sentence[i:i + n]) for i in range(len(sentence) - n + 1))
    total = sum(sent_counts.values())
    if total == 0:
        # Sentence shorter than n (or empty): avoid a 0/0 result.
        return 0.0

    # Same counting for every reference translation
    ref_counts = [
        Counter(tuple(ref[i:i + n]) for i in range(len(ref) - n + 1))
        for ref in references
    ]

    # Clip each sentence count by the best count found in any one reference
    clipped_matches = 0
    for gram, count in sent_counts.items():
        max_ref_count = max((counts[gram] for counts in ref_counts), default=0)
        clipped_matches += min(count, max_ref_count)

    return clipped_matches / total

In [192]:
# Example: cumulative 4-gram BLEU of one candidate against two references.
references = [
    ["the", "cat", "is", "on", "the", "mat"],
    ["there", "is", "a", "cat", "on", "the", "mat"],
]
sentence = ["there", "is", "a", "cat", "here"]

print(cumulative_bleu(references, sentence, 4))

0.5475182535069453
