In [1]:
import math

In [2]:
def bleu_score(references, candidate):
    """Function to Calculate Bleu Score Mathematically"""
    # Tokenize the sentences
    reference_tokens = [ref.lower().split() for ref in references]
    candidate_tokens = candidate.lower().split()
    
    # Declare list for precision
    precisions = []
    # Task instr "Only consider upto n=4 n-grams"
    n = 4  
        
    for i in range(1, n+1):
        ngram_ref = []
        # Calculate ngrams for refs
        for ref in reference_tokens:
            for j in range(len(ref)-i+1):
                ngram_ref.append(tuple(ref[j:j+i]))
        
        # Calculate ngrams for candidates
        ngram_candidate = []
        for j in range(len(candidate_tokens)-i+1):
            ngram_candidate.append(tuple(candidate_tokens[j:j+i]))
        
        if len(ngram_candidate) == 0:
            precision = 0.0
        else:
            common_ngrams = set(ngram_ref) & set(ngram_candidate)
            precision = len(common_ngrams) / len(ngram_candidate)
        
        precisions.append(precision)
    
    # Calculate geometric mean of precisions
    if any(p == 0.0 for p in precisions):
        geometric_mean_precision = 0.0  # Using if statement so we don't end up with log(0)
    else:
        sum_logs = sum(math.log(p) for p in precisions)
        geometric_mean_precision = math.exp(sum_logs / n)
    
    # Calculating Brevity-Penalty
    c = len(candidate_tokens)
    r = min(len(ref) for ref in reference_tokens)
    if c > r:
        brevity_penalty = 1.0
    else:
        brevity_penalty = math.exp(1 - r / c)
    
    # Finallly calculate bleu score
    bleu_score = brevity_penalty * geometric_mean_precision
    
    return bleu_score

In [3]:
references1 = [
    "It is a guide to action that ensures that the military will forever heed Party commands",
    "It is the guiding principle which guarantees the military forces always being under the command of the Party",
    "It is the practical guide for the army always to heed the directions of the party"
]
candidate1 = "It is a guide to action which ensures that the military always obeys the commands of the party"
bleu_score1 = bleu_score(references1, candidate1)
print(bleu_score1)

0.48902285358302


In [4]:
references2 = [
    "It is a guide to action that ensures that the military will forever heed Party commands",
    "It is the guiding principle which guarantees the military forces always being under the command of the Party",
    "It is the practical guide for the army always to heed the directions of the party"
]
candidate2 = "It is the to action the troops forever hearing the activity guidebook that party direct"
bleu_score2 = bleu_score(references2, candidate2)
print(bleu_score2)

0.0


Using NLTK to cross-check

In [5]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

references1 = [
    "It is a guide to action that ensures that the military will forever heed Party commands",
    "It is the guiding principle which guarantees the military forces always being under the command of the Party",
    "It is the practical guide for the army always to heed the directions of the party"
]

candidate1 = "It is a guide to action which ensures that the military always obeys the commands of the party"

# Tokenize references and candidate
references1_tokenized = [[token.lower() for token in nltk.word_tokenize(ref)] for ref in references1]
candidate1_tokenized = [token.lower() for token in nltk.word_tokenize(candidate1)]

# Compute BLEU score
bleu_score1 = sentence_bleu(references1_tokenized, candidate1_tokenized, smoothing_function=SmoothingFunction().method1)

print(f"BLEU Score for case 1: {bleu_score1}")

BLEU Score for case 1: 0.5045666840058485


In [6]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

references2 = [
    "It is a guide to action that ensures that the military will forever heed Party commands",
    "It is the guiding principle which guarantees the military forces always being under the command of the Party",
    "It is the practical guide for the army always to heed the directions of the party"
]

candidate2 = "It is the to action the troops forever hearing the activity guidebook that party direct"

# Tokenize references and candidate
references2_tokenized = [[token.lower() for token in nltk.word_tokenize(ref)] for ref in references2]
candidate2_tokenized = [token.lower() for token in nltk.word_tokenize(candidate2)]

# Compute BLEU score
bleu_score2 = sentence_bleu(references2_tokenized, candidate2_tokenized, smoothing_function=SmoothingFunction().method1)

print(f"BLEU Score for case 2: {bleu_score2}")

BLEU Score for case 2: 0.09151480843357339
