<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Implementation" data-toc-modified-id="Implementation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Implementation</a></span><ul class="toc-item"><li><span><a href="#N-Gram-Precision" data-toc-modified-id="N-Gram-Precision-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>N-Gram Precision</a></span></li><li><span><a href="#Modified-N-Gram-Precision" data-toc-modified-id="Modified-N-Gram-Precision-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Modified N-Gram Precision</a></span></li><li><span><a href="#BLEU-Score" data-toc-modified-id="BLEU-Score-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>BLEU Score</a></span></li></ul></li><li><span><a href="#Using-sentence_bleu()" data-toc-modified-id="Using-sentence_bleu()-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Using <code>sentence_bleu()</code></a></span></li><li><span><a href="#Using-corpus_bleu()" data-toc-modified-id="Using-corpus_bleu()-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Using <code>corpus_bleu()</code></a></span></li></ul></div>

In [1]:
import nltk
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import sentence_bleu
import numpy as np
from collections import Counter

In [2]:
# Example data: three reference translations and one candidate translation.
refs = [
    "It is the guiding principle which guarantees the military forces always being under the command of the party on",
    "It is a guide to action that ensures that the military will forever heed party commands",
    "It is the practical guide for the army always to heed the directions of the party",
]
cand = "It is a guide to action which ensures that the military always obeys the commands of the party"

# Whitespace-tokenise: `refs` becomes a list of token lists, `cand` a token list.
refs = list(map(str.split, refs))
cand = cand.split()

# Implementation

## N-Gram Precision

In [3]:
def count_ngram(cand, n):
    """Tally the n-grams of a token sequence.

    Args:
        cand: Token sequence to scan (despite the name, this helper is also
            applied to reference sentences).
        n: N-gram order, forwarded to ``nltk.ngrams``.

    Returns:
        A ``Counter`` mapping each n-gram produced by ``nltk.ngrams(cand, n)``
        to the number of times it occurs.
    """
    ngram_counts = Counter()
    ngram_counts.update(nltk.ngrams(cand, n))
    return ngram_counts

def ngram_precision(refs, cand, n):
    """Plain (unclipped) n-gram precision of a candidate against references.

    Every n-gram occurrence in ``cand`` counts as a hit when that n-gram
    appears at least once in any reference; repeated candidate n-grams are
    not clipped.

    Args:
        refs: Iterable of tokenised reference sentences.
        cand: Tokenised candidate sentence (must support ``len``).
        n: N-gram order.

    Returns:
        Matched n-gram occurrences divided by the ``len(cand) - n + 1``
        n-grams of the candidate, or ``0.0`` when the candidate is too short
        to contain any n-gram of this order.
    """
    # Only membership matters here, so pool the reference n-grams into a set.
    ref_ngrams = set()
    for ref in refs:
        ref_ngrams.update(count_ngram(ref, n))

    total = len(cand) - n + 1
    if total <= 0:
        return 0.0

    matched = sum(
        cnt for ngram, cnt in count_ngram(cand, n).items() if ngram in ref_ngrams
    )
    # The denominator must be parenthesised: `matched/len(cand) - n + 1`
    # divides first and is therefore only correct for n == 1.
    return matched / total

## Modified N-Gram Precision

In [4]:
def max_ref_count(ngram, refs, n):
    """Highest count of ``ngram`` found in any single reference.

    Args:
        ngram: The n-gram to look up.
        refs: Iterable of tokenised reference sentences.
        n: N-gram order used when counting each reference.

    Returns:
        The largest per-reference count of ``ngram``; 0 when no reference
        contains it or when ``refs`` is empty.
    """
    per_ref_counts = (count_ngram(ref, n)[ngram] for ref in refs)
    return max(per_ref_counts, default=0)

def count_clip(ngram, cand, refs, n):
    """Candidate count of ``ngram``, capped by its best single-reference count.

    Args:
        ngram: The n-gram to clip.
        cand: Tokenised candidate sentence.
        refs: Iterable of tokenised reference sentences.
        n: N-gram order.

    Returns:
        The smaller of the n-gram's count in ``cand`` and
        ``max_ref_count(ngram, refs, n)``.
    """
    cand_count = count_ngram(cand, n)[ngram]
    ceiling = max_ref_count(ngram, refs, n)
    return cand_count if cand_count <= ceiling else ceiling

def modified_ngram_precision(refs, cand, n):
    """Modified (clipped) n-gram precision of a candidate against references.

    Each distinct candidate n-gram contributes its clipped count (see
    ``count_clip``), so repeating an n-gram more often than any reference
    does is not rewarded.

    Args:
        refs: Iterable of tokenised reference sentences.
        cand: Tokenised candidate sentence (must support ``len``).
        n: N-gram order.

    Returns:
        Sum of clipped counts divided by the ``len(cand) - n + 1`` n-grams of
        the candidate, or ``0.0`` when the candidate is too short to contain
        any n-gram of this order.
    """
    total = len(cand) - n + 1
    if total <= 0:
        # No n-grams of this order exist; dividing would raise
        # ZeroDivisionError (total == 0) or use a negative denominator.
        return 0.0

    clipped_total = sum(
        count_clip(ngram, cand, refs, n) for ngram in count_ngram(cand, n)
    )
    return clipped_total / total

In [5]:
# demo_cand = "the the the the the the the"  # separate name so the tokenised `cand` is not overwritten
# print(ngram_precision(refs, demo_cand.split(" "), 1))
# print(modified_ngram_precision(refs, demo_cand.split(" "), 1))

## BLEU Score

In [6]:
def best_match_length(refs, cand):
    """Pick the reference length closest to the candidate length.

    Args:
        refs: Iterable of reference sentences (each must support ``len``).
        cand: Candidate sentence (must support ``len``).

    Returns:
        The reference length with the smallest absolute difference from
        ``len(cand)``; ties are resolved in favour of the shorter reference.
    """
    def closeness(ref_len):
        # Sort key: distance to the candidate length first, then the length
        # itself so that ties go to the shorter reference.
        return abs(ref_len - len(cand)), ref_len

    lengths = list(map(len, refs))
    return min(lengths, key=closeness)

def brevity_penalty(refs, cand):
    """Brevity penalty for a candidate given its references.

    Args:
        refs: Iterable of reference sentences.
        cand: Candidate sentence (must support ``len``).

    Returns:
        0 for an empty candidate, 1 when the candidate is longer than the
        best-matching reference length ``r``, and ``exp(1 - r/c)`` otherwise,
        where ``c`` is the candidate length.
    """
    cand_len = len(cand)
    ref_len = best_match_length(refs, cand)

    if cand_len == 0:
        return 0
    if cand_len > ref_len:
        return 1
    return np.exp(1 - ref_len/cand_len)

def bleu_score(refs, cand, weights=(0.25, 0.25, 0.25, 0.25)):
    """Sentence-level BLEU: brevity penalty times the weighted geometric mean
    of the modified n-gram precisions.

    Args:
        refs: List of tokenised reference sentences.
        cand: Tokenised candidate sentence (must support ``len``).
        weights: One weight per n-gram order; ``weights[k]`` applies to
            (k + 1)-grams. The default is an immutable tuple with the same
            values as before.

    Returns:
        The BLEU score. It is ``0.0`` when the brevity penalty is zero (for
        example an empty candidate) or when any order with a non-zero weight
        has a modified precision of zero, because the geometric mean is then
        zero.
    """
    bp = brevity_penalty(refs, cand)
    if bp == 0:
        # A zero penalty forces a zero score; returning here also avoids
        # requesting precisions for an empty candidate.
        return 0.0

    log_mean = 0.0
    for n, w in enumerate(weights, start=1):
        if w == 0:
            # This order does not contribute to the weighted mean.
            continue
        if len(cand) < n:
            # The candidate has no n-grams of this order, so its precision is zero.
            return 0.0
        p = modified_ngram_precision(refs, cand, n)
        if p == 0:
            # log(0) is -inf: a zero precision zeroes the geometric mean
            # instead of being skipped as if it were a perfect match.
            return 0.0
        log_mean += w * np.log(p)
    return bp * np.exp(log_mean)

In [7]:
# Score the example candidate with the from-scratch implementation above.
print(bleu_score(refs=refs, cand=cand))

0.47729963243022017


# Using `sentence_bleu()`

In [8]:
# Same tokenised example, scored with NLTK's sentence-level implementation.
score = sentence_bleu(references=refs, hypothesis=cand)
print(score)

0.47729963243022017


# Using `corpus_bleu()`

In [9]:
# Wrap the single example in lists: one set of references and one hypothesis.
score = corpus_bleu(list_of_references=[refs], hypotheses=[cand])
print(score)

0.47729963243022017
