<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Implementation" data-toc-modified-id="Implementation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Implementation</a></span><ul class="toc-item"><li><span><a href="#N-Gram-Precision" data-toc-modified-id="N-Gram-Precision-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>N-Gram Precision</a></span></li><li><span><a href="#Modified-N-Gram-Precision" data-toc-modified-id="Modified-N-Gram-Precision-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Modified N-Gram Precision</a></span></li><li><span><a href="#BLEU-Score" data-toc-modified-id="BLEU-Score-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>BLEU Score</a></span></li></ul></li><li><span><a href="#Using-corpus_bleu()" data-toc-modified-id="Using-corpus_bleu()-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Using <code>corpus_bleu()</code></a></span></li><li><span><a href="#Using-sentence_bleu()" data-toc-modified-id="Using-sentence_bleu()-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Using <code>sentence_bleu()</code></a></span></li><li><span><a href="#Comparison" data-toc-modified-id="Comparison-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Comparison</a></span></li></ul></div>

In [248]:
import nltk
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import sentence_bleu
import numpy as np
from collections import Counter

# Implementation

In [272]:
refs = ["It is a guide to action that ensures that the military will forever heed Party commands", "It is the guiding principle which guarantees the military forces always being under the command of the Party", "It is the practical guide for the army always to heed the directions of the party"]
cand = "It is a guide to action which ensures that the military always obeys the commands of the party"

refs = [i.split() for i in refs]
cand = cand.split()

In [250]:
def simple_count(cand, n):
    return Counter(nltk.ngrams(cand, n))

## N-Gram Precision

In [251]:
def ngram_precision(cand, refs, n):
    ngram2cnt_ref = Counter()
    for ref in refs:
        ngram2cnt_ref += simple_count(ref, n)
    
    ngrams_cand = simple_count(cand, n)
    tot_cnt = 0
    for ngram, cnt in ngrams_cand.items():
        if ngram in ngram2cnt_ref:
            tot_cnt += cnt 
    return tot_cnt/len(cand) - n + 1

In [252]:
print(ngram_precision(cand, refs, 1))

0.08510638297872342


## Modified N-Gram Precision

In [253]:
def modified_ngram_precision(cand, refs, n):
    def max_ref_count(ngram, refs, n):
        maxim = 0
        for ref in refs:
            ngram2cnt_ref = simple_count(ref, n)
            if ngram2cnt_ref[ngram] > maxim:
                maxim = ngram2cnt_ref[ngram]
        return maxim
    
    def count_clip(ngram, cand, refs, n):
        return min(simple_count(cand, n)[ngram], max_ref_count(ngram, refs, n))
    
    sum_count_clip = 0
    for ngram, cnt in simple_count(cand, n).items():
        sum_count_clip += count_clip(ngram, cand, refs, n)
    return sum_count_clip/len(cand) - n + 1

In [254]:
print(modified_ngram_precision(cand, refs, 1))

0.010638297872340385


In [255]:
# cand = "the the the the the the the"
# print(ngram_precision(cand.split(" "), refs, 1))
# print(modified_ngram_precision(cand.split(" "), refs, 1))

## BLEU Score

In [256]:
def best_match_length(cand, refs):
    ref_lens = (len(ref) for ref in refs)
    return min(ref_lens, key=lambda x:(abs(x - len(cand)), x))

In [257]:
def brevity_penalty(cand, refs):
    c = len(cand)
    r = best_match_length(cand, refs)

    if c == 0:
        return 0
    else:
        if c > r:
            return 1
        else:
            return np.exp(1 - r/c)

In [258]:
def bleu_score(cand, refs, weights=[0.25, 0.25, 0.25, 0.25]):
    bp = brevity_penalty(cand, refs)

    p_n = [modified_ngram_precision(cand, refs, n=n) for n, _ in enumerate(weights,start=1)] 
    #p1, p2, p3, ..., pn
    score = np.sum([w_i * np.log(p_i) if p_i != 0 else 0 for w_i, p_i in zip(weights, p_n)])
    return bp * np.exp(score)

# Using `corpus_bleu()`

In [282]:
score = corpus_bleu([refs], [cand])
print(score)

0.5045666840058485


# Using `sentence_bleu()`

In [281]:
score = sentence_bleu(refs, cand)
print(score)

0.5045666840058485


# Comparison

In [284]:
print(bleu_score(cand, refs))

nan


  


In [None]:
# print(bleu_score(cand.split(),list(map(lambda ref: ref.split(), refs))))
print(bleu_score(cand,refs))
print(sentence_bleu(list(map(lambda ref: ref.split(), refs)),cand.split()))