## Modified Unigram Precision

In [1]:
import numpy as np
from collections import Counter
from nltk import ngrams

In [2]:
def simple_count (tokens, n):
    """Count how often each n-gram of size ``n`` occurs in ``tokens``.

    Args:
        tokens: The tokens to scan (e.g. the words of one sentence).
        n: Size of the n-grams to extract (1 = unigrams, 2 = bigrams, ...).

    Returns:
        A ``Counter`` keyed by n-gram tuple, e.g. ``('the',)`` for a unigram,
        whose values are the occurrence counts.
    """
    n_gram_stream = ngrams(tokens, n)
    occurrences = Counter(n_gram_stream)
    return occurrences

In [3]:
# Whitespace tokenisation keeps punctuation attached to the word, so the last
# token of this sentence is "party." (with the trailing period).
candidate = "It is a guide to action which ensures that the military always obeys the commands of the party."
tokens = candidate.split()
# n=1 -> unigram counts; the Counter keys are 1-tuples such as ('the',).
result = simple_count(tokens, 1)
print("Unigram count : ", result)

Unigram count :  Counter({('the',): 3, ('It',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('which',): 1, ('ensures',): 1, ('that',): 1, ('military',): 1, ('always',): 1, ('obeys',): 1, ('commands',): 1, ('of',): 1, ('party.',): 1})


In [4]:
# Iterating over the Counter yields only its keys: one line per distinct unigram tuple.
for unigram in result :
    print(unigram)

('It',)
('is',)
('a',)
('guide',)
('to',)
('action',)
('which',)
('ensures',)
('that',)
('the',)
('military',)
('always',)
('obeys',)
('commands',)
('of',)
('party.',)


In [5]:
# Degenerate candidate made of a single word repeated seven times.
candidate = 'the the the the the the the'
tokens = candidate.split()
# Plain (unclipped) unigram counting: every occurrence of 'the' is counted.
result = simple_count(tokens, 1)
print("Unigram count : ", result)

Unigram count :  Counter({('the',): 7})


In [6]:
def count_clip (candidate, reference_list, n) :
    """Clip the candidate's n-gram counts by the reference counts.

    Each n-gram count of the candidate is limited to the largest number of
    times that n-gram occurs in any single reference.

    Args:
        candidate: Tokens of the candidate sentence.
        reference_list: Iterable of tokenised reference sentences.
        n: Size of the n-grams.

    Returns:
        A dict with one entry per distinct candidate n-gram, mapping it to
        ``min(count in candidate, max count in a single reference)``.
        N-grams that occur in no reference map to 0.
    """
    candidate_counts = simple_count(candidate, n)

    # Highest count seen for each n-gram across the individual references.
    best_reference_counts = {}
    for reference in reference_list :
        for gram, occurrences in simple_count(reference, n).items() :
            if occurrences > best_reference_counts.get(gram, 0) :
                best_reference_counts[gram] = occurrences

    clipped = {}
    for gram, occurrences in candidate_counts.items() :
        clipped[gram] = min(occurrences, best_reference_counts.get(gram, 0))
    return clipped

In [7]:
# The candidate repeats 'the' seven times, but no single reference contains
# it more than twice, so the clipped count is 2.
candidate = 'the the the the the the the'
references = [
    "the cat is on the mat",
    "there is a cat on the mat"
]
result = count_clip(candidate.split(), [ref.split() for ref in references], 1)
print("Modified Unigram Count :", result)

Modified Unigram Count : {('the',): 2}


In [8]:
def modified_precision(candidate, reference_list, n) :
    """Compute the modified n-gram precision of ``candidate``.

    Args:
        candidate: Tokens of the candidate sentence.
        reference_list: Iterable of tokenised reference sentences.
        n: Size of the n-grams.

    Returns:
        Sum of the clipped n-gram counts divided by the total number of
        candidate n-grams. When the candidate has no n-grams of size ``n``
        the denominator is replaced by 1, so the result is 0.0.
    """
    # Numerator: clipped counts, summed over all distinct candidate n-grams.
    clipped_total = sum(count_clip(candidate, reference_list, n).values())

    # Denominator: total number of n-grams in the candidate.
    candidate_total = sum(simple_count(candidate, n).values())

    # Avoid a division by zero when the candidate is shorter than n tokens.
    denominator = candidate_total if candidate_total != 0 else 1

    return clipped_total / denominator

In [9]:
# 2 clipped matches out of 7 candidate unigrams -> 2/7.
result = modified_precision(candidate.split(), [ref.split() for ref in references], n=1)
print("Modified Unigram Precision :",result)

Modified Unigram Precision : 0.2857142857142857


In [10]:
def closest_ref_length(candidate, reference_list) :
    """Return the length of the reference closest in length to ``candidate``.

    Args:
        candidate: Tokens of the candidate sentence.
        reference_list: Iterable of tokenised reference sentences.

    Returns:
        The length of the reference whose length differs least from the
        candidate's; when two references are equally close, the shorter
        length wins.

    Raises:
        ValueError: If ``reference_list`` is empty (raised by ``min``).
    """
    target = len(candidate)

    def distance_then_length(reference) :
        # Primary key: distance to the candidate length; tie-break: shorter reference.
        size = len(reference)
        return (abs(size - target), size)

    return len(min(reference_list, key=distance_then_length))

In [11]:
def brevity_penalty(candidate, reference_list) :
    """Compute the BLEU brevity penalty for ``candidate``.

    Args:
        candidate: Tokens of the candidate sentence.
        reference_list: Iterable of tokenised reference sentences.

    Returns:
        1 when the candidate is longer than the closest reference,
        0 when the candidate is empty, and ``exp(1 - r / c)`` otherwise,
        where ``c`` is the candidate length and ``r`` the closest
        reference length.
    """
    candidate_length = len(candidate)
    reference_length = closest_ref_length(candidate, reference_list)

    # Candidates longer than the closest reference are not penalised.
    if candidate_length > reference_length :
        return 1

    # An empty candidate would divide by zero below; it gets the maximum penalty.
    if candidate_length == 0 :
        return 0

    return np.exp(1 - (reference_length / candidate_length))

In [12]:
def bleu_score(candidate, reference_list, weights=(0.25, 0.25, 0.25, 0.25)) :
    """Compute the sentence-level BLEU score of ``candidate``.

    The score is ``BP * exp(sum_n w_n * log(p_n))`` where ``p_n`` is the
    modified n-gram precision and ``BP`` the brevity penalty.

    Args:
        candidate: Tokens of the candidate sentence.
        reference_list: List of tokenised reference sentences.
        weights: One weight per n-gram order; ``weights[0]`` applies to
            unigrams, ``weights[1]`` to bigrams, and so on. Defaults to
            uniform weights over 1- to 4-grams.

    Returns:
        The BLEU score. It is 0.0 when any n-gram order with a non-zero
        weight has a modified precision of zero, because the weighted
        geometric mean of the precisions is then zero.
    """
    # Materialise once so the weights can be both counted and iterated.
    weights = list(weights)

    bp = brevity_penalty(candidate, reference_list)

    # One modified precision per weight, for n = 1 .. len(weights).
    p_n = [modified_precision(candidate, reference_list, n=n) for n in range(1, len(weights) + 1)]

    # Orders with a zero weight do not take part in the geometric mean.
    weighted = [(w_i, p_i) for w_i, p_i in zip(weights, p_n) if w_i != 0]

    # log(0) is undefined and a geometric mean with a zero factor is zero.
    # Skipping such a term instead would treat the missing precision as 1.0
    # and inflate the score of candidates with no matching higher-order n-grams.
    if any(p_i == 0 for _, p_i in weighted) :
        return 0.0

    score = np.sum([w_i * np.log(p_i) for w_i, p_i in weighted])

    return bp * np.exp(score)

## Example: computing the BLEU score

In [13]:
# Example data: one candidate translation and three reference translations.
# They are kept as plain strings here and split on whitespace where they are scored.
candidate = 'It is a guide to action which ensures that the military always obeys the commands of the party'
references = [
    'It is a guide to action that ensures that the military will forever heed Party commands',
    'It is the guiding principle which guarantees the military forces always being under the command of the Party',
    'It is the practical guide for the army always to heed the directions of the party'
]

In [14]:
# Score the whitespace-tokenised candidate against the tokenised references.
print("BLEU Score : ",bleu_score(candidate.split(), [ref.split() for ref in references]))

BLEU Score :  0.5045666840058485


## NLTK BLEU

In [18]:
from nltk.translate import bleu_score

In [19]:
print("BLEU Score : ", bleu_score.sentence_bleu(list(map(lambda ref: ref.split(), references)), candidate.split()))

BLEU Score :  0.5045666840058485
