## Modified Unigram Precision

In [2]:
import numpy as np
from collections import Counter
from nltk import ngrams

In [3]:
def simple_count(tokens, n):
    """Tally how many times each n-gram occurs in ``tokens``.

    Args:
        tokens: Sequence of tokens, handed unchanged to ``nltk.ngrams``.
        n: Order of the n-grams to extract (1 for unigrams).

    Returns:
        A ``Counter`` whose keys are the n-grams produced by ``ngrams``
        and whose values are their occurrence counts.
    """
    n_gram_stream = ngrams(tokens, n)
    return Counter(n_gram_stream)

In [4]:
# Example candidate sentence. Whitespace splitting leaves punctuation attached,
# so the final token is "party." rather than "party".
candidate = "It is a guide to action which ensures that the military always obeys the commands of the party."
tokens = candidate.split()
# n=1: count unigrams.
result = simple_count(tokens, 1)
print("Unigram count : ", result)

Unigram count :  Counter({('the',): 3, ('It',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('which',): 1, ('ensures',): 1, ('that',): 1, ('military',): 1, ('always',): 1, ('obeys',): 1, ('commands',): 1, ('of',): 1, ('party.',): 1})


In [5]:
# Iterating over the Counter yields its keys; each unigram key is a 1-tuple.
for temp in result :
    print(temp)

('It',)
('is',)
('a',)
('guide',)
('to',)
('action',)
('which',)
('ensures',)
('that',)
('the',)
('military',)
('always',)
('obeys',)
('commands',)
('of',)
('party.',)


In [6]:
# Degenerate candidate that repeats one word, so a single unigram carries the
# whole count.
candidate = 'the the the the the the the'
tokens = candidate.split()
# n=1: count unigrams.
result = simple_count(tokens, 1)
print("Unigram count : ", result)

Unigram count :  Counter({('the',): 7})


In [7]:
def count_clip(candidate, reference_list, n):
    """Clip each candidate n-gram count by its best single-reference count.

    Args:
        candidate: Candidate tokens, counted with ``simple_count``.
        reference_list: Iterable of tokenized references.
        n: Order of the n-grams to count.

    Returns:
        A plain ``dict`` with one entry per distinct candidate n-gram. Each
        value is the smaller of the candidate's count and the largest count
        that n-gram reaches in any one reference; n-grams found in no
        reference map to 0.
    """
    candidate_counts = simple_count(candidate, n)

    # Counter union keeps, per n-gram, the larger of the two counts, which
    # accumulates the per-reference maximum across all references.
    highest_ref_counts = Counter()
    for reference in reference_list:
        highest_ref_counts |= simple_count(reference, n)

    clipped = {}
    for n_gram, occurrences in candidate_counts.items():
        # A Counter lookup yields 0 for n-grams no reference contains.
        clipped[n_gram] = min(occurrences, highest_ref_counts[n_gram])
    return clipped

In [8]:
# Repeated-word candidate scored against two references. "the" occurs at most
# twice within a single reference, so its clipped count is 2.
candidate = 'the the the the the the the'
references = [
    "the cat is on the mat",
    "there is a cat on the mat"
]
# Split the candidate and every reference on whitespace before counting.
result = count_clip(candidate.split(), list(map(lambda ref: ref.split(), references)), 1)
print("Modified Unigram Count :", result)

Modified Unigram Count : {('the',): 2}


In [13]:
def modified_precision(candidate, reference_list, n):
    """Compute the modified n-gram precision of a candidate translation.

    The numerator is the sum of the clipped n-gram counts returned by
    ``count_clip``; the denominator is the total number of n-grams counted
    in the candidate by ``simple_count``.

    Args:
        candidate: Iterable of candidate tokens. It is copied into a list
            once, so a one-shot iterator (e.g. a generator) may be passed.
        reference_list: Iterable of tokenized references, forwarded to
            ``count_clip``.
        n: Order of the n-grams to score.

    Returns:
        The clipped total divided by the candidate total. If the candidate
        yields no n-grams the denominator is treated as 1, so the result
        is 0.0.
    """
    # The candidate is iterated twice below (once inside count_clip, once by
    # simple_count). Without this copy a one-shot iterator would be exhausted
    # after the first pass, the denominator would collapse to the fallback 1,
    # and the raw clipped count would be returned instead of a ratio.
    candidate = list(candidate)

    # Numerator
    total_clip_cnt = sum(count_clip(candidate, reference_list, n).values())

    # Denominator; fall back to 1 to avoid dividing by zero.
    total_cnt = sum(simple_count(candidate, n).values()) or 1

    return total_clip_cnt / total_cnt

In [14]:
# Uses the `candidate` and `references` globals set by the count_clip example
# cell; both are whitespace-split into tokens here.
result = modified_precision(candidate.split(), list(map(lambda ref: ref.split(), references)), n=1)
print("Modified Unigram Precision :",result)

Modified Unigram Precision : 0.2857142857142857


In [15]:
def closest_ref_length(candidate, reference_list):
    """Return the reference length nearest to the candidate's length.

    Args:
        candidate: Sized collection of candidate tokens.
        reference_list: Iterable of sized, tokenized references.

    Returns:
        The length of the reference whose length differs least from
        ``len(candidate)``. When two lengths are equally far away, the
        shorter one is returned.

    Raises:
        ValueError: If ``reference_list`` is empty (raised by ``min``).
    """
    target = len(candidate)

    def distance_then_length(length):
        # Rank by distance first; the length itself breaks ties toward the
        # shorter reference.
        return abs(length - target), length

    return min(map(len, reference_list), key=distance_then_length)