# BLEU Implementation

In [1]:
import sys
import unittest
from math import exp, log, fsum
from functools import reduce
from operator import mul
from nltk.translate.bleu_score import sentence_bleu

In [2]:
# Alternative candidate sentence, kept commented out for experimentation:
# cand = "the cat the cat on the mat"
# Degenerate candidate: one word repeated eight times.
cand = "the the the the the the the the"
# Two reference sentences.
ref_1 = "the cat is on the mat"
ref_2 = "there is a cat on the mat"

In [3]:
# Split every sentence on whitespace; each name is rebound from a string to
# its list of tokens.
cand, ref_1, ref_2 = (sentence.split() for sentence in (cand, ref_1, ref_2))

In [4]:
def word_count(str):
    """Count how often each item occurs in a sequence.

    Args:
        str: Iterable of hashable, mutually orderable items (for example
            word tokens or n-gram tuples). The parameter name shadows the
            builtin ``str`` inside this function; it is kept so existing
            callers keep working.

    Returns:
        A dict mapping each distinct item to its number of occurrences.
        Keys are inserted in ascending sorted order.
    """
    tally = {}
    # Visit the items in sorted order so the resulting key order is stable.
    for token in sorted(str):
        tally[token] = tally.get(token, 0) + 1
    return tally

In [5]:
def mod_ngram_precision_count(cand_sent, ref_list, sum_count=True):
    """Clip the candidate's item counts by their maximum reference counts.

    Each distinct item of the candidate is counted, and that count is
    capped at the largest number of times the item occurs in any single
    reference. Items that occur in no reference are capped at 0.

    Args:
        cand_sent: Sequence of candidate items (tokens or n-gram tuples).
        ref_list: List of reference sequences of the same item kind.
        sum_count: When equal to ``True`` return the total of the clipped
            counts; otherwise return the per-item clipped counts.

    Returns:
        An int (sum of clipped counts) or a dict of item -> clipped count.
    """
    cand_tally = word_count(cand_sent)
    ref_tallies = [word_count(ref) for ref in ref_list]

    clipped = {}
    for item, cand_n in cand_tally.items():
        # Highest count of this item within one reference (0 if never seen).
        ref_cap = max((tally.get(item, 0) for tally in ref_tallies), default=0)
        clipped[item] = min(cand_n, ref_cap)

    # The explicit comparison with True is kept on purpose: a plain
    # truthiness test would treat other truthy values differently.
    if sum_count == True:
        return sum(clipped.values())
    return clipped

In [6]:
def ngram(text, gram_count):
    """Return every contiguous window of ``gram_count`` items as a tuple.

    Args:
        text: Sliceable sequence of items (for example a list of tokens).
        gram_count: Window size, i.e. the n of the n-gram.

    Returns:
        A list of tuples in left-to-right order. The list is empty when
        ``text`` is shorter than ``gram_count``.
    """
    grams = []
    window_starts = range(len(text) - gram_count + 1)
    for start in window_starts:
        grams.append(tuple(text[start:start + gram_count]))
    return grams

In [9]:
'''first bleu written by Soh-eun
> ValueError: math domain error'''

def bleu(cand_sent, ref_list):
    """Unsmoothed sentence-level BLEU over 1- to 4-gram precisions.

    Args:
        cand_sent: Candidate sentence as a list of tokens.
        ref_list: List of reference sentences, each a list of tokens.

    Returns:
        The brevity penalty multiplied by the product of the four clipped
        n-gram precisions, each raised to the power 0.25.

    Raises:
        ValueError: If any n-gram precision is 0 (``log(0)`` is undefined),
            or if ``ref_list`` is empty.
        ZeroDivisionError: If the candidate has fewer than 4 tokens.
    """
    cand_len = len(cand_sent)
    # Reference length closest to the candidate length (first one wins ties).
    closest_ref_len = min((len(ref) for ref in ref_list), key=lambda n: abs(n - cand_len))
    bp = min(1, exp(1 - closest_ref_len / cand_len))

    prec = []
    for n in range(1, 5):
        cand_ngrams = ngram(cand_sent, n)
        ref_ngrams = [ngram(ref, n) for ref in ref_list]
        # Normalise by the candidate that was passed in, not by the
        # notebook-level global `cand`.
        prec.append(mod_ngram_precision_count(cand_ngrams, ref_ngrams) / len(cand_ngrams))
    return bp * reduce(mul, [exp(0.25 * log(prec_n)) for prec_n in prec], 1)

In [11]:
'''bleu edited by Dojun
> 0.7071067811865476 with "the the the ..." '''

def bleu(cand_sent, ref_list):
    """BLEU variant in which zero n-gram precisions are ignored.

    Args:
        cand_sent: Candidate sentence as a list of tokens.
        ref_list: List of reference sentences, each a list of tokens.

    Returns:
        The brevity penalty multiplied by the product of the non-zero
        clipped 1- to 4-gram precisions, each raised to the power 0.25.
        A precision of 0 contributes a factor of 1.
    """
    hyp_len = len(cand_sent)
    ref_lens = [len(ref) for ref in ref_list]
    # Reference length closest to the candidate length (first one wins ties).
    closest_ref_len = min(ref_lens, key=lambda ref_len: abs(ref_len - hyp_len))
    bp = min(1, exp(1 - closest_ref_len / hyp_len))

    prec = []
    for order in range(1, 5):
        hyp_grams = ngram(cand_sent, order)
        clipped = mod_ngram_precision_count(hyp_grams, [ngram(ref, order) for ref in ref_list])
        prec.append(clipped / len(hyp_grams))

    score = 1
    for prec_n in prec:
        # exp(0.25 * 0) == 1, so a zero precision leaves the product unchanged.
        factor = exp(0.25 * log(prec_n)) if prec_n != 0 else exp(0.25 * 0)
        score = score * factor
    return bp * score

In [26]:
'''bleu edited by Dojun
> 0.0 with "the the the ..." '''

def bleu(cand_sent, ref_list):
    """BLEU variant in which any zero n-gram precision zeroes the score.

    Args:
        cand_sent: Candidate sentence as a list of tokens.
        ref_list: List of reference sentences, each a list of tokens.

    Returns:
        The brevity penalty multiplied by the product of the clipped 1- to
        4-gram precisions, each raised to the power 0.25. The result is
        zero as soon as one precision is 0.
    """
    hyp_len = len(cand_sent)
    ref_lens = [len(ref) for ref in ref_list]
    # Reference length closest to the candidate length (first one wins ties).
    closest_ref_len = min(ref_lens, key=lambda ref_len: abs(ref_len - hyp_len))
    bp = min(1, exp(1 - closest_ref_len / hyp_len))

    prec = []
    for order in range(1, 5):
        hyp_grams = ngram(cand_sent, order)
        clipped = mod_ngram_precision_count(hyp_grams, [ngram(ref, order) for ref in ref_list])
        prec.append(clipped / len(hyp_grams))

    score = 1
    for prec_n in prec:
        # A zero precision is used as a literal 0 factor instead of log(0).
        factor = exp(0.25 * log(prec_n)) if prec_n != 0 else 0
        score = score * factor
    return bp * score

In [28]:
'''bleu edited by Dojun
> 1.2882297539194154e-231 with "the the the ..." '''

def bleu(cand_sent, ref_list):
    """Sentence-level BLEU with equal weights on 1- to 4-gram precisions.

    A zero n-gram precision is replaced by ``sys.float_info.min`` before
    its logarithm is taken, so the score becomes a tiny positive number
    instead of raising a math domain error.

    Args:
        cand_sent: Candidate sentence as a list of tokens.
        ref_list: Non-empty list of reference sentences, each a list of
            tokens.

    Returns:
        The score as a float. An empty candidate scores 0.0.

    Raises:
        ValueError: If ``ref_list`` is empty.
    """
    cand_len = len(cand_sent)
    # Reference length closest to the candidate length (first one wins ties).
    closest_ref_len = min((len(ref) for ref in ref_list), key=lambda n: abs(n - cand_len))
    if cand_len == 0:
        # Nothing can match an empty candidate, and the brevity penalty
        # below would divide by its length.
        return 0.0
    bp = min(1, exp(1 - closest_ref_len / cand_len))

    floor_log = log(sys.float_info.min)
    ls_prec = []
    for n in range(1, 5):
        cand_ngrams = ngram(cand_sent, n)
        ref_ngrams = [ngram(ref, n) for ref in ref_list]
        clipped = mod_ngram_precision_count(cand_ngrams, ref_ngrams)
        # A candidate shorter than n has no n-grams; flooring the denominator
        # at 1 makes that precision 0 instead of dividing by zero.
        prec_n = clipped / max(1, len(cand_ngrams))
        ls_prec.append(0.25 * log(prec_n) if prec_n != 0 else 0.25 * floor_log)
    return bp * exp(fsum(ls_prec))

In [44]:
# Print the local bleu() score, then NLTK's sentence_bleu score for the same
# candidate and references with four equal weights of 0.25, for comparison.
print(bleu(cand, [ref_1, ref_2]))
print(sentence_bleu(references=[ref_1, ref_2], hypothesis=cand, weights=(0.25,0.25,0.25,0.25)))

1.2882297539194154e-231
1.2882297539194154e-231


In [45]:
# Add a smoothing function.
# Instantiate NLTK's SmoothingFunction so that one of its methods (method0)
# can be passed to sentence_bleu as smoothing_function.
from nltk.translate.bleu_score import SmoothingFunction
chencherry = SmoothingFunction()

# Print the local bleu() score next to NLTK's sentence_bleu score computed
# with method0 as the smoothing function.
print(bleu(cand, [ref_1, ref_2]))
print(sentence_bleu(references=[ref_1, ref_2], hypothesis=cand, weights=(0.25,0.25,0.25,0.25), smoothing_function=chencherry.method0))

1.2882297539194154e-231
1.2882297539194154e-231
