In [7]:
import sys
import unittest
from math import exp, log, fsum
from functools import reduce
from operator import mul
from nltk.translate.bleu_score import sentence_bleu


def word_count(str):
    """Count how many times each item occurs in an iterable.

    Args:
        str: Iterable of hashable, mutually orderable items (for example a
            list of tokens or of n-gram tuples). The name shadows the
            builtin but is kept for backward compatibility.

    Returns:
        dict mapping each distinct item to its number of occurrences. Items
        are inserted in ascending sorted order.
    """
    tally = {}
    for token in sorted(str):
        # .get() supplies 0 the first time a token is seen.
        tally[token] = tally.get(token, 0) + 1
    return tally


def mod_ngram_precision_count(cand_sent, ref_list, sum_count=True):
    """Clip candidate item counts by their maximum count in any reference.

    This is the numerator of BLEU's modified n-gram precision: an item in
    the candidate is credited at most as many times as it appears in the
    single reference that contains it most often.

    Args:
        cand_sent: Iterable of candidate items (tokens or n-gram tuples).
        ref_list: Iterable of references, each an iterable of items of the
            same kind as ``cand_sent``.
        sum_count: When truthy (default), return the total clipped count;
            otherwise return the per-item clipped counts.

    Returns:
        int total of the clipped counts, or a dict mapping each distinct
        candidate item to its clipped count.
    """
    cand_count = word_count(cand_sent)
    ref_counts = [word_count(ref) for ref in ref_list]

    clipped = {}
    for item, count in cand_count.items():
        # default=0 covers an empty ref_list: nothing can be matched.
        max_ref_count = max((ref.get(item, 0) for ref in ref_counts), default=0)
        clipped[item] = min(count, max_ref_count)

    # Truthiness test rather than `== True`, so any truthy flag selects the sum.
    if sum_count:
        return sum(clipped.values())
    return clipped


def ngram(text, gram_count):
    """Return every contiguous window of ``gram_count`` items as a tuple.

    Args:
        text: Sliceable sequence (list of tokens, string, ...).
        gram_count: Window size.

    Returns:
        list of tuples in order of appearance; empty when ``text`` is
        shorter than ``gram_count``.
    """
    grams = []
    # A negative window count makes range() empty, so short inputs yield [].
    for start in range(len(text) - gram_count + 1):
        grams.append(tuple(text[start:start + gram_count]))
    return grams


def bleu(cand_sent, ref_list):
    """Score a tokenised candidate against references with 4-gram BLEU.

    The score is the brevity penalty times the geometric mean (uniform
    weights of 0.25) of the clipped 1- to 4-gram precisions.

    Args:
        cand_sent: Candidate sentence as a sequence of tokens.
        ref_list: Non-empty sequence of reference sentences, each a
            sequence of tokens.

    Returns:
        float score. A zero n-gram precision is replaced by
        ``sys.float_info.min`` before taking the logarithm, so such scores
        are tiny but non-zero. An empty candidate scores 0.0.

    Raises:
        ValueError: If ``ref_list`` is empty.
    """
    if len(ref_list) == 0:
        raise ValueError("bleu() needs at least one reference sentence")
    cand_len = len(cand_sent)
    if cand_len == 0:
        # Nothing was produced; return early instead of dividing by zero below.
        return 0.0

    # Reference length closest to the candidate; a tie goes to the shorter
    # reference rather than to whichever happens to come first in ref_list.
    closest_ref_len = min(
        (len(ref) for ref in ref_list),
        key=lambda ref_len: (abs(ref_len - cand_len), ref_len),
    )
    bp = min(1, exp(1 - closest_ref_len / cand_len))

    log_terms = []
    for n in range(1, 5):
        cand_ngrams = ngram(cand_sent, n)
        ref_ngrams = [ngram(ref, n) for ref in ref_list]
        clipped = mod_ngram_precision_count(cand_ngrams, ref_ngrams)
        # A candidate shorter than n tokens has no n-grams; max(1, ...) turns
        # that into a zero precision instead of a ZeroDivisionError.
        prec_n = clipped / max(1, len(cand_ngrams))
        # log(0) is undefined, so a zero precision is floored at the smallest
        # positive normalised float.
        log_terms.append(0.25 * log(prec_n if prec_n != 0 else sys.float_info.min))
    return bp * exp(fsum(log_terms))

In [10]:
# Alternative candidate to try:
# cand = "the cat the cat on the mat"
cand = "the the the the the the the the".split()
ref_1 = "the cat is on the mat".split()
ref_2 = "there is a cat on the mat".split()

# Print this notebook's score, then NLTK's score for the same tokenised inputs.
print(bleu(cand, [ref_1, ref_2]))
print(sentence_bleu(hypothesis=cand, references=[ref_1, ref_2], weights=(0.25, 0.25, 0.25, 0.25)))

1.2882297539194154e-231
1.2882297539194154e-231
