In [1]:
import math
from collections import Counter
from itertools import chain

In [2]:
class CalcBleuScore:
    """Static helpers that compute a sentence-level BLEU score.

    Every method is a ``staticmethod``, so it can be called on the class
    (``CalcBleuScore.bleu_score(...)``) or on an instance. Sentences are
    passed as sequences of tokens; references are passed as a list of such
    sequences.
    """

    @staticmethod
    def pad_sequence(
        sequence,
        n,
        pad_left=False,
        pad_right=False,
        left_pad_symbol=None,
        right_pad_symbol=None,
    ):
        """Return an iterator over ``sequence`` with optional n-gram padding.

        Args:
            sequence: Iterable of tokens.
            n: N-gram order; ``n - 1`` pad symbols are added on each padded side.
            pad_left: Prepend ``left_pad_symbol`` ``n - 1`` times.
            pad_right: Append ``right_pad_symbol`` ``n - 1`` times.
            left_pad_symbol: Symbol used for left padding.
            right_pad_symbol: Symbol used for right padding.

        Returns:
            An iterator yielding the (possibly padded) tokens.
        """
        sequence = iter(sequence)
        if pad_left:
            sequence = chain((left_pad_symbol,) * (n - 1), sequence)
        if pad_right:
            sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
        return sequence

    @staticmethod
    def ngrams(
        sequence,
        n,
        pad_left=False,
        pad_right=False,
        left_pad_symbol=None,
        right_pad_symbol=None,
    ):
        """Lazily yield the n-grams of ``sequence`` as tuples of length ``n``.

        The padding arguments are forwarded to ``pad_sequence``. Nothing is
        yielded when the (padded) sequence has fewer than ``n`` items.
        """
        sequence = CalcBleuScore.pad_sequence(
            sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
        )

        # Pre-fill the sliding window with the first n - 1 items.
        history = []
        while n > 1:
            # PEP 479: catch StopIteration explicitly so it does not turn into
            # a RuntimeError when it escapes the generator.
            try:
                next_item = next(sequence)
            except StopIteration:
                # Fewer than n - 1 items: there are no n-grams to yield.
                return
            history.append(next_item)
            n -= 1

        # Slide the window: append one item, emit, drop the oldest item.
        for item in sequence:
            history.append(item)
            yield tuple(history)
            del history[0]

    @staticmethod
    def simple_count(tokens, n):
        """Count how often each n-gram occurs in ``tokens``.

        Args:
            tokens: Tokenized sentence.
            n: N-gram order.

        Returns:
            A ``Counter`` mapping n-gram tuples to their occurrence counts.
        """
        return Counter(CalcBleuScore.ngrams(tokens, n))

    @staticmethod
    def count_clip(candidate, reference_list, n):
        """Clip each candidate n-gram count by its maximum reference count.

        For every n-gram in ``candidate`` the result holds
        ``min(count in candidate, max count in any single reference)``.

        Returns:
            A dict keyed by the candidate's n-grams with the clipped counts.
        """
        candidate_counts = CalcBleuScore.simple_count(candidate, n)

        # Counter union keeps, per n-gram, the largest count seen so far,
        # i.e. the most occurrences within any single reference.
        max_ref_counts = Counter()
        for ref in reference_list:
            max_ref_counts |= CalcBleuScore.simple_count(ref, n)

        # A Counter returns 0 for n-grams that no reference contains.
        return {
            n_gram: min(count, max_ref_counts[n_gram])
            for n_gram, count in candidate_counts.items()
        }

    @staticmethod
    def modified_precision(candidate, reference_list, n):
        """Return the modified n-gram precision of ``candidate``.

        This is the sum of clipped counts divided by the total number of
        candidate n-grams. When the candidate has no n-grams of order ``n``
        the denominator is treated as 1, so the result is ``0.0``.
        """
        clipped = CalcBleuScore.count_clip(candidate, reference_list, n)
        total_clipped = sum(clipped.values())  # numerator

        total = sum(CalcBleuScore.simple_count(candidate, n).values())  # denominator

        # Guard against division by zero when n exceeds the candidate length.
        return total_clipped / (total or 1)

    @staticmethod
    def closest_ref_length(candidate, reference_list):
        """Return the reference length closest to the candidate length.

        Ties are broken in favour of the shorter reference: for a candidate of
        length 10 and references of length 9 and 11, 9 is returned.

        Raises:
            ValueError: If ``reference_list`` is empty.
        """
        ca_len = len(candidate)
        ref_lens = [len(ref) for ref in reference_list]
        if not ref_lens:
            raise ValueError("reference_list must contain at least one reference")
        # Sort key: smallest length difference first, then the shorter length.
        return min(ref_lens, key=lambda ref_len: (abs(ref_len - ca_len), ref_len))

    @staticmethod
    def brevity_penalty(candidate, reference_list):
        """Return the brevity penalty (BP) for ``candidate``.

        Returns:
            ``0`` for an empty candidate, ``1`` when the candidate is longer
            than the closest reference, otherwise
            ``exp(1 - ref_len / ca_len)``.

        Raises:
            ValueError: If ``reference_list`` is empty.
        """
        ca_len = len(candidate)
        ref_len = CalcBleuScore.closest_ref_length(candidate, reference_list)

        if ca_len == 0:
            # An empty candidate gets BP = 0, which makes the BLEU score 0.
            return 0
        if ca_len > ref_len:
            return 1
        return math.exp(1 - ref_len / ca_len)

    @staticmethod
    def set_weights(reference_list, weights):
        """Choose the n-gram weights based on the longest reference.

        The caller's ``weights`` are used only when the longest reference has
        more than 3 tokens. For shorter references fixed weights are returned
        that give zero weight to the higher n-gram orders.
        """
        longest_ref = max((len(ref) for ref in reference_list), default=0)
        if longest_ref > 3:
            return weights
        if longest_ref == 3:
            return [0.33, 0.33, 0.34, 0.0]
        if longest_ref == 2:
            return [0.5, 0.5, 0.0, 0.0]
        return [1.0, 0.0, 0.0, 0.0]

    @staticmethod
    def bleu_score(candidate, reference_list, weights=(0.25, 0.25, 0.25, 0.25)):
        """Compute the BLEU score of ``candidate`` as a percentage.

        Args:
            candidate: Tokenized candidate sentence.
            reference_list: List of tokenized reference sentences.
            weights: Per-order weights for the 1..N-gram precisions. They may
                be overridden by ``set_weights`` when all references are short.

        Returns:
            ``0`` when every modified precision is zero; otherwise the score
            rounded to two decimals as a fraction and expressed in percent
            (a float such as ``66.0``).

        Raises:
            ValueError: If ``reference_list`` is empty.
        """
        weights = CalcBleuScore.set_weights(reference_list, weights)

        bp = CalcBleuScore.brevity_penalty(candidate, reference_list)

        # p_1, p_2, ..., p_N: one modified precision per weight.
        p_n = [
            CalcBleuScore.modified_precision(candidate, reference_list, n)
            for n in range(1, len(weights) + 1)
        ]

        if sum(p_n) == 0:
            return 0

        score = 0
        for w_i, p_i in zip(weights, p_n):
            if p_i != 0:
                score += w_i * math.log(p_i)
            elif w_i != 0:
                # Custom penalty for a zero precision with a non-zero weight:
                # subtract |log(e * w_i)| instead of adding log(0).
                score -= abs(math.log(math.e * w_i))

        fraction = round(bp * math.exp(score), 2)
        # Round again after scaling: e.g. 0.57 * 100 is 56.99999999999999 in
        # binary floating point, not 57.0.
        return float(round(fraction * 100))

In [4]:
# Demo: score one candidate sentence against a list of reference sentences.
# Both sides are tokenized by whitespace only.
candidate = '지금 테스트를 하는 중이에요.'
references = ['지금 테스트 중이에요.']

candidate_tokens = candidate.split()
reference_tokens = [reference.split() for reference in references]
result = CalcBleuScore.bleu_score(candidate_tokens, reference_tokens)
print(result)

66.0
