Eval_Rouge.py
from Rouge import rouge  # local Rouge module providing the rouge() scorer


def rounder(num):
    # Keep two decimal places. Note that Python's round() uses banker's
    # rounding (round half to even), not strict round-half-up.
    return round(num, 2)


def rouge_max_over_ground_truths(prediction, ground_truths):
    # Score the prediction against every reference and keep the best
    # ROUGE-1, ROUGE-2 and ROUGE-L F1 independently (max over references).
    scores_for_rouge1 = []
    scores_for_rouge2 = []
    scores_for_rougel = []
    for ground_truth in ground_truths:
        score = cal_rouge([prediction], [ground_truth])
        scores_for_rouge1.append(score[0])
        scores_for_rouge2.append(score[1])
        scores_for_rougel.append(score[2])
    return max(scores_for_rouge1), max(scores_for_rouge2), max(scores_for_rougel)


def cal_rouge(infer, ref):
    # Compute ROUGE F1 scores for a batch and scale them to percentages.
    x = rouge(infer, ref)
    return x['rouge_1/f_score'] * 100, x['rouge_2/f_score'] * 100, x['rouge_l/f_score'] * 100


def eval_rouge(systems, refs):
    # Average each example's max-over-references scores across the corpus.
    rouge_1 = rouge_2 = rouge_l = total = 0
    assert len(systems) == len(refs), "predictions and references must have the same length"
    for i, pre in enumerate(systems):
        rouge_result = rouge_max_over_ground_truths(pre, refs[i])
        rouge_1 += rouge_result[0]
        rouge_2 += rouge_result[1]
        rouge_l += rouge_result[2]
        total += 1
    rouge_1 /= total
    rouge_2 /= total
    rouge_l /= total
    return {'ROUGE_1_F1': rouge_1, 'ROUGE_2_F1': rouge_2, 'ROUGE_L_F1': rouge_l}
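

# A minimal usage sketch with hypothetical data. It assumes the local Rouge
# module's rouge() accepts parallel lists of hypothesis/reference strings and
# returns a dict with 'rouge_1/f_score', 'rouge_2/f_score' and
# 'rouge_l/f_score' keys, as cal_rouge above relies on. Each prediction is
# paired with a list of acceptable references; the best match per example
# feeds the corpus average.
if __name__ == '__main__':
    predictions = ['the cat sat on the mat', 'dogs bark loudly']
    references = [
        ['a cat sat on the mat', 'the cat is on the mat'],
        ['dogs bark very loudly'],
    ]
    scores = eval_rouge(predictions, references)
    print({k: rounder(v) for k, v in scores.items()})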