In [1]:
import json
import evaluate

In [2]:
evaluate.load("rouge")

EvaluationModule(name: "rouge", module_type: "metric", features: [{'predictions': Value(dtype='string', id='sequence'), 'references': Sequence(feature=Value(dtype='string', id='sequence'), length=-1, id=None)}, {'predictions': Value(dtype='string', id='sequence'), 'references': Value(dtype='string', id='sequence')}], usage: """
Calculates average rouge scores for a list of hypotheses and references
Args:
    predictions: list of predictions to score. Each prediction
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
    rouge_types: A list of rouge types to calculate.
        Valid names:
        `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring,
        `"rougeL"`: Longest common subsequence based scoring.
        `"rougeLSum"`: rougeLsum splits text using `"
"`.
        See details in https://github.com/huggingface/

## NAIVE
1. input: document => output: summary
    -  rouge 다시 측정할 필요 없음

In [3]:
with open(f'../results/NAIVE/results_agg_temp.json') as f:
    data_NAIVE = json.load(f)

In [4]:
data_NAIVE['prediction'][0]['pred']

"President Barack Obama invited the Super Bowl champion New England Patriots to the White House on Thursday - but couldn't help but crack one last deflategate joke in his speech on the South Lawn."

In [5]:
data_NAIVE['prediction'][0]['label']

"Brady cited 'prior family commitments' in bowing out of meeting with Obama.\nHas been to the White House to meet President George W. Bush for previous Super Bowl wins."

In [6]:
NAIVE_SUM_rouge = data_NAIVE['metric']['rouge']
NAIVE_SUM_rouge

{'rouge1': 0.3221336740682571,
 'rouge2': 0.14995722760731672,
 'rougeL': 0.24013723693875919,
 'rougeLsum': 0.2964324905510362}

## KW
2. input: document => output: key words, summary

In [7]:
with open(f'../results/KW/results_agg_temp.json') as f:
    data_KW = json.load(f)

In [8]:
data_KW['prediction'][0]['pred'].replace('Key Words: ', '').split("\nSummary: ")

["british,bioinformatics,sports,sports psychology,sports administration,sports broadcasting,sports media,sports management,sports medicine,sports television,sports marketing,sports social media.sports commentators,sports commentators: sports commentators: 'I'm worried that 11 out of 12 of the 12 footballs used in the AFC Championship game were underinflated'"]

In [9]:
data_KW['prediction'][0]['label'].replace('Key Words: ', '').split("\nSummary: ")

['social media,sports medicine,public health,physiology,biomedical research,bioinformatics,cardiology,neurology,neonatology,nephrology',
 "Brady cited 'prior family commitments' in bowing out of meeting with Obama.\nHas been to the White House to meet President George W. Bush for previous Super Bowl wins."]

In [10]:
KW_KW_pred = []
KW_KW_label = []
KW_SUM_pred = []
KW_SUM_label = []
KW_no_summary = []
KW_no_summary_error = []

data_length = len(data_KW['prediction'])

for i in range(data_length):
    ex_pred = data_KW['prediction'][i]['pred'].replace('Key Words: ', '').split("\nSummary: ")
    ex_label = data_KW['prediction'][i]['label'].replace('Key Words: ', '').split("\nSummary: ")
    try:
        KW_KW_pred.append(ex_pred[0])
        KW_KW_label.append(ex_label[0])
        KW_SUM_pred.append(ex_pred[1]) 
        KW_SUM_label.append(ex_label[1])
    except:
        try:
            KW_KW_pred.append(ex_pred[0])
            KW_KW_label.append(ex_label[0])
            KW_no_summary.append(i)
            # print(f'Prediction {i} Has No Summary.')
        except: 
            KW_no_summary.append(i)
            KW_no_summary_error.append(i)
            print(f'Prediction {i} Has No Summary & Key Words Error Occured.')

In [11]:
KW_pred_length = len(data_KW['prediction'])
KW_no_summary_length = len(KW_no_summary)
print(f'{KW_pred_length}개 데이터 중 {KW_no_summary_length}개 데이터에 요약문이 없습니다.')

500개 데이터 중 347개 데이터에 요약문이 없습니다.


In [12]:
len(KW_KW_pred) == len(KW_KW_label) and len(KW_SUM_pred) == len(KW_SUM_label)

True

In [13]:
rouge = evaluate.load('rouge')

In [14]:
KW_KW_rouge = rouge.compute(references=KW_KW_label, predictions=KW_KW_pred, use_aggregator=True)
KW_SUM_rouge = rouge.compute(references=KW_SUM_label, predictions=KW_SUM_pred, use_aggregator=True)

In [15]:
KW_KW_rouge

{'rouge1': 0.17594659025656523,
 'rouge2': 0.08139417571716331,
 'rougeL': 0.13176789234658098,
 'rougeLsum': 0.13991956279206144}

In [16]:
KW_SUM_rouge

{'rouge1': 0.3398563544399322,
 'rouge2': 0.140142525788538,
 'rougeL': 0.24744224570609918,
 'rougeLsum': 0.310109314068133}

In [17]:
KW_KW_rouge_ex = rouge.compute(references=KW_KW_label, predictions=KW_KW_pred, use_aggregator=False)
KW_SUM_rouge_ex = rouge.compute(references=KW_SUM_label, predictions=KW_SUM_pred, use_aggregator=False)

## KS
3. input: document => output: key sentenece, summary

In [18]:
with open(f'../results/KS/results_agg_temp.json') as f:
    data_KS = json.load(f)

In [19]:
data_KS['prediction'][0]['pred'].replace('Key Sentence: ', '').split("\nSummary: ")

["President Barack Obama invited the Super Bowl champion New England Patriots to the White House on Thursday - but couldn't help but get one last deflategate joke in.",
 "The president opened his speech on the South Lawn by remarking 'that whole (deflgate) story got blown out of proportion,' referring to an investigation that 11 out of 12 footballs used in the AFC Championship game were under-inflated."]

In [20]:
data_KS['prediction'][0]['label'].replace('Key Sentence: ', '').split("\nSummary: ")

['Brady went to the White House to meet President George W Bush after winning the Super Bowl in 2005 and in 2004.',
 "Brady cited 'prior family commitments' in bowing out of meeting with Obama.\nHas been to the White House to meet President George W. Bush for previous Super Bowl wins."]

In [21]:
KS_KS_pred = []
KS_KS_label = []
KS_SUM_pred = []
KS_SUM_label = []
KS_no_summary = []
KS_no_summary_error = []

data_length = len(data_KS['prediction'])

for i in range(data_length):
    ex_pred = data_KS['prediction'][i]['pred'].replace('Key Sentence: ', '').split("\nSummary: ")
    ex_label = data_KS['prediction'][i]['label'].replace('Key Sentence: ', '').split("\nSummary: ")
    try:
        KS_KS_pred.append(ex_pred[0])
        KS_KS_label.append(ex_label[0])
        KS_SUM_pred.append(ex_pred[1]) 
        KS_SUM_label.append(ex_label[1])
    except:
        try :
            KS_KS_pred.append(ex_pred[0])
            KS_KS_label.append(ex_label[0])
            KS_no_summary.append(i)
            # print(f'Prediction {i} Has No Summary.')
        except: 
            KS_no_summary.append(i)
            KS_no_summary_error.append(i)
            print(f'Prediction {i} Has No Summary & Key Sentence Error Occured.')

In [22]:
KS_pred_length = len(data_KS['prediction'])
KS_no_summary_length = len(KS_no_summary)
print(f'{KS_pred_length}개 데이터 중 {KS_no_summary_length}개 데이터에 요약문이 없습니다.')

500개 데이터 중 170개 데이터에 요약문이 없습니다.


In [23]:
len(KS_KS_pred) == len(KS_KS_label) and len(KS_SUM_pred) == len(KS_SUM_label)

True

In [24]:
rouge = evaluate.load('rouge')

In [25]:
KS_KS_rouge = rouge.compute(references=KS_KS_label, predictions=KS_KS_pred, use_aggregator=True)
KS_SUM_rouge = rouge.compute(references=KS_SUM_label, predictions=KS_SUM_pred, use_aggregator=True)

In [26]:
KS_KS_rouge

{'rouge1': 0.35016004828349173,
 'rouge2': 0.23433034818722726,
 'rougeL': 0.3130680755838632,
 'rougeLsum': 0.3150138318178245}

In [27]:
KS_SUM_rouge

{'rouge1': 0.3061292159414556,
 'rouge2': 0.11942117190814014,
 'rougeL': 0.2227569231342712,
 'rougeLsum': 0.2714600595357948}

In [28]:
KS_KS_rouge_ex = rouge.compute(references=KS_KS_label, predictions=KS_KS_pred, use_aggregator=False)
KS_SUM_rouge_ex = rouge.compute(references=KS_SUM_label, predictions=KS_SUM_pred, use_aggregator=False)

## KWKS
4. input: document => ouptut: key words, key sentence, summary

In [29]:
with open(f'../results/KWKS/results_agg_temp.json') as f:
    data_KWKS = json.load(f)

In [30]:
temp = data_KWKS['prediction'][0]['pred'].replace('Key Words: ', '')
KW_KS_border = temp.find('\nKey Sentence: ')
KS_SUM_border = temp.find('\nSummary: ')
print(temp[:KW_KS_border].strip(),'\n', 
      temp[KW_KS_border+len('\nKey Sentence: '):KS_SUM_border].strip(), '\n', 
      temp[KS_SUM_border+len('\nSummary: '):].strip())

british,bioinformatics,dentistry,dietary medicine,dental,clinical research,clinical,clinical 
 The Super Bowl champion New England Patriots gathered the team's four Super Bowl trophies won under Coach Bill Belichick (right, next to President Barack Obama). 
 Tom Brady won his fourth Super Bowl ring in February - and his first since President Obama took office.


In [61]:
KWKS_KW_pred = []
KWKS_KW_label = []
KWKS_KS_pred = []
KWKS_KS_label = []
KWKS_SUM_pred = []
KWKS_SUM_label = []
KWKS_no_summary = []

data_length = len(data_KWKS['prediction'])

for i in range(data_length):
    ex_pred = data_KWKS['prediction'][i]['pred'].replace('Key Words: ', '')
    ex_label = data_KWKS['prediction'][i]['label'].replace('Key Words: ', '')
    
    KW_KS_border_pred = ex_pred.find('\nKey Sentence: ')
    KS_SUM_border_pred = ex_pred.find('\nSummary: ')

    KW_KS_border_label = ex_pred.find('\nKey Sentence: ')
    KS_SUM_border_label = ex_pred.find('\nSummary: ')
    
    KWKS_KW_pred.append(ex_pred[:KW_KS_border_pred].strip())
    KWKS_KW_label.append(ex_label[:KW_KS_border_label].strip())

    KWKS_KS_pred.append(ex_pred[KW_KS_border_pred+len('\nKey Sentence: '):KS_SUM_border_pred].strip())
    KWKS_KS_label.append(ex_label[KW_KS_border_label+len('\nKey Sentence: '):KS_SUM_border_label].strip())

    KWKS_SUM_pred.append(ex_pred[KS_SUM_border_pred+len('\nSummary: '):].strip())
    KWKS_SUM_label.append(ex_label[KS_SUM_border_label+len('\nSummary: '):].strip())

In [62]:
len(KWKS_KW_pred) == len(KWKS_KW_label) and len(KWKS_KS_pred) == len(KWKS_KS_label) and len(KWKS_SUM_pred) == len(KWKS_SUM_label)

True

In [63]:
rouge = evaluate.load('rouge')

In [64]:
KWKS_KW_rouge = rouge.compute(references=KWKS_KW_label, predictions=KWKS_KW_pred, use_aggregator=True)
KWKS_KS_rouge = rouge.compute(references=KWKS_KS_label, predictions=KWKS_KS_pred, use_aggregator=True)
KWKS_SUM_rouge = rouge.compute(references=KWKS_SUM_label, predictions=KWKS_SUM_pred, use_aggregator=True)

In [65]:
KWKS_KW_rouge

{'rouge1': 0.2578916722541682,
 'rouge2': 0.13666974525311157,
 'rougeL': 0.19523542322170873,
 'rougeLsum': 0.217793445478532}

In [66]:
KWKS_KS_rouge

{'rouge1': 0.20929023673258262,
 'rouge2': 0.11003549853568702,
 'rougeL': 0.1639255958063478,
 'rougeLsum': 0.184723365426793}

In [67]:
KWKS_SUM_rouge

{'rouge1': 0.18176518591363117,
 'rouge2': 0.09462037611688978,
 'rougeL': 0.13696156247397845,
 'rougeLsum': 0.1655366689500105}

In [68]:
KWKS_KW_rouge_ex = rouge.compute(references=KWKS_KW_label, predictions=KWKS_KW_pred, use_aggregator=False)
KWKS_KS_rouge_ex = rouge.compute(references=KWKS_KS_label, predictions=KWKS_KS_pred, use_aggregator=False)
KWKS_SUM_rouge_ex = rouge.compute(references=KWKS_SUM_label, predictions=KWKS_SUM_pred, use_aggregator=False)

## Evaluation Result

In [39]:
import pandas as pd

In [69]:
pd.DataFrame({'NAIVE_SUM' : NAIVE_SUM_rouge,
             'KW_KW' : KW_KW_rouge, 
             'KW_SUM' : KW_SUM_rouge, 
             'KS_KS' : KS_KS_rouge,
             'KS_SUM' : KS_SUM_rouge, 
             'KWKS_KW' : KWKS_KW_rouge, 
             'KWKS_KS' : KWKS_KS_rouge, 
             'KWKS_SUM' : KWKS_SUM_rouge})

Unnamed: 0,NAIVE_SUM,KW_KW,KW_SUM,KS_KS,KS_SUM,KWKS_KW,KWKS_KS,KWKS_SUM
rouge1,0.322134,0.175947,0.339856,0.35016,0.306129,0.257892,0.20929,0.181765
rouge2,0.149957,0.081394,0.140143,0.23433,0.119421,0.13667,0.110035,0.09462
rougeL,0.240137,0.131768,0.247442,0.313068,0.222757,0.195235,0.163926,0.136962
rougeLsum,0.296432,0.13992,0.310109,0.315014,0.27146,0.217793,0.184723,0.165537


In [70]:
pd.DataFrame({'NAIVE_SUM' : NAIVE_SUM_rouge,
             'KW_SUM' : KW_SUM_rouge, 
             'KS_SUM' : KS_SUM_rouge, 
             'KWKS_SUM' : KWKS_SUM_rouge})

Unnamed: 0,NAIVE_SUM,KW_SUM,KS_SUM,KWKS_SUM
rouge1,0.322134,0.339856,0.306129,0.181765
rouge2,0.149957,0.140143,0.119421,0.09462
rougeL,0.240137,0.247442,0.222757,0.136962
rougeLsum,0.296432,0.310109,0.27146,0.165537
