# ROUGE Score V01

## imports

In [1]:
import sys
sys.path.append('..')

In [5]:
import os
import re
import nltk
import numpy as np
import pandas as pd
import spacy
import rouge

from glob import glob
from tqdm import tqdm

from textrank import TextRank

from utils import pdf_to_text
from types_ import *

# ignore warning
import warnings

warnings.filterwarnings(action="ignore")

## Data Load

In [None]:
dir_path = "../data/en/raw"
file_list = glob(f"{dir_path}/*.pdf")

fnames = []
for fname in file_list:
    _, fname = os.path.split(fname)
    fname, _ = os.path.splitext(fname)
    fnames.append(fname)

In [4]:
ref_path = '../data/en/global'
ref_list = glob(f'{ref_path}/*.txt')

ref_names = []
for ref in ref_list:
    _, ref = os.path.split(ref)
    ref, _ = os.path.splitext(ref)
    ref_names.append(ref)

In [5]:
eval_fnames = []
for ref_name in ref_names:
    if ref_name in fnames:
        eval_fnames.append(ref_name)

## Summarize

In [6]:
from tqdm import tqdm

In [7]:
stopwords = nltk.corpus.stopwords.words("english")

textrank = TextRank(language="en", tokenizer=None, stopwords=stopwords)

In [8]:
hyp_path = '../data/en/hyp'
ref_path = '../data/en/ref'

if not os.path.exists(hyp_path):
    os.mkdir(hyp_path)
if not os.path.exists(ref_path):
    os.mkdir(ref_path)

In [14]:
file_path = '../data/en/raw'

file_id = 1
for fname in tqdm(eval_fnames):
    abs_fname = f'{file_path}/{fname}.pdf'
    
    sents = pdf_to_text(abs_fname)
    keysents = textrank.summarize(sents, topk=3)
    
    with open(f"{hyp_path}/{fname}.txt", "w", encoding="utf8") as f:
            f.write("\n".join(keysents))

100%|██████████████████████████████████████████████████████████████████████████████████| 57/57 [01:54<00:00,  2.01s/it]


## ROUGE Score

In [3]:
class RougeScorer:
    def __init__(self, use_tokenizer=True):

        self.use_tokenizer = use_tokenizer
        if use_tokenizer:
            self.tokenizer = Mecab()

        self.rouge_evaluator = rouge.Rouge(
            metrics=["rouge-n", "rouge-l"],
            max_n=2,
            limit_length=True,
            length_limit=1000,
            length_limit_type="words",
            apply_avg=True,
            apply_best=False,
            alpha=0.5,  # Default F1_score
            weight_factor=1.2,
            stemming=True,
        )

    def compute_rouge(self, ref_path, hyp_path):
        ref_fnames = glob(f"{ref_path}/*.txt")
        hyp_fnames = glob(f"{hyp_path}/*.txt")
        ref_fnames.sort()
        hyp_fnames.sort()

        self.reference_summaries = []
        self.generated_summaries = []

        for ref_fname, hyp_fname in tqdm(
            zip(ref_fnames, hyp_fnames), total=len(ref_fnames)
        ):
            assert os.path.split(ref_fname)[1] == os.path.split(hyp_fname)[1]

            with open(ref_fname, "r", encoding="utf8") as f:
                ref = f.read().split("\n")
                ref = "".join(ref)

            with open(hyp_fname, "r", encoding="utf8") as f:
                hyp = f.read().split("\n")
                hyp = "".join(hyp)

            if self.use_tokenizer:
                ref = self.tokenizer.morphs(ref)
                hyp = self.tokenizer.morphs(hyp)

            ref = " ".join(ref)
            hyp = " ".join(hyp)

            self.reference_summaries.append(ref)
            self.generated_summaries.append(hyp)

        scores = self.rouge_evaluator.get_scores(
            self.generated_summaries, self.reference_summaries
        )
        str_scores = self.format_rouge_scores(scores)
        self.save_rouge_scores(str_scores)
        return str_scores

    def save_rouge_scores(self, str_scores):
        with open("rouge_scores.txt", "w") as output:
            output.write(str_scores)

    def format_rouge_scores(self, scores):
        return """\n
    ****** ROUGE SCORES ******
    ** ROUGE 1
    F1        >> {:.3f}
    Precision >> {:.3f}
    Recall    >> {:.3f}
    ** ROUGE 2
    F1        >> {:.3f}
    Precision >> {:.3f}
    Recall    >> {:.3f}
    ** ROUGE L
    F1        >> {:.3f}
    Precision >> {:.3f}
    Recall    >> {:.3f}""".format(
            scores["rouge-1"]["f"],
            scores["rouge-1"]["p"],
            scores["rouge-1"]["r"],
            scores["rouge-2"]["f"],
            scores["rouge-2"]["p"],
            scores["rouge-2"]["r"],
            scores["rouge-l"]["f"],
            scores["rouge-l"]["p"],
            scores["rouge-l"]["r"],
        )

In [4]:
rouge_eval = RougeScorer(use_tokenizer=False)

In [10]:
ref_path = "../data/en/global/"
hyp_path = "../data/en/hyp"

result = rouge_eval.compute_rouge(ref_path, hyp_path)

100%|██████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 84.44it/s]


In [12]:
print(result)



    ****** ROUGE SCORES ******
    ** ROUGE 1
    F1        >> 0.742
    Precision >> 0.684
    Recall    >> 0.913
    ** ROUGE 2
    F1        >> 0.605
    Precision >> 0.558
    Recall    >> 0.746
    ** ROUGE L
    F1        >> 0.524
    Precision >> 0.485
    Recall    >> 0.622
