In [None]:
# Import used modules

import itertools
import json
import logging
import os
import warnings

import pandas as pd
from pyrouge import Rouge155
from tqdm import tqdm_notebook as tqdm

In [None]:
# Configure filepath
# NOTE: the path below is relative to the current working directory, so this
# cell is not idempotent -- re-running it without restarting the kernel
# applies the relative chdir a second time.

os.chdir("../model_result/")
# Suppress every logging call of severity CRITICAL and below, process-wide
# (presumably to keep the evaluation output quiet -- confirm).
logging.disable(logging.CRITICAL)

# Helper Functions

In [None]:
def create_sent(document):
    """Flatten a tokenised document into lower-cased, newline-terminated text

    Parameters
    ----------
    document: two dimensional list
        list of sentences, each sentence being a list of words

    Returns
    -------
    string
        one line per sentence: its words joined by single spaces, lower-cased
        and followed by a newline; an empty string for an empty document
    """

    return "".join(
        " ".join(words).lower() + "\n"
        for words in document
    )

In [None]:
def write_to_file(path, document):
    """Write 'document' as text into the file specified by 'path'

    The document is converted with 'create_sent' (one lower-cased sentence
    per line) and any existing file at 'path' is overwritten.

    Parameters
    ----------
    path: string
        file path
    document: two dimensional list
        list of sentences, each sentence being a list of words

    Returns
    -------
    boolean
        True if the file was written, False if converting or writing failed
    """

    try:
        sent = create_sent(document)
        # The context manager closes the handle even when write() fails; the
        # explicit encoding keeps the output independent of the platform's
        # default locale.
        with open(path, "w", encoding="utf-8") as f:
            f.write(sent)
        return True
    except Exception:
        # Still best-effort (the caller receives False), but unlike a bare
        # 'except:' this does not swallow KeyboardInterrupt or SystemExit.
        return False

In [None]:
def create_folder(dest, name):
    """Create the folders that are needed for the evaluation process of a model

    The layout is '<dest>/<name>/reference' and '<dest>/<name>/model'.
    Folders that already exist are reused, so the function can be called
    again for the same model (e.g. when a notebook cell is re-run) instead
    of failing with FileExistsError. Files already present in a reused
    folder are left untouched.

    Parameters
    ----------
    dest: string
        name of folder that will contain the created folder
    name: string
        model name

    Returns
    -------
    tuple of string
        reference folder path and model result folder path
    """

    # Format both parts at once so that braces inside 'dest' are never
    # interpreted as format placeholders.
    dest_folder = "{}/{}".format(dest, name)
    ref_folder = dest_folder + "/reference"
    model_folder = dest_folder + "/model"
    # makedirs also creates 'dest' and 'dest/name' on the way down, and
    # exist_ok turns a repeated call into a no-op rather than an error.
    for folder in (ref_folder, model_folder):
        os.makedirs(folder, exist_ok=True)
    return ref_folder, model_folder

In [None]:
def create_file(json_obj, ref_folder, model_folder):
    """Write the data in 'json_obj' into files inside the given folders

    The entries are numbered from 0 in the iteration order of 'json_obj';
    entry i is written to '<ref_folder>/<i>_reference.txt' and
    '<model_folder>/<i>_model.txt'.

    Parameters
    ----------
    json_obj: dict
        dictionary whose values each hold a 'reference' and a 'hypotesis'
        document (the key spelling is the one used by the data and must
        stay as it is)
    ref_folder: string
        reference folder path
    model_folder: string
        model result folder path

    Returns
    -------
    None

    Warns
    -----
    UserWarning
        for every file that 'write_to_file' reports it could not write
    """

    for counter, entry in enumerate(json_obj.values()):
        for folder, suffix, field in (
            (ref_folder, "reference", "reference"),
            (model_folder, "model", "hypotesis"),
        ):
            path = "{}/{}_{}.txt".format(folder, counter, suffix)
            # write_to_file signals failure only through its return value;
            # surface it so that a missing file is noticed before the
            # evaluation runs.
            if not write_to_file(path, entry[field]):
                warnings.warn("Could not write '{}'".format(path))

In [None]:
def save_dct(dct, path):
    """Write the data in 'dct' as JSON into the file specified by 'path'

    An existing file at 'path' is overwritten.

    Parameters
    ----------
    dct: dict
        dictionary that want to be written
    path: string
        dictionary file path

    Returns
    -------
    None
    """

    with open(path, "w") as outfile:
        # Dump the 'dct' argument (not a notebook-level variable) so the
        # function works for any dictionary it is given.
        json.dump(dct, outfile)

# Run Evaluation

## One File

In [None]:
# Calculate ROUGE on a file

filename = "" # Fill with filepath of a file that want to be checked
with open(filename) as json_file:       # closes the handle once the JSON is parsed
    json_obj = json.load(json_file)

# Folder name of this run: the file name without directory and extension
# (splitting on the first "." would break on "./x.json" or "a.b.json").
name = os.path.splitext(os.path.basename(filename))[0]

dest = "evaluation"
ref_folder, model_folder = create_folder(dest, name)
create_file(json_obj, ref_folder, model_folder)

base_path = os.getcwd()
rouge_pl = ""        # Fill with the absolute path of the ROUGE-1.5.5 directory (its "data" sub-folder is passed to -e)
rouge_path = "{}/data".format(rouge_pl)
conf = "-e {} -c 95 -2 -1 -U -r 1000 -n 4 -a".format(rouge_path)       # Fill with ROUGE configuration

r = Rouge155(rouge_args=conf)
# NOTE(review): pyrouge appears to use "system" for the summaries being
# evaluated and "model" for the gold standard; here the reference folder is
# assigned to system_dir, which would swap precision and recall in the
# report -- confirm this is intended before changing it.
r.system_dir = base_path + "/" + ref_folder
r.model_dir = base_path + "/" + model_folder
# Raw string: "\d" is a regex token, not a Python string escape.
r.system_filename_pattern = r'(\d+)_reference.txt'
r.model_filename_pattern = '#ID#_model.txt'
output = r.convert_and_evaluate()

print(output)

In [None]:
# Persist the scores of the single-file run as JSON inside its evaluation folder

result_path = "./{}/{}/result.json".format(dest, name)
output_dict = r.output_to_dict(output)
save_dct(output_dict, result_path)

## Multiple Files

In [None]:
# List the datasets/methods whose result files should be evaluated.
# Every combination of one value per list names one result file:
# "<dataset>-<topic>-<embedding>-<similarity>-<method>.json"

lst_dataset = [
    "IndoSUM",
    "Liputan6",
]
lst_topic_modelling = [
    "LDA",
    "LSA",
    "NMF",
]
lst_embedding = [
    "Word2Vec",
    "FastText",
    "TF-IDF",
    "BoW",
    "BERT",
]
lst_similarity = [
    "Cosine",
    "Euclidean",
    "Jaccard",
]
lst_method = [
    "Individual",
    "Combined",
]

In [None]:
# Calculate ROUGE on multiple files

dest = "evaluation"
base_path = os.getcwd()
rouge_pl = "/workspace/pyrouge/pyrouge/rouge/tools/ROUGE-1.5.5"        # Fill with the absolute path of the ROUGE-1.5.5 directory (its "data" sub-folder is passed to -e)
rouge_path = "{}/data".format(rouge_pl)
conf = "-e {} -c 95 -2 -1 -U -r 1000 -n 4 -a".format(rouge_path)       # Fill with ROUGE configuration

# One evaluation per combination of the configured lists; product() yields
# them in the same order as nesting the five loops (last list varies fastest).
combinations = list(itertools.product(
    lst_dataset, lst_topic_modelling, lst_embedding, lst_similarity, lst_method
))

lst_res = []

for dataset, topic, embedding, similarity, method in tqdm(combinations):
    # Build the folder name directly instead of cutting the file name at its
    # first ".", which would truncate any list value that contains a dot.
    name = "{}-{}-{}-{}-{}".format(dataset, topic, embedding, similarity, method)
    filename = name + ".json"
    with open(filename) as json_file:       # closes the handle once parsed
        json_obj = json.load(json_file)

    ref_folder, model_folder = create_folder(dest, name)
    create_file(json_obj, ref_folder, model_folder)

    r = Rouge155(rouge_args=conf)
    # NOTE(review): pyrouge appears to use "system" for the summaries being
    # evaluated and "model" for the gold standard; here the reference folder
    # is assigned to system_dir, which would swap precision and recall in
    # result.json -- confirm this is intended before changing it.
    r.system_dir = base_path + "/" + ref_folder
    r.model_dir = base_path + "/" + model_folder
    # Raw string: "\d" is a regex token, not a Python string escape.
    r.system_filename_pattern = r'(\d+)_reference.txt'
    r.model_filename_pattern = '#ID#_model.txt'

    output = r.convert_and_evaluate()
    output_dict = r.output_to_dict(output)
    lst_res.append([
        name,
        output_dict['rouge_1_f_score'],
        output_dict['rouge_2_f_score'],
        output_dict['rouge_3_f_score']
    ])
    save_dct(output_dict, "./{}/{}/result.json".format(dest, name))

res_df = pd.DataFrame(lst_res, columns=["Filename", "ROUGE-1", "ROUGE-2", "ROUGE-3"])
res_df