In [None]:
# Mount Google Drive at /content/drive so files stored there can be reached from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/My\ Drive/Colab\ Notebooks/apex-codes/citation_sum

/content/drive/My Drive/Colab Notebooks/apex-codes/citation_sum


## The ROUGE evaluation is done between a generated summary for a cited article and its abstract (both can be accessed using their cited ids from the respective folders)

In [None]:
!pip3 install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:
import pandas as pd
import numpy as np
import os
from rouge import Rouge
import json

## Evaluation against the ground-truth summary

In [None]:
def _compute_ROUGE(generated_summary, human_summary):
  """Score one generated summary against one reference text with ROUGE.

  Args:
    generated_summary: text passed as the first argument of ``Rouge.get_scores``.
    human_summary: text passed as the second argument of ``Rouge.get_scores``.

  Returns:
    A tuple ``(rouge_1_f, rouge_2_f, rouge_l_f)`` holding the ``'f'`` entries
    of the first result returned by ``get_scores``, each multiplied by 100.
  """
  # get_scores returns a list; only its first element is used here.
  pair_scores = Rouge().get_scores(generated_summary, human_summary)[0]

  # Scale each F-score to a percentage, in ROUGE-1, ROUGE-2, ROUGE-L order.
  return tuple(
      pair_scores[metric]['f'] * 100
      for metric in ('rouge-1', 'rouge-2', 'rouge-l')
  )

## Iterate through the directories containing the generated and human summaries, store them in dictionaries keyed by paper id, and run the evaluation

### Call to the ROUGE evaluation method in main method

In [None]:
def main(model_name="TransFuse", w_abstract=True):
  """Print the average ROUGE-1/2/L F-scores of generated vs. human summaries.

  Generated summaries are read from
  ``<model_name>_Results_SciSummNet/<summary_type>`` (one file per paper; the
  file name with ``.txt`` removed is the paper id). Human summaries are read
  from ``<paper_id>/summary/<paper_id>.gold.txt`` under the ScisummNet
  ``top1000_complete`` directory. Both paths are relative to the current
  working directory, and the two sets are matched by paper id.

  The averages divide by the number of generated summaries, so a summary
  that cannot be scored contributes 0; such summaries are listed in a
  warning instead of being dropped silently.

  Args:
    model_name: prefix of the results directory; changes with the model used.
    w_abstract: if True, evaluate ``SUMMARIES_FROM_CITATIONS_AND_RP_ABSTRACT``;
      otherwise evaluate ``SUMMARIES_FROM_CITATIONS_ONLY``.

  Raises:
    KeyError: if a generated summary has no human summary with the same id.
  """
  if w_abstract:
    summary_type = "SUMMARIES_FROM_CITATIONS_AND_RP_ABSTRACT"
  else:
    summary_type = "SUMMARIES_FROM_CITATIONS_ONLY"

  GENERATED_SUMMARY_PATH = f"{model_name}_Results_SciSummNet/{summary_type}"
  HUMAN_SUMMARY_PATH = "ScisummNet/scisummnet_release1.1__20190413/top1000_complete"

  # Generated summaries, keyed by paper id.
  dict_generated_summaries = {}
  for file_name in os.listdir(GENERATED_SUMMARY_PATH):
    file_path = os.path.join(GENERATED_SUMMARY_PATH, file_name)
    if not os.path.isfile(file_path):
      continue  # e.g. a stray ".ipynb_checkpoints" directory
    with open(file_path, 'r') as fp:
      dict_generated_summaries[file_name.replace('.txt', '')] = fp.read()

  # Human summaries, keyed by paper id. Entries without a gold file are
  # ignored here; they only matter if a generated summary needs them.
  dict_human_summaries = {}
  for paper_id in os.listdir(HUMAN_SUMMARY_PATH):
    gold_path = os.path.join(HUMAN_SUMMARY_PATH, paper_id, "summary", f"{paper_id}.gold.txt")
    if not os.path.isfile(gold_path):
      continue
    with open(gold_path, 'r') as fp:
      dict_human_summaries[paper_id] = fp.read()

  total_no_summaries = len(dict_generated_summaries)
  if total_no_summaries == 0:
    # Avoid a ZeroDivisionError when there is nothing to average.
    print(f"No generated summaries found in {GENERATED_SUMMARY_PATH}; nothing to evaluate.")
    return

  rouge_1_f_sum, rouge_2_f_sum, rouge_l_f_sum = 0.0, 0.0, 0.0
  skipped = []   # "<paper_id> (<error>)" for every pair that could not be scored
  for paper_id, gen_summary in dict_generated_summaries.items():
    if paper_id not in dict_human_summaries:
      raise KeyError(f"no human summary found for generated summary '{paper_id}'")
    human_summary = dict_human_summaries[paper_id]

    # call to the ROUGE evaluation method
    try:
      rouge_1_f, rouge_2_f, rouge_l_f = _compute_ROUGE(gen_summary, human_summary)
    except Exception as err:   # not a bare except: Ctrl-C must still stop the run
      skipped.append(f"{paper_id} ({type(err).__name__}: {err})")
      continue

    rouge_1_f_sum += rouge_1_f
    rouge_2_f_sum += rouge_2_f
    rouge_l_f_sum += rouge_l_f

  if skipped:
    print("Warning: %d of %d summaries could not be scored and count as 0 in the averages: %s"
          % (len(skipped), total_no_summaries, "; ".join(skipped)))

  avg_rouge_1_f = rouge_1_f_sum / float(total_no_summaries)
  avg_rouge_2_f = rouge_2_f_sum / float(total_no_summaries)
  avg_rouge_l_f = rouge_l_f_sum / float(total_no_summaries)

  print("Final ROUGE-1 wrt human summaries: %.2f" % avg_rouge_1_f)
  print("Final ROUGE-2 wrt human summaries: %.2f" % avg_rouge_2_f)
  print("Final ROUGE-L wrt human summaries: %.2f" % avg_rouge_l_f)
  print('\n')


In [None]:
# Run the evaluation against the human summaries (the `main` defined in the cell above)
# when this code executes as the main module.
if __name__ == "__main__":
  main()

## Evaluation against the citation contexts

In [None]:
def main(model_name="TransFuse", w_abstract=True):
  """Print the average ROUGE-1/2/L F-scores of generated summaries vs. citation contexts.

  For every paper directory under the ScisummNet ``top1000_complete`` folder,
  the ``clean_text`` field of every object in every ``*.json`` file directly
  inside it is collected and joined with spaces; that string is the reference.
  Generated summaries are read from
  ``<model_name>_Results_SciSummNet/<summary_type>`` (file name with ``.txt``
  removed is the paper id) and matched with the references by paper id. Both
  paths are relative to the current working directory.

  The averages divide by the number of generated summaries, so a summary
  that cannot be scored contributes 0; such summaries are listed in a
  warning instead of being dropped silently.

  Note: this function shares its name with the human-summary ``main`` defined
  in an earlier cell; running this cell rebinds ``main``.

  Args:
    model_name: prefix of the results directory; changes with the model used.
    w_abstract: if True, evaluate ``SUMMARIES_FROM_CITATIONS_AND_RP_ABSTRACT``;
      otherwise evaluate ``SUMMARIES_FROM_CITATIONS_ONLY``.

  Raises:
    KeyError: if a generated summary has no paper directory with the same id.
  """
  if w_abstract:
    summary_type = "SUMMARIES_FROM_CITATIONS_AND_RP_ABSTRACT"
  else:
    summary_type = "SUMMARIES_FROM_CITATIONS_ONLY"

  # Read the citation contexts for each reference paper
  DATA_PATH = "ScisummNet/scisummnet_release1.1__20190413/top1000_complete"
  dict_citation_contexts = {}
  for paper_id in os.listdir(DATA_PATH):
    paper_dir = os.path.join(DATA_PATH, paper_id)
    if not os.path.isdir(paper_dir):
      continue  # stray file next to the paper folders

    citing_sentences = []   # all incoming citing sentences of this paper
    # Sorted so the concatenation order does not depend on the file system.
    for file_name in sorted(os.listdir(paper_dir)):
      if not file_name.endswith('.json'):
        continue
      with open(os.path.join(paper_dir, file_name), 'r') as fp:
        data = json.load(fp)
      # Accumulate: assigning here would keep only the last JSON file read.
      citing_sentences.extend(obj['clean_text'] for obj in data)

    dict_citation_contexts[paper_id] = " ".join(citing_sentences)

  # Generated summaries, keyed by paper id so they can be matched with the
  # citation contexts.
  GENERATED_SUMMARY_PATH = f"{model_name}_Results_SciSummNet/{summary_type}"
  dict_generated_summaries = {}
  for file_name in os.listdir(GENERATED_SUMMARY_PATH):
    file_path = os.path.join(GENERATED_SUMMARY_PATH, file_name)
    if not os.path.isfile(file_path):
      continue  # e.g. a stray ".ipynb_checkpoints" directory
    with open(file_path, 'r') as fp:
      dict_generated_summaries[file_name.replace('.txt', '')] = fp.read()

  total_no_summaries = len(dict_generated_summaries)
  if total_no_summaries == 0:
    # Avoid a ZeroDivisionError when there is nothing to average.
    print(f"No generated summaries found in {GENERATED_SUMMARY_PATH}; nothing to evaluate.")
    return

  rouge_1_f_sum, rouge_2_f_sum, rouge_l_f_sum = 0.0, 0.0, 0.0
  skipped = []   # "<paper_id> (<error>)" for every pair that could not be scored
  for paper_id, gen_summary in dict_generated_summaries.items():
    if paper_id not in dict_citation_contexts:
      raise KeyError(f"no citation contexts found for generated summary '{paper_id}'")
    citation_contexts = dict_citation_contexts[paper_id]   # since this is for evaluation against the citation contexts

    # call to the ROUGE evaluation method
    try:
      rouge_1_f, rouge_2_f, rouge_l_f = _compute_ROUGE(gen_summary, citation_contexts)
    except Exception as err:   # not a bare except: Ctrl-C must still stop the run
      skipped.append(f"{paper_id} ({type(err).__name__}: {err})")
      continue

    rouge_1_f_sum += rouge_1_f
    rouge_2_f_sum += rouge_2_f
    rouge_l_f_sum += rouge_l_f

  if skipped:
    print("Warning: %d of %d summaries could not be scored and count as 0 in the averages: %s"
          % (len(skipped), total_no_summaries, "; ".join(skipped)))

  avg_rouge_1_f = rouge_1_f_sum / float(total_no_summaries)
  avg_rouge_2_f = rouge_2_f_sum / float(total_no_summaries)
  avg_rouge_l_f = rouge_l_f_sum / float(total_no_summaries)

  print("Final ROUGE-1 wrt citation contexts: %.2f" % avg_rouge_1_f)
  print("Final ROUGE-2 wrt citation contexts: %.2f" % avg_rouge_2_f)
  print("Final ROUGE-L wrt citation contexts: %.2f" % avg_rouge_l_f)
  print('\n')

In [None]:
# Run the evaluation against the citation contexts (the `main` defined in the cell above)
# when this code executes as the main module.
if __name__ == "__main__":
  main()