# Lib import

In [1]:
import sys
import json
import pandas as pd
from datasets import load_metric

# Load dataset

In [2]:
def load_data(dts):
    path = f'MuP_dataset/{dts}_complete.jsonl'
    try:
        with open(path, 'r') as json_file:
            json_list = list(json_file)
        col_name = ["paper_id","summary"]
    except:
        print(f"Warning: Did not load dataset from {path}")
        return
    summary_df = pd.DataFrame(columns=col_name)
    for json_str in json_list[:]:
        result = json.loads(json_str)
        df = pd.DataFrame([[result["paper_id"], result["summary"]]], columns=col_name)
        summary_df = pd.concat([summary_df,df])
    return summary_df

def split_sum_num(df):
    num_paper = df.groupby(['paper_id']).count()
    num_paper['num_paper'] = 1
    num_paper.groupby(['summary']).count()
    num_paper = df.groupby(['paper_id']).count()
    num_paper['num_paper'] = 1
    num_paper.groupby(['summary']).count()
    num_paper.drop('num_paper', inplace=True,axis=1)
    num_paper.sort_values(["summary"])

    df_list = []
    for i in range(0, max(num_paper['summary'])):
        paper_id = (list((num_paper[num_paper["summary"]==(i+1)]).index))
        df_i = df[df.paper_id.isin(paper_id)].sort_values("paper_id")
        df_list.append(df_i.groupby('paper_id').apply(lambda df_: df_[['summary']].values.flatten()).apply(pd.Series).reset_index())
        
    return df_list

In [3]:
summary_df = load_data("training")

In [4]:
df_list = split_sum_num(summary_df)

In [5]:
df_list[3]

Unnamed: 0,paper_id,0,1,2,3
0,SP:00130f3b3a6b3b71f9b487003a18b43517cacbbb,This work proposes a new method for subgame so...,This paper proposes a novel technique for sear...,The authors develop a new approximation to for...,This paper deals with two related problems. Th...
1,SP:00215e91570b72ae8202535812037e710e766253,The paper studies continual learning and that ...,The paper learns the binary basis mask for a f...,The paper describes an approach to continual l...,"In the current paper, the authors propose a no..."
2,SP:006a99a453b861691e5ea2c02012a2aef44d393e,The paper considers the setting in which a sin...,"This paper proposes a single-actor, multi-crit...",This paper proposes to extend the actor-critic...,The paper introduces a variant of actor-critic...
3,SP:006e9fb3f4bd9fce1b751e6491f93ca9a918b1d0,Summary. Prior works have used auxiliary tasks...,This work proposes random General Value Functi...,Summary ------- Owing to the importance of st...,This paper introduces a new auxiliary task for...
4,SP:008b937acb21afd5449982967b6daac37b4134ab,This paper studies a relatively little-concern...,This paper studies positive and unlabeled (PU)...,This paper addresses the problem of class-prio...,This paper studies the prior $\pi$ in PU learn...
...,...,...,...,...,...
1108,SP:ff321c62ff012f2a3c4fb02f9ba95daee33636f0,"In this work, the authors propose a new featur...",The paper proposes an infinite-width parameter...,The authors study a certain variant of an MLP ...,The paper introduces an approach (named pi-lim...
1109,SP:ff608359d72b2fd9207c2c8d86282ace1d8b619b,This paper proposes a defense method against p...,This paper studies the problem of certifying a...,This paper studied how to certificate a policy...,This paper proposes a certification method aga...
1110,SP:ff641ae83dfd806ab9770e37bd824e928c2b06a6,This paper has the following contributions: * ...,This paper introduces a neural network archite...,This paper proposes to use self-attention betw...,Deep parametric models have demonstrated treme...
1111,SP:ffb273a8ad8895be2fcfa2af3cb2624617304de9,"In this paper, the authors propose a novel S...",A method called LaGraph is proposed for semi-s...,The paper proposes a new self-supervised learn...,The authors propose a self-supervised learning...


# Score calculation

In [6]:
from evaluate import load

In [193]:
def n_scores(df, subscore_col):
    n = len(df.columns)-1
    pairs = [f'{i}-{j}' for i in range(n) for j in range(i+1, n)]
    col = pd.MultiIndex.from_product([subscore_col, pairs])
    scores = pd.DataFrame(columns=col)
    scores.insert(0, "paper_id", df["paper_id"])
    # scores.set_index("paper_id", inplace=True)
    return scores

## ROUGE Score

In [195]:
rougescore = load("rouge")
rougescore = load_metric("rouge")

def rouge_cal(df):
    rouge_list = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']
    score_list = ['precision', 'recall', 'fmeasure']

    n = len(df.columns)-1
    print(f"Calculating ROUGE on {n} summaries")

    df_score_dict = {}
    mux = pd.MultiIndex.from_product([["summary"],(list(df.columns))[1:]])
    df_score = pd.DataFrame(columns=mux)
    df_score.insert(0, "paper_id", df["paper_id"])
    for col in df:
        if col != 'paper_id':
            df_score[('summary', col)] = df[col]
    df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
    for r in rouge_list:
        df_score_dict[r] = df_score

    for idx, row in df.iterrows():
        for i in range(n):
            for j in range(i+1,n):
                pair = f'{i}-{j}'
                score = rougescore.compute(predictions=[row[i]], references=[row[j]], use_stemmer=False)
                for r in rouge_list:
                    df_score_dict[r].loc[idx, ('precision', pair)] = ((score[r]).low).precision
                    df_score_dict[r].loc[idx, ('recall', pair)] = ((score[r]).low).recall
                    df_score_dict[r].loc[idx, ('fmeasure', pair)] = ((score[r]).low).fmeasure

    return df_score_dict

In [197]:
for df in df_list[1:]:
    dict_result = rouge_cal(df[:3])
    for key, val in dict_result.items():
        # print(key, "\n", val)
        break
    break
dict_result['rouge1']

  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')


Unnamed: 0_level_0,paper_id,summary,summary,precision,recall,fmeasure
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,0-1,0-1,0-1
0,SP:0007ee9ce7dfaf12a7dff4aa2979403aed9397d7,This paper proposed a novel benchmark for rela...,The paper focuses on the relation extraction t...,0.170213,0.197531,0.182857
1,SP:001a31f7a439ab22943dedb4fa4d46e3dd56e603,This paper is an interesting exploratory study...,This paper explores learning chess from raw no...,0.292683,0.216216,0.248705
2,SP:001e57e71bafdb52d6511bdd6aa73b78d60248f2,The manuscript considers the problem of imitat...,"The paper proposes an imitation method, I2L, t...",0.15122,0.295238,0.2


## BERT Score

In [12]:
bertscore = load("bertscore")
def bertscore_cal():
    n = len(df_list[3].columns)-1

In [13]:
rouge_score = load_metric("rouge")
sumaries = ["Hello world", "Hello friend"]
score = rouge_score.compute(
        predictions=[sumaries[0]], references=[sumaries[1]], use_stemmer=False
    )

  rouge_score = load_metric("rouge")


In [44]:
score

{'rouge1': AggregateScore(low=Score(precision=0.5, recall=0.5, fmeasure=0.5), mid=Score(precision=0.5, recall=0.5, fmeasure=0.5), high=Score(precision=0.5, recall=0.5, fmeasure=0.5)),
 'rouge2': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.0, recall=0.0, fmeasure=0.0), high=Score(precision=0.0, recall=0.0, fmeasure=0.0)),
 'rougeL': AggregateScore(low=Score(precision=0.5, recall=0.5, fmeasure=0.5), mid=Score(precision=0.5, recall=0.5, fmeasure=0.5), high=Score(precision=0.5, recall=0.5, fmeasure=0.5)),
 'rougeLsum': AggregateScore(low=Score(precision=0.5, recall=0.5, fmeasure=0.5), mid=Score(precision=0.5, recall=0.5, fmeasure=0.5), high=Score(precision=0.5, recall=0.5, fmeasure=0.5))}

In [159]:
((score['rouge1']).low).precision

0.5