# Lib import

In [229]:
import sys
import json
import pandas as pd
from datasets import load_metric

# Load dataset

In [230]:
def load_data(dts):
    path = f'MuP_dataset/{dts}_complete.jsonl'
    try:
        with open(path, 'r') as json_file:
            json_list = list(json_file)
        col_name = ["paper_id","summary"]
    except:
        print(f"Warning: Did not load dataset from {path}")
        return
    summary_df = pd.DataFrame(columns=col_name)
    for json_str in json_list[:]:
        result = json.loads(json_str)
        df = pd.DataFrame([[result["paper_id"], result["summary"]]], columns=col_name)
        summary_df = pd.concat([summary_df,df])
    return summary_df

def split_sum_num(df):
    num_paper = df.groupby(['paper_id']).count()
    num_paper['num_paper'] = 1
    num_paper.groupby(['summary']).count()
    num_paper = df.groupby(['paper_id']).count()
    num_paper['num_paper'] = 1
    num_paper.groupby(['summary']).count()
    num_paper.drop('num_paper', inplace=True,axis=1)
    num_paper.sort_values(["summary"])

    df_list = []
    for i in range(0, max(num_paper['summary'])):
        paper_id = (list((num_paper[num_paper["summary"]==(i+1)]).index))
        df_i = df[df.paper_id.isin(paper_id)].sort_values("paper_id")
        df_list.append(df_i.groupby('paper_id').apply(lambda df_: df_[['summary']].values.flatten()).apply(pd.Series).reset_index())
        
    return df_list

In [231]:
dts = "training"
summary_df = load_data(dts)

In [232]:
df_list = split_sum_num(summary_df)

In [233]:
df_list[3]

Unnamed: 0,paper_id,0,1,2,3
0,SP:00130f3b3a6b3b71f9b487003a18b43517cacbbb,This work proposes a new method for subgame so...,This paper proposes a novel technique for sear...,The authors develop a new approximation to for...,This paper deals with two related problems. Th...
1,SP:00215e91570b72ae8202535812037e710e766253,The paper studies continual learning and that ...,The paper learns the binary basis mask for a f...,The paper describes an approach to continual l...,"In the current paper, the authors propose a no..."
2,SP:006a99a453b861691e5ea2c02012a2aef44d393e,The paper considers the setting in which a sin...,"This paper proposes a single-actor, multi-crit...",This paper proposes to extend the actor-critic...,The paper introduces a variant of actor-critic...
3,SP:006e9fb3f4bd9fce1b751e6491f93ca9a918b1d0,Summary. Prior works have used auxiliary tasks...,This work proposes random General Value Functi...,Summary ------- Owing to the importance of st...,This paper introduces a new auxiliary task for...
4,SP:008b937acb21afd5449982967b6daac37b4134ab,This paper studies a relatively little-concern...,This paper studies positive and unlabeled (PU)...,This paper addresses the problem of class-prio...,This paper studies the prior $\pi$ in PU learn...
...,...,...,...,...,...
1108,SP:ff321c62ff012f2a3c4fb02f9ba95daee33636f0,"In this work, the authors propose a new featur...",The paper proposes an infinite-width parameter...,The authors study a certain variant of an MLP ...,The paper introduces an approach (named pi-lim...
1109,SP:ff608359d72b2fd9207c2c8d86282ace1d8b619b,This paper proposes a defense method against p...,This paper studies the problem of certifying a...,This paper studied how to certificate a policy...,This paper proposes a certification method aga...
1110,SP:ff641ae83dfd806ab9770e37bd824e928c2b06a6,This paper has the following contributions: * ...,This paper introduces a neural network archite...,This paper proposes to use self-attention betw...,Deep parametric models have demonstrated treme...
1111,SP:ffb273a8ad8895be2fcfa2af3cb2624617304de9,"In this paper, the authors propose a novel S...",A method called LaGraph is proposed for semi-s...,The paper proposes a new self-supervised learn...,The authors propose a self-supervised learning...


# Score calculation

In [234]:
from evaluate import load

In [235]:
def n_scores(df, subscore_col):
    n = len(df.columns)-1
    pairs = [f'{i}-{j}' for i in range(n) for j in range(i+1, n)]
    col = pd.MultiIndex.from_product([subscore_col, pairs])
    scores = pd.DataFrame(columns=col)
    scores.insert(0, "paper_id", df["paper_id"])
    # scores.set_index("paper_id", inplace=True)
    return scores

## ROUGE Score

In [242]:
rougescore = load_metric("rouge")

def rouge_cal(df):
    n = len(df.columns)-1
    print(f"Calculating ROUGE on {n} summaries")

    rouge_list = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']
    score_list = ['precision', 'recall', 'fmeasure']

    df_score_dict = {}
    mux = pd.MultiIndex.from_product([["summary"],(list(df.columns))[1:]])
    df_score = pd.DataFrame(columns=mux)
    df_score.insert(0, "paper_id", df["paper_id"])
    for col in df:
        if col != 'paper_id':
            df_score[('summary', col)] = df[col]
    df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
    for r in rouge_list:
        df_score_dict[r] = df_score.copy(deep=True)

    df_len = len(df)
    for idx, row in df.iterrows():
        sys.stdout.write(f"\r{idx+1}/{df_len}")
        sys.stdout.flush()
        for i in range(n):
            for j in range(i+1,n):
                pair = f'{i}-{j}'
                score = rougescore.compute(predictions=[row[i]], references=[row[j]], use_stemmer=False)
                for r in rouge_list:
                    df_score_dict[r].loc[idx, ('precision', pair)] = ((score[r]).low).precision
                    df_score_dict[r].loc[idx, ('recall', pair)] = ((score[r]).low).recall
                    df_score_dict[r].loc[idx, ('fmeasure', pair)] = ((score[r]).low).fmeasure
                break
            break
        break

    return df_score_dict

In [244]:
text = ['The authors describe the conditioned GAN model to generate speaker conditioned Mel spectra. They augment the z-space corresponding to the identification with latent variables that allow a richer set of produced audio. In a way this is like a partially conditioned model that has "extra" degrees of freedom. It looks that the "latent" variables are just concaneted to the "original" set of z-values (altough with particular conditions to maximize independence). The conditioning of the z-space has originality in it and may provide interesting to the audience. Ultimately one coud think about z-space direction being totally mapped to specific features of the produced signal.',
 'Quality: This submission claims to present a model that can control non-annotated attributes such as speaking style, accent, background noise, etc. Though empirical evidence in the form of numerical measurements is presented for some controllable attributes more evidence other than individual samples and authors claims is needed. For example a reliable numerical evidence is needed on page 4 following "We also found...", page 5 following "We discovered....", page 5 following "It clearly presents...", page 5 following "Drawing samples..." evidence is given only for 1 dimension, page 6 following "Figure 7(b)...". ']
score = rougescore.compute(predictions=[text[0]], references=[text[1]], use_stemmer=False)
score['rouge1'].low.precision

0.12149532710280374

In [243]:
for n, df in enumerate(df_list):
    if n+1 > 1:
        dict_result = rouge_cal(df)
        for key, val in dict_result.items():
            print(key)
            # val.to_csv(f"visualization_data/rouge-between-sum/{key}/{dts}_{key}_{n+1}sum.csv")
dict_result['rouge1']

Calculating ROUGE on 2 summaries
1/2845rouge1
rouge2
rougeL
rougeLsum
Calculating ROUGE on 3 summaries
1/2116rouge1
rouge2
rougeL
rougeLsum
Calculating ROUGE on 4 summaries
1/1113

  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')


rouge1
rouge2
rougeL
rougeLsum
Calculating ROUGE on 5 summaries
1/10rouge1
rouge2
rougeL
rougeLsum
Calculating ROUGE on 6 summaries
1/15rouge1
rouge2
rougeL
rougeLsum
Calculating ROUGE on 7 summaries
1/4

  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')


rouge1
rouge2
rougeL
rougeLsum


Unnamed: 0_level_0,paper_id,summary,summary,summary,summary,summary,summary,summary,precision,precision,...,fmeasure,fmeasure,fmeasure,fmeasure,fmeasure,fmeasure,fmeasure,fmeasure,fmeasure,fmeasure
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2,3,4,5,6,0-1,0-2,...,2-3,2-4,2-5,2-6,3-4,3-5,3-6,4-5,4-6,5-6
0,SP:2cf4a3964537ff5dd1f7b600ab567b4d0b3cc03e,This work reports the problem of image classif...,The paper evaluates pathologies of modern neur...,The paper identifies sparse sets of pixels whi...,The paper presents an interesting finding that...,"Authors define ""overinterpretation"" as an unde...","This paper proposes ""overinterpretation"" which...",The work utilizes the SIS (a local feature-imp...,0.23301,,...,,,,,,,,,,
1,SP:b622788bec805621c2abf11ffa25c0d55e50f4d3,The paper performs a detailed hyperparameter s...,This paper was previously rejected by NeurIPS ...,"In data-parallel distributed training, increas...",This paper revisits the effectiveness of the o...,I carefully read the responses from the author...,"In this work, the authors compared the standar...",The authors detail the significant effort requ...,,,...,,,,,,,,,,
2,SP:ddc796b9185d372f4d0829f436bbca50c3990867,This paper introduces a Jax package for implic...,The paper proposes a modular and efficient fra...,This paper presents a module for implicit diff...,The paper promises extension of Google’s JAX l...,This paper provides a unified tool for combini...,"A good paper considers a critical problem, but...",The authors propose a unified modular framewor...,,,...,,,,,,,,,,
3,SP:f202f3d6780876a0bdd7d7bd4d7047719a145177,The authors propose a novel skill discovery me...,This method learns a space of intrinsic goals ...,The paper presents a framework for an unsuperv...,The paper proposes an unsupervised exploration...,To increase the state space coverage with unsu...,The paper proposes a novel algorithm for learn...,The idea of this work is to maximize coverage ...,,,...,,,,,,,,,,


## BERT Score

In [228]:
bertscore = load("bertscore")
def bertscore_cal(df):
    n = len(df.columns)-1
    print(f"Calculating BERTScore on {n} summaries")
    
    score_list = ['precision', 'recall', 'f1']

    mux = pd.MultiIndex.from_product([["summary"],(list(df.columns))[1:]])
    df_score = pd.DataFrame(columns=mux)
    df_score.insert(0, "paper_id", df["paper_id"])
    for col in df:
        if col != 'paper_id':
            df_score[('summary', col)] = df[col]
    df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')

    for i in range(n):
        for j in range(i+1,n-1):
            pair = f'{i}-{j}'
            summary1 = list(df_score.loc[:, ('summary', i)])
            summary2 = list(df_score.loc[:, ('summary', j)])
            print(pair)
            result = bertscore.compute(predictions=summary1, references=summary2, lang="en", rescale_with_baseline=True)
            for score in score_list:
                df_score.loc[:, (score, pair)] = result[score]

    return df_score
    
bertscore_cal(df_list[2][:3])

Calculating BERTScore on 3 summaries
0-1


  df_score = df_score.merge(n_scores(df, score_list), left_on='paper_id', right_on='paper_id')
  df_score.loc[:, (score, pair)] = result[score]


Unnamed: 0_level_0,paper_id,summary,summary,summary,precision,precision,precision,recall,recall,recall,f1,f1,f1
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2,0-1,0-2,1-2,0-1,0-2,1-2,0-1,0-2,1-2
0,SP:001ae7379191bb40fc356a37deb2f0ffc4426e52,This paper proposes EgoTR for cross-view geo-l...,The paper tackles the cross-view geo-localizat...,This paper proposes to use transformer for cro...,0.224304,,,0.056118,,,0.140213,,
1,SP:0045d9a733e9d3db6e42c4fcadaa7dc57f7b5004,This paper considers the problem of generaliza...,This paper aims to design a counterfactual rei...,The paper presents a method to improve the gen...,-0.104706,,,0.213069,,,0.050674,,
2,SP:0055dca69c153ed21b420741c479a2ef00be2ef6,"This paper compares the so-called ""State Evolu...",This paper considers the state evolution equat...,This paper provides derivations of equivalence...,0.054633,,,0.191037,,,0.123345,,


In [None]:
for n, df in enumerate(df_list):
    if n+1 > 1:
        result = bertscore_cal(df)
        val.to_csv(f"visualization_data/bertscore-between-sum/{dts}_bertscore_{n+1}sum.csv")