In [1]:
import pandas as pd
from evaluate import load
bertscore = load("bertscore")

In [2]:
predictions = ["cat", 'cats', "man", "love", "dog", "king", "pluto", "sun", "I ate dinner"]
# predictions = ["I love you", "I hate you", "I really like you", "I will go with you", "I ate dinner", "Physics is very fun", "My name is John", "There are many dogs"]
references = [predictions[0]]*len(predictions)
results = bertscore.compute(predictions=predictions, references=references, lang="en", rescale_with_baseline=False)
df_results = pd.DataFrame(results, index=predictions)
df_results.drop(columns=["hashcode"], inplace=True)
df_results.sort_values("f1", ascending=False)

Unnamed: 0,precision,recall,f1
cat,1.0,1.0,1.0
cats,0.933764,0.933764,0.933764
dog,0.891025,0.891025,0.891025
king,0.875994,0.875994,0.875994
man,0.874707,0.874707,0.874707
love,0.872291,0.872291,0.872291
sun,0.860979,0.860979,0.860979
pluto,0.809962,0.847728,0.828415
I ate dinner,0.825392,0.815105,0.820216


In [4]:
print(type(results))

<class 'dict'>


# Load data

In [3]:
import sys
import json
import pandas as pd

In [4]:
dts = "training"
# dts = "validation"
path = 'MuP_dataset/'+dts+'_complete.jsonl'
with open(path, 'r') as json_file:
    json_list = list(json_file)

In [5]:
col_name = ["paper_id","summary"]
summary_df = pd.DataFrame(columns=col_name)
for json_str in json_list:
    result = json.loads(json_str)
    df = pd.DataFrame([[result["paper_id"], result["summary"]]], columns=col_name)
    summary_df = pd.concat([summary_df,df])
summary_df

Unnamed: 0,paper_id,summary
0,SP:4d08cdb2de2044bcb574a425b42963b83fbebfbc,This paper investigates kernel ridge-less regr...
0,SP:b80bc890180934092cde037b49d94d6e4e06fad9,This paper presents a novel way of making full...
0,SP:09f2fe6a482bbd6f9bd2c62aa841f995171ba939,This paper proposes a new framework that compu...
0,SP:a1e2218e6943bf138aeb359e23628676b396ed66,This work proposes a deep reinforcement learni...
0,SP:43e525fb3fa611df7fd44bd3bc9843e57b154c66,This paper proposes 3 deep generative models b...
...,...,...
0,SP:0d872fb4321f3a4a3fc61cf4d33b0c7e33f2d695,This paper presents a RNN-RL based method for ...
0,SP:4706017e6f8b958c7d0825fed98b285ea2994b59,This paper proposes a new pointwise convolutio...
0,SP:4706017e6f8b958c7d0825fed98b285ea2994b59,This paper presents a new pointwise convolutio...
0,SP:63ad3be1dae7ede5c02a847304072c1cbc91b1cb,This paper proposes to model various uncertain...


In [6]:
num_paper = summary_df.groupby(['paper_id']).count()
num_paper['num_paper'] = 1
num_paper.groupby(['summary']).count()

Unnamed: 0_level_0,num_paper
summary,Unnamed: 1_level_1
1,2276
2,2845
3,2116
4,1113
5,10
6,15
7,4


In [7]:
num_paper.drop('num_paper', inplace=True,axis=1)
num_paper.sort_values(["summary"])

Unnamed: 0_level_0,summary
paper_id,Unnamed: 1_level_1
SP:fff5b8e98a9909fb289cd1455d381df4b75f01fe,1
SP:c258573dc59f202955f20e00f7682963aab0a1e6,1
SP:8fb2da71029fc4096f279c5873a2c55e8afaa947,1
SP:348b7ab8ecfe2e7cfce697d8a1f9917880e95f62,1
SP:34963abebc7bff45ac1b8fae499ceea8900c2852,1
...,...
SP:f2325f97eb57b82ce95c86638af9d733d325b45d,6
SP:ddc796b9185d372f4d0829f436bbca50c3990867,7
SP:2cf4a3964537ff5dd1f7b600ab567b4d0b3cc03e,7
SP:f202f3d6780876a0bdd7d7bd4d7047719a145177,7


In [8]:
list_paper_id = []
for i in range(1, max(num_paper['summary'])+1):
    list_paper_id.append(list((num_paper[num_paper["summary"]==i]).index))
for id in list_paper_id:
    print(len(id))

2276
2845
2116
1113
10
15
4


In [9]:
df_list = []
for i in range(len(list_paper_id)):
    df_list.append(summary_df[summary_df.paper_id.isin(list_paper_id[i])].sort_values("paper_id"))
df_list[1][:20]

Unnamed: 0,paper_id,summary
0,SP:0007ee9ce7dfaf12a7dff4aa2979403aed9397d7,This paper proposed a novel benchmark for rela...
0,SP:0007ee9ce7dfaf12a7dff4aa2979403aed9397d7,The paper focuses on the relation extraction t...
0,SP:001a31f7a439ab22943dedb4fa4d46e3dd56e603,This paper is an interesting exploratory study...
0,SP:001a31f7a439ab22943dedb4fa4d46e3dd56e603,This paper explores learning chess from raw no...
0,SP:001e57e71bafdb52d6511bdd6aa73b78d60248f2,The manuscript considers the problem of imitat...
0,SP:001e57e71bafdb52d6511bdd6aa73b78d60248f2,"The paper proposes an imitation method, I2L, t..."
0,SP:004f41dfc870c5a9b347d118d7e41d7c3db77b91,This paper uses pruning and model distillation...
0,SP:004f41dfc870c5a9b347d118d7e41d7c3db77b91,This paper proposes a new framework which comb...
0,SP:00578dd55a640c10dbf22f647b736e49f6ee3c32,"The authors introduce CP-Flows, a way to param..."
0,SP:00578dd55a640c10dbf22f647b736e49f6ee3c32,This paper proposes the flow based representat...


In [10]:
def n_scores(n, score_col=["Bert"]):
    pairs = []
    for i in range(n):
        for j in range (i+1,n):
            pairs.append(str(i+1)+"-"+str(j+1))
    col = pd.MultiIndex.from_product([score_col, pairs])
    scores = pd.DataFrame(columns=col)
    scores.insert(0, "paper_id", list_paper_id[n-1])
    scores.set_index("paper_id", inplace=True)
    return scores

def bertscore_cal(n):
    df_scores = n_scores(n, ["Bert"])
    for num_id,id in enumerate(list_paper_id[n-1]):
        sumaries = list(df_list[n-1][df_list[n-1]["paper_id"]==id]["summary"])
        sys.stdout.write("\r" + str(num_id+1) + "/" + str(len(list_paper_id[n-1])))
        sys.stdout.flush()
        # print(id,"/",len(list_paper_id[n-1]), flush=True)
        for i in range(n):
            for j in range(i+1,n):
                score = bertscore.compute(predictions=[sumaries[i]], references=[sumaries[j]], lang="en")

                pair = str(i+1)+"-"+str(j+1)
                df_scores.loc[id]['Bert'][pair] = score['f1'][0]
        #         print(score)
        #         print(score['f1'][0])
        #         break
        #     break
        # break
    # df_score2
    # len(list_paper_id)
    print("\n")
    return df_scores

In [11]:
for n in range(2,len(list_paper_id)):
    print("Calculating BERTScore on", n, "summaries")
    df_scores = bertscore_cal(n)
    df_scores.to_csv("visualization_data/BertScore-between-sum/"+dts+"_bertscore_"+str(n)+"sum.csv")
    # break

Calculating ROUGE on 2 summaries
2844/2845

Calculating ROUGE on 3 summaries
9/2116

KeyboardInterrupt: 