## Libraries

In [1]:
# Extend the module search path so the project-local packages used below can be imported.
import sys 
# NOTE(review): hard-coded absolute Windows path; the notebook only runs on a machine
# that has this exact directory. The `custom_score` import below presumably resolves
# from it — confirm, and consider a path relative to the repository instead.
sys.path.append(r"C:\Pro\Stages\A4 - DVRC\Work\Supervised-Learning-using-Unsupervised-Learning-Metrics-in-the-absence-of-Annotated-Data\myLibraries")

from custom_score.utils import get_git_root
# Also append whatever get_git_root() returns (presumably the repository root — verify).
sys.path.append(get_git_root())

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from custom_score.score import BERTScoreStaticSampleTest, BERTScoreDynamicSampleTest, BARTScoreDynamicSampleTest
from custom_score.utils import serialized_to_model
from BARTScore.bart_score import BARTScorer
import torch
import tensorflow_datasets as tfds
from datetime import datetime
import pandas as pd
import numpy as np

### Checkup and linkage

In [3]:
# Sanity check: display whether PyTorch can see a CUDA device before running the benchmark.
torch.cuda.is_available()

True

## Datasets

### Billsum

In [None]:
# Load the Billsum test split from a shared Google Drive file.
billsumTest_url = 'https://drive.google.com/file/d/1Wd0M3qepNF6B4YwFYrpo7CaSERpudAG_/view?usp=share_link'

# The second-to-last path segment of the share link is the Drive file id;
# rebuild the link in the "uc?id=" form before handing it to pandas.
drive_file_id = billsumTest_url.split('/')[-2]
billsumTest_url = 'https://drive.google.com/uc?id=' + drive_file_id

# JSON-lines file; keep only the document and its reference summary.
billsum_test = pd.read_json(billsumTest_url, lines=True)[["text", "summary"]]
billsum_test.head()

### Gigaword

In [None]:
# Create the TFDS builder for Gigaword and download/prepare the data.
gigaword_builder = tfds.builder("Gigaword")
gigaword_builder.download_and_prepare()
# No split is given here; the next cell indexes the result with "test".
gigaword = gigaword_builder.as_dataset()

In [None]:
# Build the Gigaword test DataFrame with the common "text" / "summary" column names.
#
# The test split is read straight from the builder instead of from `gigaword`:
# this cell rebinds `gigaword` to a DataFrame, so indexing it with "test" would
# fail as soon as the cell is run a second time.
gigaword = tfds.as_dataframe(gigaword_builder.as_dataset(split="test"))
gigaword = gigaword.rename(columns={"document": "text"})

# Values arrive as bytes: decode, trim, and drop every "#" character
# (regex=False keeps "#" a literal on every pandas version).
for column in ("summary", "text"):
    gigaword[column] = (
        gigaword[column]
        .str.decode("utf-8")
        .str.strip()
        .str.replace("#", "", regex=False)
    )
gigaword.head(5)

### Multinews

In [None]:
# Create the TFDS builder for MultiNews and download/prepare the data.
multinews_builder = tfds.builder("MultiNews")
multinews_builder.download_and_prepare()
# No split is given here; the next cell indexes the result with "test".
multinews = multinews_builder.as_dataset()

In [None]:
# Build the MultiNews test DataFrame with the common "text" / "summary" column names.
#
# The test split is read straight from the builder instead of from `multinews`:
# this cell rebinds `multinews` to a DataFrame, so indexing it with "test" would
# fail as soon as the cell is run a second time.
multinews = tfds.as_dataframe(multinews_builder.as_dataset(split="test"))
multinews = multinews.rename(columns={"document": "text"})

# Decode the byte strings, then drop the first two characters of every summary.
# NOTE(review): after UTF-8 decoding an en dash is "\u2013", not the three
# characters "\xe2\x80\x93", so this replace probably never matches and the
# leading marker is actually removed by the [2:] slice — confirm before changing.
multinews['summary'] = (
    multinews['summary']
    .str.decode("utf-8")
    .str.replace("\xe2\x80\x93", "", regex=False)
    .str[2:]
    .str.strip()
)
multinews['text'] = multinews['text'].str.decode("utf-8").str.strip()
multinews.head(5)

### PubMed

In [None]:
# Load the PubMed test split (JSON lines) and flatten it into plain-text columns.
# NOTE(review): hard-coded absolute local path — the file must exist at this location.
pm_path = r"C:\Pro\Stages\A4 - DVRC\Work\Datasets\pubmed\test.json"
pubmed_test = pd.read_json(pm_path, lines=True)

# Explicit copy: the columns are reassigned below, and writing into a bare
# column slice of `pubmed_test` raises SettingWithCopyWarning and may not
# behave consistently across pandas versions.
pm = pubmed_test[["article_text", "abstract_text"]].copy()


def cleaner(x):
    """Join the items of `x` with ". ", remove every "<S>" marker and trim whitespace."""
    return ". ".join(x).replace("<S>", "").strip()


for column in ("abstract_text", "article_text"):
    # Blank out bracketed spans such as "[1]".
    # NOTE(review): `cleaner` joins each cell, so cells are presumably lists of
    # sentences; a regex replace only rewrites string cells, so it may be a
    # no-op here — confirm. Only "<S>" is removed; a closing "</S>" tag, if
    # present in the data, would remain — confirm.
    pm[column] = pm[column].replace(regex=r"\[[^\]]*\]", value="")
    pm[column] = pm[column].map(cleaner)

# `pm` is already an independent frame, so it can be handed over directly.
pubmed = pm

# Release the intermediate names; only `pubmed` is used from here on.
del pm
del pubmed_test

pubmed.head()

## Benchmark

In [None]:
# Benchmark accumulator: the cells below store one entry per dataset key.
results = {}
# Passed to every *SampleTest call below (as `limit=` or as the third positional
# argument) — presumably the number of samples scored per dataset; TODO confirm.
lim = 10

### Billsum

In [None]:
# Score Billsum with the three metrics; each call is unpacked into (scores, runtime).
# NOTE(review): hard-coded absolute path to a .pkl file. If serialized_to_model
# unpickles it (not visible here), only load files from a trusted source.
w2v = serialized_to_model(r'C:\Pro\Stages\A4 - DVRC\Work\Models\serialized_w2v.pkl')
bert_scores, bert_runtime = BERTScoreDynamicSampleTest(billsum_test, limit=lim)
# Static variant: takes the loaded w2v model explicitly, with withIdf disabled.
word2vec_scores, word2vec_runtime = BERTScoreStaticSampleTest(billsum_test, w2v, lim, withIdf = False)
bart_scores, bart_runtime = BARTScoreDynamicSampleTest(billsum_test, limit=lim)

In [None]:
# Record the Billsum run: one [scores, runtime] pair per metric.
billsum_results = {
    'BERTScore': [bert_scores, bert_runtime],
    'Custom': [word2vec_scores, word2vec_runtime],
    'Bart': [bart_scores, bart_runtime],
}
results['billsum'] = billsum_results

In [None]:
# NOTE(review): repeats the Billsum BARTScore call made earlier. The values rebound
# here are not written back into `results`; this looks like a leftover inspection cell.
bart_scores, bart_runtime = BARTScoreDynamicSampleTest(billsum_test, limit=lim)

In [None]:
# Display the most recently computed BARTScore values for inspection.
bart_scores

### Multinews

In [None]:
# Score MultiNews with the three metrics; each call is unpacked into (scores, runtime).
# Relies on `w2v` and `lim` defined in earlier cells.
bert_scores, bert_runtime = BERTScoreDynamicSampleTest(multinews, limit=lim)
word2vec_scores, word2vec_runtime = BERTScoreStaticSampleTest(multinews, w2v, lim, withIdf = False)
bart_scores, bart_runtime = BARTScoreDynamicSampleTest(multinews, limit=lim)

In [None]:
# Record the MultiNews run: one [scores, runtime] pair per metric.
multinews_results = {
    'BERTScore': [bert_scores, bert_runtime],
    'Custom': [word2vec_scores, word2vec_runtime],
    'Bart': [bart_scores, bart_runtime],
}
results['multinews'] = multinews_results

### Gigaword

In [None]:
# Score Gigaword with the three metrics; each call is unpacked into (scores, runtime).
# Relies on `w2v` and `lim` defined in earlier cells.
bert_scores, bert_runtime = BERTScoreDynamicSampleTest(gigaword, limit=lim)
word2vec_scores, word2vec_runtime = BERTScoreStaticSampleTest(gigaword, w2v, lim, withIdf = False)
bart_scores, bart_runtime = BARTScoreDynamicSampleTest(gigaword, limit=lim)

In [None]:
# Record the Gigaword run: one [scores, runtime] pair per metric.
gigaword_results = {
    'BERTScore': [bert_scores, bert_runtime],
    'Custom': [word2vec_scores, word2vec_runtime],
    'Bart': [bart_scores, bart_runtime],
}
results['gigaword'] = gigaword_results

### PubMed

In [None]:
# Score PubMed with the three metrics; each call is unpacked into (scores, runtime).
# Relies on `w2v` and `lim` defined in earlier cells.
# NOTE(review): `pubmed` has columns "article_text"/"abstract_text", while the other
# datasets use "text"/"summary" — confirm the *SampleTest helpers accept both layouts.
bert_scores, bert_runtime = BERTScoreDynamicSampleTest(pubmed, limit=lim)
word2vec_scores, word2vec_runtime = BERTScoreStaticSampleTest(pubmed, w2v, lim, withIdf = False)
bart_scores, bart_runtime = BARTScoreDynamicSampleTest(pubmed, limit=lim)

In [None]:
# Record the PubMed run: one [scores, runtime] pair per metric.
pubmed_results = {
    'BERTScore': [bert_scores, bert_runtime],
    'Custom': [word2vec_scores, word2vec_runtime],
    'Bart': [bart_scores, bart_runtime],
}
results['pubmed'] = pubmed_results

## Evaluation

### Runtime Table

In [None]:
# Runtime table: one row per metric, one column per dataset.
# Each results[dataset][metric] entry is a [scores, runtime] pair, so index 1 is the runtime.
# The key order here must match the row labels passed as `index` below.
metric_keys = ["BERTScore", "Custom", "Bart"]

billsum_runtimes = [results["billsum"][metric][1] for metric in metric_keys]
multinews_runtimes = [results["multinews"][metric][1] for metric in metric_keys]
gigaword_runtimes = [results["gigaword"][metric][1] for metric in metric_keys]
# The PubMed run is stored under the "pubmed" key; "scipa" is never written
# to `results` and raised a KeyError.
pubmed_runtimes = [results["pubmed"][metric][1] for metric in metric_keys]

runtimeDf = pd.DataFrame(
    {
        "Billsum": billsum_runtimes,
        "Multinews": multinews_runtimes,
        "Gigaword": gigaword_runtimes,
        # Use the runtime list here, not the `pubmed` dataset frame.
        "PubMed": pubmed_runtimes,
    },
    index=["Roberta-24-layers", "Word2Vec", "BART-large-CNN"],
)
runtimeDf

### Quality evaluation

#### Billsum

In [None]:
# Per-sample Billsum scores, one column group per metric, placed side by side.
billsumQualityDf = pd.concat(
    [
        pd.DataFrame(results["billsum"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["billsum"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["billsum"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)
billsumQualityDf.head()

#### Multinews

In [None]:
# Per-sample MultiNews scores, one column group per metric, placed side by side.
multinewsQualityDf = pd.concat(
    [
        pd.DataFrame(results["multinews"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["multinews"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["multinews"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)
multinewsQualityDf.head()

#### Gigaword

In [None]:
# Per-sample Gigaword scores, one column group per metric, placed side by side.
gigawordQualityDf = pd.concat(
    [
        pd.DataFrame(results["gigaword"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["gigaword"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["gigaword"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)
gigawordQualityDf.head()

#### PubMed

In [None]:
# Per-sample PubMed scores, one column group per metric, placed side by side.
pubmedQualityDf = pd.concat(
    [
        pd.DataFrame(results["pubmed"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["pubmed"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["pubmed"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)
pubmedQualityDf.head()

#### Summary

In [None]:
# Column-wise mean of every score column, per dataset.
meanGigawordQualityDf = gigawordQualityDf.mean()
meanMultinewsQualityDf = multinewsQualityDf.mean()
meanBillsumQualityDf = billsumQualityDf.mean()
meanPubmedQualityDf = pubmedQualityDf.mean()

# One column per dataset, one row per score column.
summaryDf = pd.DataFrame(
    {
        "Gigaword": meanGigawordQualityDf,
        "Billsum": meanBillsumQualityDf,
        "Multinews": meanMultinewsQualityDf,
        "PubMed": meanPubmedQualityDf,
    }
)
summaryDf

## Shuffled Data

### Shuffling datasets

In [None]:
# Fixed seed so the forged (text, summary) pairs are identical on every run;
# without it each execution of this cell produced a different permutation.
SHUFFLE_SEED = 42


def _with_shuffled_summaries(frame, seed=SHUFFLE_SEED):
    """Return a copy of `frame` whose "summary" column is randomly permuted.

    The input frame is left untouched. `.values` drops the index of the sampled
    Series so the permuted summaries are assigned by position; assigning the
    Series itself would realign on the index and undo the shuffle.
    """
    shuffled = frame.copy()
    shuffled["summary"] = shuffled["summary"].sample(frac=1, random_state=seed).values
    return shuffled


shuffledGigaword = _with_shuffled_summaries(gigaword)

shuffledMultinews = _with_shuffled_summaries(multinews)

shuffledBillsum = _with_shuffled_summaries(billsum_test)

In [None]:
# Preview the first rows of the shuffled MultiNews frame.
shuffledMultinews.head(3)

In [None]:
# Preview the first rows of the shuffled Billsum frame.
shuffledBillsum.head(3)

In [None]:
# Preview the first rows of the shuffled Gigaword frame.
shuffledGigaword.head(3)

### Evaluating the forged datasets

In [None]:
# Re-run the three metrics on the shuffled datasets.
# Note: `results` and `lim` are rebound here, replacing the values from the
# benchmark on the original datasets.
results = {}
lim = 20

w2v = serialized_to_model(r'C:\Pro\Stages\A4 - DVRC\Work\Models\serialized_w2v.pkl')

# Same order as before: Billsum, then MultiNews, then Gigaword.
shuffled_runs = (
    ('billsum', shuffledBillsum),
    ('multinews', shuffledMultinews),
    ('gigaword', shuffledGigaword),
)
for dataset_key, shuffled_frame in shuffled_runs:
    bert_scores, bert_runtime = BERTScoreDynamicSampleTest(shuffled_frame, limit=lim)
    word2vec_scores, word2vec_runtime = BERTScoreStaticSampleTest(shuffled_frame, w2v, lim, withIdf = False)
    bart_scores, bart_runtime = BARTScoreDynamicSampleTest(shuffled_frame, limit=lim)
    # One [scores, runtime] pair per metric.
    results[dataset_key] = {
        'BERTScore': [bert_scores, bert_runtime],
        'Custom': [word2vec_scores, word2vec_runtime],
        'Bart': [bart_scores, bart_runtime],
    }

# Keep the per-dataset names bound, as the unrolled version of this cell did.
billsum_results = results['billsum']
multinews_results = results['multinews']
gigaword_results = results['gigaword']

In [None]:
# Per-sample score tables for the shuffled runs: one column group per metric, side by side.
billsumQualityDf = pd.concat(
    [
        pd.DataFrame(results["billsum"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["billsum"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["billsum"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)
multinewsQualityDf = pd.concat(
    [
        pd.DataFrame(results["multinews"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["multinews"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["multinews"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)
gigawordQualityDf = pd.concat(
    [
        pd.DataFrame(results["gigaword"]["Custom"][0], columns=["W2V_P", "W2V_R", "W2V_F"]),
        pd.DataFrame(results["gigaword"]["BERTScore"][0], columns=["Bert_P", "Bert_R", "Bert_F"]),
        pd.DataFrame(results["gigaword"]["Bart"][0], columns=["Bart"]),
    ],
    axis="columns",
)

# Column-wise mean of every score column, per dataset.
meanGigawordQualityDf = gigawordQualityDf.mean()
meanMultinewsQualityDf = multinewsQualityDf.mean()
meanBillsumQualityDf = billsumQualityDf.mean()

# One column per dataset, one row per score column.
summaryDf = pd.DataFrame(
    {
        "Gigaword": meanGigawordQualityDf,
        "Billsum": meanBillsumQualityDf,
        "Multinews": meanMultinewsQualityDf,
    }
)
summaryDf