# Generate abstractive summary for every document based on predicted contribution statements

In [56]:
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
import pandas as pd
import numpy as np
from transformers import TrainingArguments, Trainer
from datasets import load_metric
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import os
from torch.nn.utils.rnn import pack_padded_sequence, pad_sequence, pad_packed_sequence
from sklearn.metrics import f1_score, accuracy_score
import matplotlib.pyplot as plt
from constants import BASE_DIR

In [None]:
# Google Pegasus model not used as it generates very short summaries

# from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# import torch
# model_name = "google/pegasus-xsum"
# device = "cuda" if torch.cuda.is_available() else "cpu"
# tokenizer = PegasusTokenizer.from_pretrained(model_name)
# model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)

### Use the Facebook BART model to generate abstractive summaries

In [33]:
from transformers import BartForConditionalGeneration, BartTokenizer
import torch

# Hugging Face hub checkpoint used for summarisation.
model_name = "sshleifer/distilbart-cnn-12-6"

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the tokenizer and the seq2seq model for the same checkpoint, then move
# the model weights onto the selected device.
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)


Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.76k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

### Loop through data predicted in validation set and generate summaries

In [14]:
# Read the validation sentences together with the classifier's `predicted` labels.
val_predictions_path = os.path.join(BASE_DIR, 'generated_data', 'val_df_predicted.csv')
val_df = pd.read_csv(val_predictions_path)
# Preview the first rows (notebook cell output).
val_df.head()

Unnamed: 0,doc_num,sentence,target,doc_path,abstract,predicted
0,7,We present a memory augmented neural network f...,0,/Users/rohantondulkar/Projects/Typeset/trial-d...,We present a memory augmented neural network f...,0
1,7,NSE is equipped with a novel memory update rul...,0,/Users/rohantondulkar/Projects/Typeset/trial-d...,We present a memory augmented neural network f...,1
2,7,NSE can also access 1 multiple and shared memo...,0,/Users/rohantondulkar/Projects/Typeset/trial-d...,We present a memory augmented neural network f...,1
3,7,"In this paper , we demonstrated the effectiven...",1,/Users/rohantondulkar/Projects/Typeset/trial-d...,We present a memory augmented neural network f...,1
4,7,"For example , our shared - memory model showed...",0,/Users/rohantondulkar/Projects/Typeset/trial-d...,We present a memory augmented neural network f...,1


In [38]:
# Build one abstractive summary per validation document from the sentences
# predicted as contributions (predicted == 1), and collect the matching
# abstract as the reference text. Both lists stay index-aligned: entry i of
# `generated_summaries` is scored against entry i of `abstract_ground_truth`.
doc_nums = val_df['doc_num'].unique()
generated_summaries = []
abstract_ground_truth = []
for doc_num in doc_nums:
    doc_rows = val_df[val_df['doc_num'] == doc_num]
    # Filter once and reuse for both the abstract and the sentences.
    contrib_rows = doc_rows[doc_rows['predicted'] == 1]

    if contrib_rows.empty:
        # No sentence was predicted as a contribution for this document.
        # Previously `.iloc[0]` raised IndexError here; instead record an
        # empty summary so the document still counts in the evaluation.
        abstract_ground_truth.append(doc_rows['abstract'].iloc[0])
        generated_summaries.append('')
        continue

    abstract_ground_truth.append(contrib_rows['abstract'].iloc[0])

    # Concatenate the predicted contribution sentences into one input text.
    contrib_sentences = ' '.join(contrib_rows['sentence'].to_list())
    # truncation=True cuts inputs that exceed the tokenizer's maximum length.
    batch = tokenizer(contrib_sentences, truncation=True, padding="longest", return_tensors="pt").to(device)
    translated = model.generate(**batch)
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    # batch_decode returns a list with one string per input sequence; only one
    # text was passed in, so store that string rather than the wrapping list.
    generated_summaries.append(tgt_text[0])

#### Generate scores for various ROUGE metrics

In [48]:
from datasets import load_metric

# Score the generated summaries against the reference abstracts with ROUGE.
rouge_score = load_metric("rouge")
scores = rouge_score.compute(predictions=generated_summaries, references=abstract_ground_truth)

# Print the `mid` aggregate (precision, recall, fmeasure) of each ROUGE variant.
for rouge_name in ('rouge1', 'rouge2', 'rougeL'):
    print(scores[rouge_name].mid)


Score(precision=0.76156014117662, recall=0.36139191482009186, fmeasure=0.479370646063431)
Score(precision=0.5570170173965329, recall=0.2697756784048152, fmeasure=0.35291120305554746)
Score(precision=0.6452480020385981, recall=0.31132330177128353, fmeasure=0.4095962525242688)


#### Compute BERTScore

In [50]:
from datasets import load_metric
# Score the generated summaries against the reference abstracts with BERTScore.
bert_score_metric = load_metric("bertscore")
bert_scores = bert_score_metric.compute(
    predictions=generated_summaries, references=abstract_ground_truth, lang='en'
)
# The metric returns per-example values; average them over all documents.
precision = np.average(bert_scores['precision'])
recall = np.average(bert_scores['recall'])
# Named `bert_f1` so it does not overwrite the `f1_score` function imported
# from sklearn.metrics at the top of the notebook.
bert_f1 = np.average(bert_scores['f1'])
print(f'Bert scores - Precision: {precision}, Recall: {recall}, F1 score: {bert_f1}')

Bert scores - Precision: 0.9055407404899597, Recall: 0.8540546655654907, F1 score: 0.8788825869560242


#### Store metric scores in results folder

In [55]:
# Collect one row per metric (precision / recall / F1) and save them as CSV.

# ROUGE rows: take the `mid` aggregate of each variant.
score_records = [
    {
        'metric': rouge_name,
        'precision': scores[rouge_name].mid.precision,
        'recall': scores[rouge_name].mid.recall,
        'f1': scores[rouge_name].mid.fmeasure,
    }
    for rouge_name in ('rouge1', 'rouge2', 'rougeL')
]
# BERTScore row: average the per-example values over all documents.
score_records.append({
    'metric': 'bert score',
    'precision': np.average(bert_scores['precision']),
    'recall': np.average(bert_scores['recall']),
    'f1': np.average(bert_scores['f1']),
})

scores_df = pd.DataFrame(score_records)

# Create the results folder if needed; to_csv does not create missing directories.
results_dir = os.path.join(BASE_DIR, 'results')
os.makedirs(results_dir, exist_ok=True)
scores_df.to_csv(os.path.join(results_dir, 'scores.csv'), index=False)
scores_df.head()

Unnamed: 0,metric,precision,recall,f1
0,rouge1,0.76156,0.361392,0.479371
1,rouge2,0.557017,0.269776,0.352911
2,rougeL,0.645248,0.311323,0.409596
3,bert score,0.905541,0.854055,0.878883
