In [1]:
!pip install -q transformers  rouge-score sentence-transformers

[0m

In [2]:
from transformers import pipeline
from tqdm.notebook import tqdm
ab_summarizer = pipeline("summarization", model="t5-base")
import re 

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

2023-02-17 20:51:48.211754: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Downloading:   0%|          | 0.00/850M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [3]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

abs_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
abs_tokenizer = AutoTokenizer.from_pretrained("t5-base")

### GET GENERATED SUMMARIES FOR THE VALIDATION DATASET

In [4]:
import pandas as pd
original_texts = pd.read_csv("/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail/test.csv")

articles = original_texts["article"].tolist()
highlights = original_texts["highlights"].tolist()

reference_texts = articles[:len(articles) // 4]
reference_highlights = articles[:len(highlights) // 4]

In [5]:
def abstractive_summarize(doc,  max_length=250, min_length=50, batch_size=2):
  all_sum=[]
  # batched prediction
  for i in tqdm(range(int(len(doc) / batch_size) + 1)): 
    batch_start = i*batch_size  
    batch_end = (i+1) * batch_size if (i+1) * batch_size < len(doc) else len(doc)
    batch = ["summarize: " + x for x in doc[batch_start: batch_end]]   
    if (batch):
      inputs = abs_tokenizer.batch_encode_plus( batch, return_tensors="pt", max_length=477, truncation=True, padding=True) 
      outputs = abs_model.generate(inputs["input_ids"], max_length=max_length, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
      output = [re.sub(r'<([^>]*)>', "", abs_tokenizer.decode(x))  for x in outputs.tolist()]  
      all_sum = all_sum + output 
  return all_sum

In [6]:
generated_summaries = abstractive_summarize(reference_texts)

  0%|          | 0/1437 [00:00<?, ?it/s]

# BLUE SCORE

In [7]:
def blue_score(reference_corpus, generated_corpus):
  """
    Computes the BLUE score between two corpora of text summaries.
    :param reference_corpus: list of reference summaries (strings)
    :param generated_corpus: list of generated summaries (strings)
    :return: the BLUE score
  """
  reference_corpus = [s.split() for s in reference_corpus]
  generated_corpus = [s.split() for s in generated_corpus]
  numerator = 0
  denominator = 0
  for references, hypothesis in zip(reference_corpus, generated_corpus):
    numerator += min(len(references), len(hypothesis))
    denominator += len(references)
  return numerator / denominator


# BLUE SCORE

In [8]:
print(blue_score(reference_highlights, generated_summaries))

0.07351872121176029


# ROUGE

In [9]:
!pip install rouge

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
[0m

In [10]:
from rouge import Rouge

def get_rouge_score(reference_summaries, generated_summaries):
    # Initialize the Rouge object
    rouge = Rouge()
    
    # Calculate the ROUGE scores
    rouge_scores = rouge.get_scores(generated_summaries, reference_summaries, avg=True)
    
    # Format the scores as a dictionary
    formatted_scores = {
        'rouge-1': {
            'precision': rouge_scores['rouge-1']['p'],
            'recall': rouge_scores['rouge-1']['r'],
            'f1-score': rouge_scores['rouge-1']['f']
        },
        'rouge-2': {
            'precision': rouge_scores['rouge-2']['p'],
            'recall': rouge_scores['rouge-2']['r'],
            'f1-score': rouge_scores['rouge-2']['f']
        },
        'rouge-l': {
            'precision': rouge_scores['rouge-l']['p'],
            'recall': rouge_scores['rouge-l']['r'],
            'f1-score': rouge_scores['rouge-l']['f']
        }
    }
    
    return formatted_scores

print(get_rouge_score(reference_highlights, generated_summaries))

{'rouge-1': {'precision': 0.9095044179137473, 'recall': 0.1342096968093357, 'f1-score': 0.2286399249817001}, 'rouge-2': {'precision': 0.7441981545567203, 'recall': 0.07818746582404476, 'f1-score': 0.13804095020619286}, 'rouge-l': {'precision': 0.9018281597066025, 'recall': 0.13309159261346326, 'f1-score': 0.22673062289505894}}
