In [18]:
import pandas as pd
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import load_metric

In [19]:
# Read the held-out validation split from disk.
validation_data = pd.read_csv(
    '/home/mohan/infy/data/fined/valid.csv'
)

# Source documents and their reference summaries, as plain Python lists.
target_texts = validation_data['summary'].tolist()
input_texts = validation_data['text'].tolist()

In [20]:
# Preview the validation frame; the rendered table elides the middle rows.
validation_data

Unnamed: 0.1,Unnamed: 0,text,summary
0,0,SECTION 1. SHORT TITLE.\n\n This Act may be...,Promoting Closed-Loop Pumped Storage Hydropowe...
1,1,SECTION 1. DEDUCTION FOR HEALTH AND LONG-TERM ...,Amends the Internal Revenue Code to allow indi...
2,2,One of Argentina's top referees is under inves...,German Delfino faces ban after overturning dec...
3,3,At least 35 workers have been killed at a dair...,Dairy factory in Red Sea port city of Hodeida ...
4,4,Daniel Levy reportedly told the Tottenham Hots...,Tottenham Hotspur chairman Daniel Levy has tol...
...,...,...,...
95,95,Gary Cahill has hinted Chelsea are feeling the...,Chelsea could only manage a draw at home to So...
96,96,In a ruling that could end the reign of illega...,"Companies will have to reveal names, IP addres..."
97,97,England's Joe Root has every hope the team can...,Joe Root hit 118 not out to help England into ...
98,98,SECTION 1. SHORT TITLE.\n\n This Act may be...,Fair Credit Card Application Act of 1997 - Ame...


In [21]:
# Directory holding the fine-tuned checkpoint (weights + tokenizer files).
model_path = '/home/mohan/infy/models/fine_tuned_Text_Summ/saved'

# Both artifacts are restored from the same saved directory.
tokenizer = BartTokenizer.from_pretrained(model_path)
model = BartForConditionalGeneration.from_pretrained(model_path)

In [22]:
# Load the ROUGE metric.
# `trust_remote_code=True` is passed explicitly: `datasets` warns that this
# argument will be mandatory for loading this metric from its next major release.
rouge = load_metric('rouge', trust_remote_code=True)

# Function to generate summaries
def generate_summary(text, max_input_length=512, max_length=150, min_length=40,
                     length_penalty=2.0, num_beams=4):
    """Generate a summary for one document with the loaded model and tokenizer.

    Relies on the notebook-level `tokenizer` and `model` objects.

    Args:
        text: The source document to summarize.
        max_input_length: Token budget for the input; longer inputs are truncated.
        max_length: Upper bound passed to `model.generate` for the output length.
        min_length: Lower bound passed to `model.generate` for the output length.
        length_penalty: Length penalty passed to `model.generate`.
        num_beams: Number of beams passed to `model.generate`.

    Returns:
        The decoded summary string with special tokens stripped.
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
    # Keep the input tensors on the same device as the model so generation
    # also works when the model has been moved off the CPU.
    inputs = inputs.to(model.device)
    summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        # Pass the mask explicitly instead of letting generate() infer it.
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        length_penalty=length_penalty,
        num_beams=num_beams,
        early_stopping=True,
    )
    # Only one sequence is generated per call, so decode the first entry.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Run the summarizer over every validation document, preserving order.
generated_summaries = list(map(generate_summary, input_texts))

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [23]:
# Score the generated summaries against the reference summaries.
rouge_scores = rouge.compute(
    predictions=generated_summaries,
    references=target_texts,
)

In [24]:
# Print the `.mid` aggregate of each ROUGE variant, one per line.
for key in rouge_scores:
    value = rouge_scores[key]
    print(f"{key}: {value.mid}")

rouge1: Score(precision=0.6595068808836118, recall=0.6445753055143894, fmeasure=0.6242702506030464)
rouge2: Score(precision=0.5253762966763771, recall=0.5138325274953983, fmeasure=0.498804972593745)
rougeL: Score(precision=0.5774826585948081, recall=0.5705118629443797, fmeasure=0.5515500092228083)
rougeLsum: Score(precision=0.6283288957382498, recall=0.6142534967039112, fmeasure=0.5960371270978608)
