In [None]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from transformers import BartTokenizer

class SummarizationDataset(Dataset):
    """Map-style dataset that tokenizes (text, summary) rows read from a CSV file.

    The first CSV column is used as the source text and the second column as
    the reference summary. Rows are tokenized lazily, one at a time, when
    they are indexed.

    Args:
        file_path: Path of the CSV file; read with ``pandas.read_csv``.
        tokenizer: Callable tokenizer accepting ``max_length``, ``padding``,
            ``truncation`` and ``return_tensors`` and returning a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length the source text is padded/truncated to.
        max_target_length: Length the summary is padded/truncated to.
            ``None`` (the default) reuses ``max_length``.
    """

    def __init__(self, file_path, tokenizer, max_length=512, max_target_length=None):
        self.dataset = pd.read_csv(file_path)
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Fall back to the source length so existing callers see no change.
        self.max_target_length = (
            max_length if max_target_length is None else max_target_length
        )

    def __len__(self):
        """Return the number of rows in the loaded CSV."""
        return len(self.dataset)

    @staticmethod
    def _as_text(value):
        """Coerce a CSV cell to ``str``; missing cells become the empty string."""
        if isinstance(value, str):
            return value
        # pandas represents empty cells as NaN/NA, which a tokenizer cannot take.
        if pd.isna(value):
            return ""
        return str(value)

    def _encode(self, text, max_length):
        """Tokenize ``text`` to exactly ``max_length`` tokens as a (1, max_length) batch."""
        return self.tokenizer(
            text,
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

    def __getitem__(self, idx):
        """Return the tokenized row ``idx``.

        Returns:
            dict with 1-D tensors ``'input_ids'`` and ``'attention_mask'`` for
            the source text, and ``'labels'`` holding the summary's token ids
            (padding positions keep the tokenizer's pad id).
        """
        text = self._as_text(self.dataset.iloc[idx, 0])
        summary = self._as_text(self.dataset.iloc[idx, 1])

        inputs = self._encode(text, self.max_length)
        targets = self._encode(summary, self.max_target_length)

        # return_tensors="pt" adds a leading batch axis of size 1; flatten it
        # away so a DataLoader can stack items into (batch, length) tensors.
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': targets['input_ids'].flatten()
        }

# Tokenizer of the base BART checkpoint; used by the validation dataset below.
# NOTE(review): a later cell rebinds `tokenizer` to the one stored with the
# fine-tuned model, while `val_dataset` keeps this instance — confirm both
# share the same vocabulary.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')

# Validation split: column 0 is the source text, column 1 the summary.
val_dataset = SummarizationDataset(
    file_path='/home/mohan/infy/data/merged/final/validation.csv',
    tokenizer=tokenizer,
)

# Keep the original row order so results are comparable between runs.
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=4,
    shuffle=False,
)

In [None]:
import torch
from transformers import BartForConditionalGeneration

# Directory holding the fine-tuned checkpoint (model weights + tokenizer files).
model_path = '/home/mohan/infy/models/fine_tuned_bart'

model = BartForConditionalGeneration.from_pretrained(model_path)
# Rebinds the notebook-level `tokenizer` to the one saved with the checkpoint.
tokenizer = BartTokenizer.from_pretrained(model_path)

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model.to(device)

In [None]:
from datasets import load_metric

def evaluate_model(model, dataloader, tokenizer, device=None):
    """Generate summaries for every batch and score them with ROUGE.

    Args:
        model: Sequence-to-sequence model exposing ``eval()``, ``parameters()``
            and ``generate()``.
        dataloader: Iterable of batches; each batch is a mapping with
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'`` tensors.
        tokenizer: Tokenizer used to decode generated ids and label ids back
            to text.
        device: Device the inputs are moved to before generation. ``None``
            (the default) uses the device of the model's first parameter.

    Returns:
        Whatever the ROUGE metric's ``compute()`` returns for the accumulated
        predictions and references.
    """
    # Derive the device from the model itself so the function does not depend
    # on a notebook-level `device` variable having been defined.
    if device is None:
        device = next(model.parameters()).device

    model.eval()
    # NOTE(review): `load_metric` is deprecated in newer `datasets` releases in
    # favour of `evaluate.load` — confirm against the installed version.
    rouge = load_metric('rouge')

    for batch in dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        # Labels are only decoded to text, so they never need to leave the
        # device the dataloader produced them on.
        labels = batch['labels']
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is not None:
            # -100 is the usual loss ignore-index and is not a decodable id;
            # map it back to padding, which skip_special_tokens then drops.
            labels = labels.masked_fill(labels == -100, pad_token_id)

        with torch.no_grad():
            generated = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=150,
                min_length=40,
                num_beams=4,
                no_repeat_ngram_size=3,
                early_stopping=True
            )

        decoded_preds = tokenizer.batch_decode(generated, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

        rouge.add_batch(predictions=decoded_preds, references=decoded_labels)

    return rouge.compute()

# Score the fine-tuned model on the validation loader and show the ROUGE output.
results = evaluate_model(
    model=model,
    dataloader=val_loader,
    tokenizer=tokenizer,
)
print(results)
