In [2]:
import pandas as pd
import torch
from transformers import BartTokenizer, BartForConditionalGeneration

# Load the pre-trained BART model and tokenizer.
# Both are bound at module level; compute_bartscore() below reads them as globals.
# NOTE(review): from_pretrained presumably downloads the checkpoint on first use —
# confirm network access or a local cache is available.
model_name = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Load the dataset
# NOTE(review): the absolute '/content/...' path looks environment-specific
# (e.g. a hosted notebook) — adjust when running elsewhere.
file_path = '/content/sampled_dataECBGoldSilver.csv'
data = pd.read_csv(file_path)

def compute_bartscore(source_texts, generated_texts, max_length=None):
    """Score each generated text by its log-likelihood under BART given its source.

    For every (source, generated) pair the source is fed to the encoder and the
    generated text is used as the decoder target. The score is the negated
    cross-entropy loss returned by the model, i.e. the mean per-token
    log-likelihood of the generated text; values closer to 0 are better.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        source_texts: Iterable of conditioning texts (may contain NaN/None).
        generated_texts: Iterable of texts to score, aligned with
            ``source_texts`` (may contain NaN/None).
        max_length: Maximum number of tokens kept per text. Defaults to the
            smaller of the tokenizer's ``model_max_length`` and the model's
            ``max_position_embeddings``, so over-long texts are truncated
            instead of crashing the forward pass.

    Returns:
        A list with one entry per pair: a float score, or ``None`` when either
        text of the pair is missing.
    """
    if max_length is None:
        # BART has a fixed number of position embeddings; token sequences
        # longer than that raise an indexing error inside the model.
        max_length = min(
            tokenizer.model_max_length,
            model.config.max_position_embeddings,
        )

    bart_scores = []

    # Scoring only: no gradients are needed for any pair.
    with torch.no_grad():
        for src, gen in zip(source_texts, generated_texts):
            # Keep output aligned with the input rows: missing text -> None.
            if pd.isna(src) or pd.isna(gen):
                bart_scores.append(None)
                continue

            # Encode both texts, truncating to the model's positional limit.
            input_ids = tokenizer.encode(
                src, return_tensors='pt', truncation=True, max_length=max_length
            )
            output_ids = tokenizer.encode(
                gen, return_tensors='pt', truncation=True, max_length=max_length
            )

            # With `labels` supplied the model returns the mean token-level
            # cross-entropy of the generated text given the source.
            output = model(input_ids=input_ids, labels=output_ids)

            # Negated loss == average log-likelihood per target token.
            bart_scores.append(-output.loss.item())

    return bart_scores

# Prepare data: the 'Gold_Sentence' column is passed as the source (conditioning) text,
# and the 'amalREC_Test_Sentence' column as the generated text to be scored.
source_texts = data['Gold_Sentence'].tolist()
generated_texts = data['amalREC_Test_Sentence'].tolist()

# Compute BARTScore for the dataset
bart_scores = compute_bartscore(source_texts, generated_texts)

# Add BARTScores to the dataframe
data['BARTScore'] = bart_scores

# Calculate the average BARTScore, ignoring NaN values
average_bartscore = data['BARTScore'].mean(skipna=True)

# Bare tuple expression: as the last statement of a notebook cell it displays
# the average score together with a preview of the first rows.
average_bartscore, data[['amalREC_Test_Sentence', 'Gold_Sentence', 'BARTScore']].head()

(-2.5820681177193996,
                                amalREC_Test_Sentence  \
 0  Renowned for his leadership prowess, the Grand...   
 1  The renowned Chislehurst and Sidcup Grammar Sc...   
 2  Nestled amidst the picturesque landscapes of t...   
 3  As the rector of Kremenchuk Mykhailo Ostrohrad...   
 4  Kraai River, originating in the rolling hills ...   
 
                                        Gold_Sentence  BARTScore  
 0  The distinguished group, Grande Armée had the ...  -2.967072  
 1  Chislehurst and Sidcup Grammar School a promin...  -2.833492  
 2  Réal can be found within the department of Pyr...  -2.701323  
 3  Kremenchuk Mykhailo Ostrohradskyi National Uni...  -2.294199  
 4  Kraai River is an important source of water fo...  -3.193003  )