In [None]:
import pandas as pd
import string
import nltk
from nltk.translate.rouge_score import rouge_n, rouge_l


In [None]:

class ROUGEScoreCalculator:
    """Score every sentence pair of a dataset with symmetric ROUGE-1/2/L.

    ``dataset`` is expected to behave like a ``pandas.DataFrame``: it must
    expose ``shape`` and the columns ``"sentence1"`` and ``"sentence2"``.
    Each pair is scored in both directions (sentence1 as candidate against
    sentence2 as reference, and the reverse) and the two results are averaged.

    NOTE(review): the scoring itself is delegated to ``rouge_n`` / ``rouge_l``
    imported from ``nltk.translate.rouge_score`` at the top of the notebook.
    NLTK does not appear to ship a module of that name -- confirm the import
    resolves in the target environment before relying on these scores.
    """

    # Deletes ASCII punctuation and the danda ("।"). Built once here instead
    # of on every preprocess_text call.
    _STRIP_TABLE = str.maketrans('', '', string.punctuation + '।')

    def __init__(self, dataset):
        """Store the dataset whose rows will be scored."""
        self.dataset = dataset

    def calculate_rouge_score(self):
        """Return a list with one score dict per row, in row order.

        Each element has the keys ``"rouge_1_score"``, ``"rouge_2_score"``
        and ``"rouge_l_score"``.
        """
        total_rows = self.dataset.shape[0]
        return [self.calculate_rouge_scores(i) for i in range(total_rows)]

    def calculate_rouge_scores(self, i):
        """Return the direction-averaged ROUGE scores for the row at position ``i``.

        ``i`` is a zero-based row position, not an index label.
        """
        # .iloc is positional: a frame whose index is not 0..n-1 (filtered,
        # shuffled, or loaded with a custom index) is still read row by row
        # instead of raising KeyError or picking the wrong row.
        sentence1 = self.preprocess_text(self.dataset["sentence1"].iloc[i])
        sentence2 = self.preprocess_text(self.dataset["sentence2"].iloc[i])

        def symmetric(metric, **kwargs):
            # Average of both directions: sentence1 as candidate against
            # sentence2 as reference, then the roles swapped.
            forward = metric(reference=sentence2, candidate=sentence1, **kwargs)
            backward = metric(reference=sentence1, candidate=sentence2, **kwargs)
            return (forward + backward) / 2.0

        return {
            "rouge_1_score": symmetric(self.calculate_rouge, n=1),
            "rouge_2_score": symmetric(self.calculate_rouge, n=2),
            "rouge_l_score": symmetric(self.calculate_rouge_l),
        }

    def calculate_rouge(self, reference, candidate, n):
        """Return ``rouge_n`` for one reference/candidate pair at n-gram order ``n``.

        Both texts are passed wrapped in single-element lists. The result is
        presumably numeric, since callers average it -- verify against the
        actual ``rouge_n`` implementation.
        """
        return rouge_n([reference], [candidate], n=n)

    def calculate_rouge_l(self, reference, candidate):
        """Return ``rouge_l`` for one reference/candidate pair.

        Both texts are passed wrapped in single-element lists.
        """
        return rouge_l([reference], [candidate])

    def preprocess_text(self, text):
        """Strip ASCII punctuation and the danda ("।") from ``text``.

        Missing values (``None``/NaN, e.g. empty CSV cells) become ``""`` and
        other non-string values are converted with ``str()`` so that a single
        bad cell does not abort the whole run with ``AttributeError``.
        """
        if not isinstance(text, str):
            text = "" if pd.isna(text) else str(text)
        return text.translate(self._STRIP_TABLE)




In [None]:

# Load the BnPC paraphrase CSV and score every row.
bnpc_dataset = pd.read_csv("BnPC_paraphrase_dataset.csv")
bnpc_calculator = ROUGEScoreCalculator(bnpc_dataset)
bnpc_rouge_scores = bnpc_calculator.calculate_rouge_score()

# Report one line per pair, numbered from 1, with two-decimal scores.
print("BNPC ROUGE Scores:")
for pair_number, pair_scores in enumerate(bnpc_rouge_scores, start=1):
    unigram = pair_scores['rouge_1_score']
    bigram = pair_scores['rouge_2_score']
    longest = pair_scores['rouge_l_score']
    print(f"Pair {pair_number}: ROUGE-1: {unigram:.2f}, ROUGE-2: {bigram:.2f}, ROUGE-L: {longest:.2f}")

# Repeat the same for BUET and indic datasets


In [None]:
# Load the BUET paraphrase CSV and score every row.
BUET_dataset = pd.read_csv("BUET_paraphrase_dataset.csv")
BUET_calculator = ROUGEScoreCalculator(BUET_dataset)
BUET_rouge_scores = BUET_calculator.calculate_rouge_score()

# Report one line per pair, numbered from 1, with two-decimal scores.
print("BUET ROUGE Scores:")
for pair_number, pair_scores in enumerate(BUET_rouge_scores, start=1):
    unigram = pair_scores['rouge_1_score']
    bigram = pair_scores['rouge_2_score']
    longest = pair_scores['rouge_l_score']
    print(f"Pair {pair_number}: ROUGE-1: {unigram:.2f}, ROUGE-2: {bigram:.2f}, ROUGE-L: {longest:.2f}")

In [None]:

# Load the indic paraphrase CSV and score every row.
indic_dataset = pd.read_csv("indic_paraphrase_dataset.csv")
indic_calculator = ROUGEScoreCalculator(indic_dataset)
indic_rouge_scores = indic_calculator.calculate_rouge_score()

# Report one line per pair, numbered from 1, with two-decimal scores.
print("Indic Scores:")
for pair_number, pair_scores in enumerate(indic_rouge_scores, start=1):
    unigram = pair_scores['rouge_1_score']
    bigram = pair_scores['rouge_2_score']
    longest = pair_scores['rouge_l_score']
    print(f"Pair {pair_number}: ROUGE-1: {unigram:.2f}, ROUGE-2: {bigram:.2f}, ROUGE-L: {longest:.2f}")

NameError: ignored

In [None]:

# Load the TaPaCo paraphrase CSV and score every row.
# NOTE(review): this cell previously re-read "BnPC_paraphrase_dataset.csv" and
# printed a BNPC header, so it only repeated the BnPC results under tapaco_*
# names. The file name below follows the "<name>_paraphrase_dataset.csv"
# pattern used for the other datasets -- confirm it matches the file on disk.
tapaco_dataset = pd.read_csv("tapaco_paraphrase_dataset.csv")
tapaco_calculator = ROUGEScoreCalculator(tapaco_dataset)
tapaco_rouge_scores = tapaco_calculator.calculate_rouge_score()

# Report one line per pair, numbered from 1, with two-decimal scores.
print("TaPaCo ROUGE Scores:")
for i, scores in enumerate(tapaco_rouge_scores):
    print(f"Pair {i + 1}: ROUGE-1: {scores['rouge_1_score']:.2f}, ROUGE-2: {scores['rouge_2_score']:.2f}, ROUGE-L: {scores['rouge_l_score']:.2f}")

