In [1]:
# Install necessary libraries (if not installed)
!pip install transformers rouge-score nltk scikit-learn tqdm

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=072336b57c80d4b260ca860aff30e26b5dbd26ffa62b9227cde153118b72fc7b
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [2]:
import pandas as pd
import numpy as np
import torch
from transformers import BartTokenizer, BartForConditionalGeneration
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from tqdm import tqdm
from google.colab import files

# Upload CSV File
print("Upload your dataset (CSV file)...")
uploaded = files.upload()

# Get the uploaded file name
file_name = list(uploaded.keys())[0]
print(f"Dataset uploaded: {file_name}")

# Load dataset
df = pd.read_csv(file_name)

# Ensure the dataset contains the expected column
if 'sentence' not in df.columns:
    raise ValueError("Dataset must contain a column named 'sentence'")

# Check if CUDA is available (for GPU acceleration)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load BART model
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

def paraphrase(text):
    """Paraphrase input text using BART model."""
    encoding = tokenizer.encode_plus(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    input_ids, attention_mask = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=512,
        num_return_sequences=1,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        repetition_penalty=2.0
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Apply paraphrasing to the dataset
tqdm.pandas()
df["paraphrased_sentence"] = df["sentence"].progress_apply(paraphrase)

# Compute similarity using TF-IDF + Cosine Similarity
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df["sentence"].tolist() + df["paraphrased_sentence"].tolist())

# Extracting the similarity scores
original_vectors = tfidf_matrix[:len(df)]
paraphrased_vectors = tfidf_matrix[len(df):]

similarities = [cosine_similarity(original_vectors[i], paraphrased_vectors[i])[0][0] for i in range(len(df))]

# Add similarity scores to DataFrame
df["similarity_score"] = similarities

# Calculate mean and standard deviation of similarity scores
mean_similarity = np.mean(similarities)
std_dev_similarity = np.std(similarities)

# Initialize ROUGE and BLEU score computation
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
bleu_scores = []
rouge_scores = {"rouge-1": [], "rouge-2": [], "rouge-l": []}

smoothing = SmoothingFunction().method1

for i in range(len(df)):
    ref = df.loc[i, "sentence"]
    pred = df.loc[i, "paraphrased_sentence"]

    # Compute BLEU score
    bleu = sentence_bleu([ref.split()], pred.split(), smoothing_function=smoothing)
    bleu_scores.append(bleu)

    # Compute ROUGE scores
    rouge_score = scorer.score(ref, pred)
    rouge_scores["rouge-1"].append(rouge_score["rouge1"].fmeasure)
    rouge_scores["rouge-2"].append(rouge_score["rouge2"].fmeasure)
    rouge_scores["rouge-l"].append(rouge_score["rougeL"].fmeasure)

# Add BLEU and ROUGE scores to DataFrame
df["bleu_score"] = bleu_scores
df["rouge-1"] = rouge_scores["rouge-1"]
df["rouge-2"] = rouge_scores["rouge-2"]
df["rouge-l"] = rouge_scores["rouge-l"]

# Calculate mean and standard deviation for BLEU and ROUGE scores
mean_bleu = np.mean(bleu_scores)
std_bleu = np.std(bleu_scores)

mean_rouge_1 = np.mean(rouge_scores["rouge-1"])
std_rouge_1 = np.std(rouge_scores["rouge-1"])

mean_rouge_2 = np.mean(rouge_scores["rouge-2"])
std_rouge_2 = np.std(rouge_scores["rouge-2"])

mean_rouge_l = np.mean(rouge_scores["rouge-l"])
std_rouge_l = np.std(rouge_scores["rouge-l"])

# Save results
output_file = "Paraphrase_Results.csv"
df.to_csv(output_file, index=False)

# Provide download link for output file
from google.colab import files
files.download(output_file)

# Print evaluation results
print(f"Results saved to {output_file}")
print(f"Mean Similarity Score: {mean_similarity:.4f}")
print(f"Standard Deviation of Similarity Scores: {std_dev_similarity:.4f}")
print(f"Mean BLEU Score: {mean_bleu:.4f}, Standard Deviation: {std_bleu:.4f}")
print(f"Mean ROUGE-1 Score: {mean_rouge_1:.4f}, Standard Deviation: {std_rouge_1:.4f}")
print(f"Mean ROUGE-2 Score: {mean_rouge_2:.4f}, Standard Deviation: {std_rouge_2:.4f}")
print(f"Mean ROUGE-L Score: {mean_rouge_l:.4f}, Standard Deviation: {std_rouge_l:.4f}")

Upload your dataset (CSV file)...


Saving paraphtrase_dataset.csv to paraphtrase_dataset.csv
Dataset uploaded: paraphtrase_dataset.csv
Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

100%|██████████| 480/480 [07:00<00:00,  1.14it/s]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Results saved to Paraphrase_Results.csv
Mean Similarity Score: 0.5685
Standard Deviation of Similarity Scores: 0.1059
Mean BLEU Score: 0.1522, Standard Deviation: 0.0380
Mean ROUGE-1 Score: 0.2965, Standard Deviation: 0.0470
Mean ROUGE-2 Score: 0.2714, Standard Deviation: 0.0476
Mean ROUGE-L Score: 0.2965, Standard Deviation: 0.0470
