
# **AI-powered paraphrasing tool**


 **Objective:**
Develop an AI-powered paraphrasing tool as a Python application or module.

In [1]:
!pip install torch transformers sentencepiece
!pip install language-tool-python
!pip install nltk rouge-score sentence-transformers



In [2]:
import torch
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import language_tool_python
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer, util

nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [3]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Paraphrasing model
model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Grammar checker
tool = language_tool_python.LanguageTool('en-US')

# Evaluation models
rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
sim_model = SentenceTransformer('all-MiniLM-L6-v2')

Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading weights:   0%|          | 0/257 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [4]:
def generate_paraphrase(text, num_return_sequences=1):

    input_text = "paraphrase: " + text + " </s>"


    encoding = tokenizer(
        input_text,
        padding=True,
        truncation=True,
        return_tensors="pt"
    ).to(device)

    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=256,
        num_beams=5,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
        top_k=120,
        top_p=0.95,
        early_stopping=True
    )

    paraphrases = [
        tokenizer.decode(output, skip_special_tokens=True)
        for output in outputs
    ]

    return paraphrases

In [5]:
def correct_grammar(text):
    matches = tool.check(text)
    corrected = language_tool_python.utils.correct(text, matches)
    return corrected

In [6]:
def evaluate_paraphrase(original, paraphrased):

    # BLEU
    reference = [nltk.word_tokenize(original)]
    candidate = nltk.word_tokenize(paraphrased)
    bleu = sentence_bleu(reference, candidate)

    # ROUGE
    rouge_scores = rouge.score(original, paraphrased)

    # Semantic Similarity
    emb1 = sim_model.encode(original, convert_to_tensor=True)
    emb2 = sim_model.encode(paraphrased, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(emb1, emb2).item()

    return {
        "BLEU": round(bleu, 4),
        "ROUGE-1": round(rouge_scores['rouge1'].fmeasure, 4),
        "ROUGE-L": round(rouge_scores['rougeL'].fmeasure, 4),
        "Semantic Similarity": round(similarity, 4)
    }

In [7]:
input_text = input("Enter text to paraphrase:\n")

paraphrases = generate_paraphrase(input_text, num_return_sequences=1)

for i, para in enumerate(paraphrases):

    corrected = correct_grammar(para)
    scores = evaluate_paraphrase(input_text, corrected)

    print("\n==============================")
    print(f"Paraphrase {i+1}")
    print("==============================")
    print("Generated:", para)
    print("Grammar Corrected:", corrected)
    print("Evaluation Scores:", scores)

Enter text to paraphrase:
Artificial intelligence is transforming the way businesses operate.


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



Paraphrase 1
Generated: Artificial intelligence is changing the way businesses operate.
Grammar Corrected: Artificial intelligence is changing the way businesses operate.
Evaluation Scores: {'BLEU': 0.5969, 'ROUGE-1': 0.875, 'ROUGE-L': 0.875, 'Semantic Similarity': 0.9569}
