In [1]:
!pip install transformers torch sentencepiece
!pip install nltk spacy language-tool-python
!pip install evaluate rouge-score sacrebleu sentence-transformers




In [2]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_name = "t5-small"

tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [3]:
def paraphrase_text(text, max_length=256):
    input_text = "paraphrase: " + text

    encoding = tokenizer.encode_plus(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True
    )

    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=max_length,
        num_beams=5,
        num_return_sequences=1,
        temperature=1.2
    )

    paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return paraphrased


In [4]:
text = input("Enter text to paraphrase:\n")


print("Original:", text)
print("Paraphrased:", paraphrase_text(text))


Enter text to paraphrase:
Artificial intelligence is transforming the way industries operate.


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Original: Artificial intelligence is transforming the way industries operate.
Paraphrased: Paraphrase: Artificial intelligence transforms the way industries operate.


In [5]:
import language_tool_python

tool = language_tool_python.LanguageTool('en-US')

def grammar_check(text):
    matches = tool.check(text)
    corrected_text = language_tool_python.utils.correct(text, matches)
    return corrected_text


In [6]:
paraphrased = paraphrase_text(text)
final_output = grammar_check(paraphrased)

print(final_output)


Paraphrase: Artificial intelligence transforms the way industries operate.


In [7]:
from sentence_transformers import SentenceTransformer, util

sim_model = SentenceTransformer('all-MiniLM-L6-v2')

def semantic_similarity(text1, text2):
    emb1 = sim_model.encode(text1, convert_to_tensor=True)
    emb2 = sim_model.encode(text2, convert_to_tensor=True)
    score = util.cos_sim(emb1, emb2)
    return score.item()


In [8]:
score = semantic_similarity(text, final_output)
print("Semantic Similarity Score:", score)


Semantic Similarity Score: 0.8764650225639343


In [9]:
import evaluate

bleu = evaluate.load("bleu")

def bleu_score(reference, candidate):
    return bleu.compute(predictions=[candidate], references=[[reference]])


In [10]:
rouge = evaluate.load("rouge")

def rouge_score(reference, candidate):
    return rouge.compute(predictions=[candidate], references=[reference])


In [11]:
print("BLEU:", bleu_score(text, final_output))
print("ROUGE:", rouge_score(text, final_output))


BLEU: {'bleu': 0.4518010018049224, 'precisions': [0.7, 0.5555555555555556, 0.375, 0.2857142857142857], 'brevity_penalty': 1.0, 'length_ratio': 1.1111111111111112, 'translation_length': 10, 'reference_length': 9}
ROUGE: {'rouge1': np.float64(0.75), 'rouge2': np.float64(0.5714285714285714), 'rougeL': np.float64(0.75), 'rougeLsum': np.float64(0.75)}
