In [1]:
import nltk
from nltk.tokenize import sent_tokenize
import warnings
warnings.filterwarnings("ignore")
from transformers import *
from parrot import Parrot
import numpy as np

In [2]:
def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
    # tokenize the text to be form of a list of token IDs
    inputs = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt")
    # generate the paraphrased sentences
    outputs = model.generate(
    **inputs,
    num_beams=num_beams,
    num_return_sequences=num_return_sequences,
    )
    # decode the generated sentences using the tokenizer to get them back to text
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [3]:
def pegasus(sentence):
    model = PegasusForConditionalGeneration.from_pretrained("tuner007/pegasus_paraphrase")
    tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")
    return get_paraphrased_sentences(model, tokenizer, sentence, num_beams=10, num_return_sequences=5)

In [4]:
def t5(sentence):
    tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
    model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
    return get_paraphrased_sentences(model, tokenizer, sentence, num_beams=10, num_return_sequences=5)

In [5]:
text = "India is a vibrant and diverse country located in South Asia. It is the seventh largest country by area and the second most populous country in the world, with over 1.3 billion people. India has a rich history, dating back to ancient civilizations, and has been shaped by a mix of cultures and traditions from different parts of the world. The country is known for its diverse landscapes, including the Himalayan mountain range in the north, the Arabian Sea and Bay of Bengal in the west and east, and the Deccan Plateau in the south. The official language of India is Hindi, but there are over 20 other recognized languages spoken throughout the country. India is also home to numerous religious communities, including Hinduism, Islam, Christianity, Buddhism, and Sikhism. This diversity is reflected in the country's food, music, dance, and art, which are a unique combination of traditional and modern influences. Despite facing economic and social challenges, such as poverty, corruption, and discrimination, India has made significant progress in recent years. The country has a rapidly growing economy, driven by a growing middle class, technological advancements, and the rise of entrepreneurship. India is also home to some of the world's leading universities, research institutions, and companies, making it an important player in the global economy. In conclusion, India is a country that showcases a unique blend of ancient history and modern development. Its rich cultural diversity and economic promise make it a fascinating place to explore and a vital player on the world stage."
sentences = sent_tokenize(text)
peg_results = list('')
t5_results = list('')

In [6]:
for sentence in sentences:
    peg_results.append(pegasus(sentence))

loading configuration file config.json from cache at C:\Users\Dell/.cache\huggingface\hub\models--tuner007--pegasus_paraphrase\snapshots\0159e2949ca73657a2f1329898f51b7bb53b9ab2\config.json
Model config PegasusConfig {
  "activation_dropout": 0.1,
  "activation_function": "relu",
  "add_bias_logits": false,
  "add_final_layer_norm": true,
  "architectures": [
    "PegasusForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 16,
  "decoder_start_token_id": 0,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 16,
  "eos_token_id": 1,
  "extra_pos_embeddings": 1,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_

In [7]:
for sentence in sentences:
    t5_results.append(t5(sentence))

loading configuration file config.json from cache at C:\Users\Dell/.cache\huggingface\hub\models--Vamsi--T5_Paraphrase_Paws\snapshots\3bbf07dc42d5ddc9ca77c5589ce7239b0b731832\config.json
Model config T5Config {
  "_name_or_path": "Vamsi/T5_Paraphrase_Paws",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_lengt

In [8]:
peg_results

[['South Asia contains India, a vibrant and diverse country.',
  'South Asia has a vibrant and diverse country in India.',
  'South Asia has a vibrant and diverse country called India.',
  'India is located in South Asia.',
  'South Asia has a vibrant and diverse country, India.'],
 ['It is the second most populous country in the world with over 1.3 billion people.',
  'It is the second most populous country in the world with over one billion people.',
  'The second most populous country in the world has over 1.3 billion people.',
  'It is the second most populous country in the world with more than one billion people.',
  'It is the second most populous country in the world, with over 1.3 billion people.'],
 ['India has a rich history dating back to ancient civilizations and has been shaped by a mix of cultures and traditions from different parts of the world.',
  'India has a rich history, dating back to ancient civilizations, and has been shaped by a mix of cultures and traditions f

In [9]:
t5_results

[['India is a vibrant and diverse country in South Asia.',
  'India is a vibrant and diverse country located in South Asia.',
  'India is a vibrant and diverse country in South Asia.',
  'India is a vibrant and varied country in South Asia.',
  'India is a vibrant and diverse country based in South Asia.'],
 ['It is the seventh largest country by area and the second most populous country in the world with',
  'It is the seventh largest country by area and the second most populous country in the world,',
  'It is the seventh largest country by area and second most populous country in the world with over',
  'It is the seventh largest country by area and second most populous country in the world, with',
  'It is the seventh largest country by area and the second largest country in the world with over '],
 ['India has a rich history dating back to ancient civilizations and has been shaped by ',
  'India has a rich history, dating back to ancient civilizations, and has been shaped',
  'Ind