#### This notebook uses the Pegasus Transformer model for paraphrasing. It also implements a function that correctly splits text into sentences so they can be paraphrased to avoid plagiarism detection.

In [1]:
!pip install transformers
!pip install sentence_splitter
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.1-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m47.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.29.1
Looking in in

In [2]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# Hugging Face model id of the Pegasus checkpoint used for paraphrasing.
model_name = 'tuner007/pegasus_paraphrase'

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

# Load the tokenizer and the model from the same checkpoint, then move the
# model onto the selected device.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/86.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

In [3]:
def get_response(input_text, num_return_sequences, max_length=60, num_beams=10):
    """Generate paraphrases of a single piece of text with the Pegasus model.

    Uses the module-level ``tokenizer``, ``model`` and ``torch_device``.

    Args:
        input_text: The text to paraphrase. It is tokenized as a batch of one
            and truncated to ``max_length`` tokens, so any content beyond
            that limit is silently dropped before generation.
        num_return_sequences: How many candidate paraphrases to return. Must
            not exceed ``num_beams``.
        max_length: Token limit applied to both the tokenized input and the
            generated output. Defaults to 60, the value that was previously
            hard-coded.
        num_beams: Beam width used by beam search. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        A list of decoded paraphrase strings with special tokens removed.

    Raises:
        ValueError: If ``num_return_sequences`` is greater than ``num_beams``.
    """
    # Fail early with a clear message instead of deep inside `generate`.
    if num_return_sequences > num_beams:
        raise ValueError(
            "num_return_sequences ({}) must be <= num_beams ({})".format(
                num_return_sequences, num_beams
            )
        )

    # Tokenize as a one-element batch and move the tensors to the model's device.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=max_length,
        return_tensors="pt",
    ).to(torch_device)

    # NOTE(review): sampling (`do_sample=True`) is not enabled here, so
    # `temperature` is presumably ignored by beam search -- confirm against the
    # installed transformers version before relying on or removing it.
    translated = model.generate(
        **batch,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )

    return tokenizer.batch_decode(translated, skip_special_tokens=True)

In [4]:
# Single example sentence used to try out the paraphraser.
text = "Cutting carbon emissions to mitigate climate change and reducing the over-dependence on fossil fuels are listed as a priority in a great number of countries throughout the world."
# Whitespace-delimited word count of the example; shown as the cell's output.
len(text.split())

28

In [5]:
# Request two paraphrase candidates for the example sentence.
get_response(text, num_return_sequences=2)

['Reducing the over-dependence on fossil fuels and cutting carbon emissions are both listed as priorities by many countries.',
 'Reducing the over-dependence on fossil fuels and cutting carbon emissions are listed as priorities by many countries.']

In [6]:
# Multi-sentence paragraph used as input for the sentence splitter.
context = "Cutting carbon emissions to mitigate climate change and reducing the over-dependence on fossil fuels are listed as a priority in a great number of countries throughout the world. As a result, investments in the energy sector are shifting towards renewable energies; among all the renewables, wind energy is experiencing remarkable growth in recent years. Globally, the wind energy sector has undergone two record years in new capacity installations – a total of 93 GW in 2020 [1] and 94 GW in 2021 [2]. However, these reports also emphasise the need to quadruple the current growth rate to stay on course for the ambitious goals of the Paris Agreement [3] and the European Union Green Deal [4], i.e., net-zero emissions by 2050."

In [7]:
from sentence_splitter import SentenceSplitter, split_text_into_sentences
# Build an English sentence splitter and break `context` into a list of sentences.
splitter = SentenceSplitter(language='en')
sentence_list = splitter.split(context)
# Last expression of the cell: display the resulting list of sentences.
sentence_list

['Cutting carbon emissions to mitigate climate change and reducing the over-dependence on fossil fuels are listed as a priority in a great number of countries throughout the world.',
 'As a result, investments in the energy sector are shifting towards renewable energies; among all the renewables, wind energy is experiencing remarkable growth in recent years.',
 'Globally, the wind energy sector has undergone two record years in new capacity installations – a total of 93 GW in 2020 [1] and 94 GW in 2021 [2].',
 'However, these reports also emphasise the need to quadruple the current growth rate to stay on course for the ambitious goals of the Paris Agreement [3] and the European Union Green Deal [4], i.e., net-zero emissions by 2050.']