In [None]:
! pip install sentence-splitter #will help splitting paragraphs into sentences

In [None]:
! pip install transformers

In [None]:
! pip install SentencePiece #will offer encoding and decoding of sentences. 

# Importing Libraries & Dependencies

---

In [None]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [None]:
import warnings
# Silence every Python warning for the rest of the session to keep cell output short.
# NOTE(review): this blanket filter also hides deprecation warnings raised by libraries.
warnings.filterwarnings('ignore')

# Setting Up the PEGASUS Model

---

In [None]:
# Name of the pretrained checkpoint handed to from_pretrained() for both the
# tokenizer and the model.
model_name = "tuner007/pegasus_paraphrase"

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

In [None]:
# Load the tokenizer that belongs to the checkpoint named in model_name.
tokenizer = PegasusTokenizer.from_pretrained(model_name)

In [None]:
# Load the pretrained weights first, then move the model to the selected device.
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)

In [None]:
def get_response(input_text, num_return_sequences, max_length=60, num_beams=10):
    """Generate paraphrases of a single piece of text with the loaded model.

    Args:
        input_text: The text to paraphrase. It is truncated to ``max_length``
            tokens before being passed to the model.
        num_return_sequences: Number of paraphrases to return. Beam search
            cannot return more sequences than it has beams, so keep this
            less than or equal to ``num_beams``.
        max_length: Token limit applied both to the tokenized input and to
            each generated sequence. Defaults to 60.
        num_beams: Beam width used for beam search. Defaults to 10.

    Returns:
        A list of ``num_return_sequences`` decoded strings with special
        tokens removed.
    """
    # Call the tokenizer directly: prepare_seq2seq_batch is deprecated in
    # transformers 4.x and scheduled for removal.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=max_length,
        return_tensors="pt",
    ).to(torch_device)

    # No `temperature` here: it only rescales logits when sampling is enabled,
    # and this call uses plain beam search.
    translated = model.generate(
        **batch,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )

    return tokenizer.batch_decode(translated, skip_special_tokens=True)

# Testing the Model on a Single Sentence

---


In [None]:
# Sample sentence used to try out the paraphraser.
text = "Data science is an interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from noisy, structured and unstructured data, and apply knowledge and actionable insights from data across a broad range of application domains. "

# Ask for five candidates; the returned list is displayed as the cell output,
# so five different paraphrases of the sentence are shown.
get_response(text, num_return_sequences=5)



# Testing the Model on a Paragraph

---

In [None]:
# Paragraph to paraphrase, written as one string literal per sentence.
# Every sentence-ending period is followed by a space; the original text ran
# two pairs of sentences together ("big data.Data", "data deluge.A"), which
# presumably kept the sentence splitter from separating them.
context = (
    "Data science is an interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from noisy, structured and unstructured data and apply knowledge and actionable insights from data across a broad range of application domains. "
    "Data science is related to data mining, machine learning and big data. "
    "Data science is a concept to unify statistics, data analysis, informatics, and their related methods in order to understand and analyze actual phenomena with data. "
    "It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge. "
    "However, data science is different from computer science and information science. "
    "Turing Award winner Jim Gray imagined data science as a fourth paradigm of science (empirical, theoretical, computational, and now data-driven) and asserted that everything about science is changing because of the impact of information technology and the data deluge. "
    "A data scientist is someone who creates programming code, and combines it with statistical knowledge to create insights from data"
)

In [None]:
# Print the paragraph as plain text instead of showing its quoted repr.
print(context)

In [None]:
### Split the paragraph with the sentence-splitter library ###

# Takes the input paragraph and splits it into a list of sentences.
# NOTE(review): split_text_into_sentences is imported but not used in this cell.
from sentence_splitter import SentenceSplitter, split_text_into_sentences
 
# Splitter configured for English text.
splitter = SentenceSplitter(language='en')
 
# The bare name on the last line makes the notebook display the resulting list.
sentence_list = splitter.split(context)
sentence_list

In [None]:
# Paraphrase every sentence on its own, requesting one candidate per sentence.
# get_response returns a list, so `paraphrase` is a list of one-element lists.
paraphrase = [get_response(sentence, 1) for sentence in sentence_list]

# Display the paraphrased sentences.
paraphrase

In [None]:
# Second pass: collapse each per-sentence list of candidates into one string.
paraphrase2 = list(map(' '.join, paraphrase))
paraphrase2

In [None]:
# Combine the paraphrased sentences into a single paragraph.
# The strings are joined directly rather than going through str(list): the
# list repr escapes characters and switches to double quotes whenever the text
# contains an apostrophe, which strip("'") would leave in the result.
paraphrase3 = [' '.join(paraphrase2)]
paraphrased_text = paraphrase3[0]
paraphrased_text

# Comparison of Original and Paraphrased Text

--- 

In [None]:
# Print the original and the paraphrased paragraph one after the other, each
# under a label and a 100-character rule.
divider = "-" * 100
print("ORIGINAL : ", divider, context, sep="\n")
print("\n\n")
print("PARAPHRASED : ", divider, paraphrased_text, sep="\n")

# Writing Results to Text File

--- 

In [None]:
# Prefix each text with its label and a 100-character rule before writing it out.
# The startswith guards keep this cell idempotent: running it again does not
# stack a second header on top of the first.
original_header = "ORIGINAL : \n" + "-"*100 + "\n"
paraphrased_header = "PARAPHRASED : \n" + "-"*100 + "\n"

if not context.startswith(original_header):
    context = original_header + context
if not paraphrased_text.startswith(paraphrased_header):
    paraphrased_text = paraphrased_header + paraphrased_text

In [None]:
# Write both texts to a file, separated by blank lines.
# The `with` block closes the file even if one of the writes raises, and the
# explicit UTF-8 encoding keeps the output independent of the platform default.
with open("paraprasing_using_PEGASUS.txt", "w", encoding="utf-8") as text_file:
    # write() returns the number of characters written.
    original = text_file.write(context)

    text_file.write("\n\n\n")

    paraphrased = text_file.write(paraphrased_text)